In [45]:
from diagrams import Diagram, Cluster
from diagrams.aws.storage import S3
from diagrams.custom import Custom
from diagrams.onprem.client import Users
from diagrams.onprem.compute import Server
from diagrams.programming.language import Python

# Build the "Data Architecture Diagram": nodes are declared first (grouped
# into nested clusters), then the directed edges between them.
with Diagram("Data Architecture Diagram", show=False):

    # ---- Nodes ----------------------------------------------------------
    user = Users("User")

    with Cluster("Containerization"):
        # Node with an empty label: only the docker.png icon is drawn.
        docker = Custom("", "docker.png")

        with Cluster("User Interaction"):
            streamlit = Custom("Streamlit", "streamlit.png")
            fastapi = Custom("FastAPI", "fastapi.png")

        with Cluster("Data Processing"):
            airflow = Custom("Airflow", "airflow.png")

            with Cluster("Web Scraping"):
                webscraping = Python("BeautifulSoup, Selenium")

            with Cluster("PDF Scraping"):
                data_extraction = Server("Grobid")

            pydantic = Custom("Pydantic", "pydantic-logo.png")

    with Cluster("Data Storage"):
        s3 = S3("S3")
        snowflake = Custom("Snowflake", "snowflake.png")

    # ---- Edges ----------------------------------------------------------
    # `a >> b` draws a -> b and evaluates to b, so chains read left to right.
    user >> streamlit >> s3
    streamlit >> fastapi >> airflow

    # Airflow fans out to both scraping nodes, both converge on Pydantic,
    # and the chain continues to Snowflake, back to FastAPI, then Streamlit.
    (
        airflow
        >> [data_extraction, webscraping]
        >> pydantic
        >> snowflake
        >> fastapi
        >> streamlit
    )
