In [1]:
#Overall Project Architecture
from diagrams import Diagram, Cluster, Edge
from diagrams.custom import Custom
from diagrams.aws.storage import S3
from diagrams.onprem.container import Docker

# Icon image paths handed to the Custom nodes below. They are relative paths,
# presumably resolved against the kernel's working directory -- confirm that
# the PNG files live next to the notebook.
user_logo = "user_logo.png"
fastapi_logo = "fastapi_logo.png"
streamlit_logo = "streamlit_logo.png"
# Not referenced in this cell: the Docker node below comes from
# diagrams.onprem.container rather than a Custom icon.
docker_logo = "docker_logo.png"
airflow_logo = "airflow_logo.png"
aws_logo = "s3_logo.png"
pinecone_logo = "pinecone_logo.png"
snowflake_logo = "snowflake_logo.png"
llama3_logo = "llama-3_1-nemotron-51b-instruct_logo.png"
embedqa_logo = "nv-embedqa-e5-v5_logo.png"

# show=False keeps the library from launching an external image viewer each
# time the cell runs (awkward under Restart & Run All and on remote kernels).
# The diagram is displayed inline instead, via the trailing
# `overall_architecture_diagram` expression at the end of the cell.
with Diagram(
    "Overall Project Architecture", show=False, direction="LR"
) as overall_architecture_diagram:

    # The end user is the entry point of the whole picture.
    user = Custom("User", user_logo)

    with Cluster("Frontend"):
        streamlit_app = Custom("Streamlit App", streamlit_logo)
        user >> Edge(label="Access Frontend") >> streamlit_app

    # Backend: one FastAPI application node fanning out to four router nodes.
    with Cluster("Backend Services"):
        fastapi_service = Custom("FastAPI Application", fastapi_logo)

        with Cluster("FastAPI Routers"):
            snowflake_router = Custom("Snowflake Router", snowflake_logo)
            s3_router = Custom("S3 Router", aws_logo)
            summarization_router = Custom("Summarization Router", llama3_logo)
            rag_router = Custom("RAG Router", embedqa_logo)

            # A list on the right-hand side draws one edge per router.
            fastapi_service >> [snowflake_router, s3_router, summarization_router, rag_router]

    # Ingestion side: Airflow DAGs feeding a Docker node.
    with Cluster("Data Ingestion Pipeline"):
        airflow_pipeline = Custom("Airflow DAGs", airflow_logo)
        docker_container = Docker("Dockerized Airflow")
        airflow_pipeline >> docker_container

    # Storage nodes live outside every cluster.
    s3_storage = S3("S3 Bucket")
    snowflake_db = Custom("Snowflake DB", snowflake_logo)
    pinecone_db = Custom("Pinecone Vector DB", pinecone_logo)

    # FastAPI application -> storage.
    fastapi_service >> Edge(label="Fetch Data") >> snowflake_db
    fastapi_service >> Edge(label="Store & Fetch Files, Notes") >> s3_storage

    # RAG router <-> Pinecone: one edge for indexing, one for retrieval.
    rag_router >> Edge(label="Index Document & Notes Embeddings") >> pinecone_db
    pinecone_db >> Edge(label="Retrieve Top-5 Embeddings") >> rag_router

    # LLM node used by both the summarization and the RAG routers.
    with Cluster("LLM Processing"):
        # NOTE(review): the label says "Llama-3 70B" while the icon file name
        # mentions "llama-3_1-nemotron-51b-instruct" -- confirm which model the
        # project actually calls and align label and icon.
        llm_service = Custom("Llama-3 70B\n(NVIDIA)", llama3_logo)
        summarization_router >> Edge(label="Generate Summaries") >> llm_service
        rag_router >> Edge(label="Send Top-5 Vectors & Query") >> llm_service
        llm_service >> Edge(label="Return Summary/Answer") >> fastapi_service

    # Airflow (Docker node) -> storage.
    docker_container >> Edge(label="Scrape, Upload Data") >> s3_storage
    docker_container >> Edge(label="Setup & Load Data") >> snowflake_db

    # Frontend -> backend requests: three separately labelled edges.
    streamlit_app >> Edge(label="Fetch Details") >> fastapi_service
    streamlit_app >> Edge(label="Query Q&A") >> fastapi_service
    streamlit_app >> Edge(label="Save Research Notes") >> fastapi_service
    # Research-notes path: a second edge to the S3 bucket plus a labelled edge
    # to the RAG router (in addition to the unlabelled fan-out edge above).
    fastapi_service >> Edge(label="Store Research Notes") >> s3_storage
    fastapi_service >> Edge(label="Index Research Notes") >> rag_router

# Last expression of the cell: lets Jupyter render the diagram inline.
overall_architecture_diagram


In [2]:
#Backend_Workflow_Overview
from diagrams import Diagram, Cluster, Edge
from diagrams.aws.storage import S3
from diagrams.onprem.container import Docker
from diagrams.custom import Custom

# Icon image paths handed to the Custom nodes below. They are relative paths,
# presumably resolved against the kernel's working directory -- confirm that
# the PNG files live next to the notebook.
fastapi_logo = "fastapi_logo.png"
# Not referenced in this cell: the bucket below uses the S3 node class from
# diagrams.aws.storage rather than a Custom icon.
s3_logo = "s3_logo.png"
pinecone_logo = "pinecone_logo.png"
llama3_logo = "llama-3_1-nemotron-51b-instruct_logo.png"
embedqa_logo = "nv-embedqa-e5-v5_logo.png"
airflow_logo = "airflow_logo.png"
snowflake_logo = "snowflake_logo.png"
streamlit_logo = "streamlit_logo.png"

# show=False keeps the library from launching an external image viewer each
# time the cell runs (awkward under Restart & Run All and on remote kernels).
# The diagram is displayed inline instead, via the trailing
# `backend_workflow_diagram` expression at the end of the cell.
with Diagram("Backend_Workflow_Overview", show=False) as backend_workflow_diagram:

    # Ingestion side: Airflow DAGs feeding a Docker node.
    with Cluster("Airflow Pipeline"):
        airflow_pipeline = Custom("Airflow DAGs\n(PDF Extraction, Scraping, Data Loading)", airflow_logo)
        docker_container = Docker("Dockerized Airflow Environment")
        airflow_pipeline >> docker_container

    # Storage nodes live outside every cluster.
    s3_bucket = S3("S3 Bucket\n(Publication Data)")
    snowflake_db = Custom("Snowflake Data Warehouse", snowflake_logo)
    pinecone_db = Custom("Pinecone Vector DB", pinecone_logo)

    # FastAPI application together with the LLM and the RAG indexing node.
    with Cluster("FastAPI Service"):
        fastapi_app = Custom("FastAPI Application", fastapi_logo)
        # NOTE(review): the label says "Llama-3 70B" while the icon file name
        # mentions "llama-3_1-nemotron-51b-instruct" -- confirm which model the
        # project actually calls and align label and icon.
        LLM = Custom("Llama-3 70B", llama3_logo)
        rag_indexing = Custom("RAG Indexing\n(NVIDIA Embeddings)", embedqa_logo)

        # `-` connects nodes without a direction, so these six links are drawn
        # as plain lines rather than arrows.
        fastapi_app - Edge(label="Fetch Publications") - snowflake_db
        fastapi_app - Edge(label="Fetch Files & Store Summaries/Notes") - s3_bucket
        fastapi_app - Edge(label="Summarize Publications") - LLM
        fastapi_app - Edge(label="Query Documents or Research Notes") - rag_indexing
        LLM - Edge(label="Store Summaries") - s3_bucket
        rag_indexing - Edge(label="Store Embeddings") - pinecone_db

    # User-facing node: one request edge and two `<<` edges for what is
    # displayed back to the user.
    user_frontend = Custom("User via Streamlit App", streamlit_logo)
    user_frontend >> Edge(label="Select Document") >> fastapi_app
    user_frontend << Edge(label="Display Summary & Research Notes") << fastapi_app
    user_frontend << Edge(label="Display Q&A Responses") << fastapi_app

    # Airflow (Docker node) -> storage.
    docker_container >> Edge(label="Scrape, Extract & Upload") >> s3_bucket
    docker_container >> Edge(label="Setup & Load Data") >> snowflake_db

    # Query path: Pinecone -> RAG indexing -> LLM -> FastAPI -> user.
    # NOTE(review): no node is created inside this Cluster (all endpoints are
    # defined above), so the cluster has no members; presumably Graphviz will
    # not draw a "Document Query Process" box -- confirm against the rendered
    # image and either move nodes inside it or drop the wrapper.
    with Cluster("Document Query Process"):
        # The chain's result was previously bound to an unused name
        # (`top5_embeddings`); only the edge itself is needed.
        pinecone_db >> Edge(label="Retrieve Top-5 Embeddings") >> rag_indexing
        rag_indexing >> Edge(label="Send Question & Top-5 Embeddings") >> LLM
        LLM >> Edge(label="Generate Summary with Llama-3 70B") >> fastapi_app
        LLM >> Edge(label="Generate Answer with Llama-3 70B") >> fastapi_app
        fastapi_app >> Edge(label="Return Answer") >> user_frontend

    # Research-notes path: user -> FastAPI -> RAG indexing -> Pinecone.
    user_frontend >> Edge(label="Save Research Notes") >> fastapi_app
    fastapi_app >> Edge(label="Index Research Notes or Documents") >> rag_indexing
    rag_indexing >> Edge(label="Store Research Notes Embeddings or Documents Embedding") >> pinecone_db

# Last expression of the cell: lets Jupyter render the diagram inline.
backend_workflow_diagram


In [3]:
#User_Flow_Overview
from diagrams import Diagram, Cluster, Edge
from diagrams.custom import Custom

# Icon image paths handed to the Custom nodes below. They are relative paths,
# presumably resolved against the kernel's working directory -- confirm that
# the PNG files live next to the notebook.
user_logo = "user_logo.png"
streamlit_logo = "streamlit_logo.png"
fastapi_logo = "fastapi_logo.png"
s3_logo = "s3_logo.png"
pinecone_logo = "pinecone_logo.png"
llama3_logo = "llama-3_1-nemotron-51b-instruct_logo.png"
embedqa_logo = "nv-embedqa-e5-v5_logo.png"
snowflake_logo = "snowflake_logo.png"

# show=False keeps the library from launching an external image viewer each
# time the cell runs (awkward under Restart & Run All and on remote kernels).
# The diagram is displayed inline instead, via the trailing
# `user_flow_diagram` expression at the end of the cell.
with Diagram("User_Flow_Overview", show=False, direction="LR") as user_flow_diagram:

    # The end user is the entry point of the flow.
    user = Custom("User", user_logo)

    # Three Streamlit pages, all drawn with the Streamlit icon.
    with Cluster("Streamlit App"):
        grid_view = Custom("Grid View", streamlit_logo)
        detail_view = Custom("Detail View", streamlit_logo)
        qa_interface = Custom("Q&A Interface", streamlit_logo)

        # Navigation between the pages, plus edges from the detail and Q&A
        # pages back to the user node.
        user >> Edge(label="Explore & Select Publications") >> grid_view
        grid_view >> Edge(label="View Publication Details") >> detail_view
        detail_view >> Edge(label="View & Refresh Summary, View/Add Research Notes") >> user
        detail_view >> Edge(label="Take Me to Q&A Interface") >> qa_interface
        qa_interface >> Edge(label="Perform Q&A & Add Responses to Research Notes") >> user

    # Backend cluster: FastAPI plus every store/service it talks to.
    with Cluster("Backend Service - FastAPI"):
        fastapi_app = Custom("FastAPI Service", fastapi_logo)
        snowflake_db = Custom("Snowflake DB", snowflake_logo)
        s3_bucket = Custom("S3 Bucket", s3_logo)
        pinecone_db = Custom("Pinecone Vector DB", pinecone_logo)
        # NOTE(review): the label says "Llama-3 70B" while the icon file name
        # mentions "llama-3_1-nemotron-51b-instruct" -- confirm which model the
        # project actually calls and align label and icon.
        summarization_service = Custom("Llama-3 70B\n(NVIDIA)", llama3_logo)
        rag_indexing_service = Custom("RAG Indexing\n(NVIDIA Embeddings)", embedqa_logo)

        # FastAPI -> stores and services.
        fastapi_app >> Edge(label="Fetch Publication Details") >> snowflake_db
        fastapi_app >> Edge(label="Fetch & Store Summaries, Files, Notes") >> s3_bucket
        fastapi_app >> Edge(label="Index Document Embeddings if not Present") >> rag_indexing_service
        rag_indexing_service >> Edge(label="Store Document Embeddings") >> pinecone_db
        fastapi_app >> Edge(label="Generate Summary/Answer") >> summarization_service

        # Return edge from the LLM node to FastAPI.
        summarization_service >> Edge(label="Return Summary/Answer") >> fastapi_app

    # Each Streamlit page sends its requests to FastAPI; the `<<` edge carries
    # the Q&A responses back to the Q&A page.
    grid_view >> Edge(label="Request Publication Details") >> fastapi_app
    detail_view >> Edge(label="Request Summaries & Research Notes") >> fastapi_app
    qa_interface >> Edge(label="Request Q&A Responses") >> fastapi_app
    qa_interface << Edge(label="Return Q&A Responses") << fastapi_app

    # Research-notes path: Q&A page -> FastAPI -> S3 and RAG indexing -> Pinecone.
    qa_interface >> Edge(label="Save Research Notes") >> fastapi_app
    fastapi_app >> Edge(label="Store Research Notes") >> s3_bucket
    fastapi_app >> Edge(label="Index Research Notes") >> rag_indexing_service
    rag_indexing_service >> Edge(label="Store Research Notes Embeddings") >> pinecone_db

# Last expression of the cell: lets Jupyter render the diagram inline.
user_flow_diagram
