# Week 6 – Track C: Streamlit Application

# Step 1 | Install and Prepare Environment

In [1]:
# ✅ Force stable NumPy and install dependencies
!pip install -U numpy==1.26.4 --quiet
!pip install -qU google-generativeai streamlit faiss-cpu sentence-transformers spacy networkx matplotlib pandas python-docx PyMuPDF
!python -m spacy download en_core_web_sm


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m437.2 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m105.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [3

# Step 2 | Gemini API Configuration

In [1]:
import getpass
import os

import google.generativeai as genai

# Never keep the API key in notebook source: anyone who can read the file can use it.
# Take it from the environment, or ask for it interactively without echoing it.
# NOTE(review): any key that was previously committed in this cell should be treated as
# compromised and rotated.
GOOGLE_API_KEY = (os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Gemini API key: ")).strip()
if not GOOGLE_API_KEY:
    raise ValueError("A Gemini API key is required (set GOOGLE_API_KEY or enter it when prompted).")

# Export the key so processes launched from this kernel can read it from their environment
# instead of needing it written into a source file.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# ✅ Use a model that exists in Colab (checked earlier)
model = genai.GenerativeModel("models/gemini-2.5-flash")

# NOTE(review): this cell only configures the client; presumably no request is sent until
# generate_content() is called, so the message below does not prove the key is valid — confirm.
print("✅ Gemini 2.5 Flash connected successfully.")


✅ Gemini 2.5 Flash connected successfully.


# Step 3 | Create Streamlit App File

In [2]:
%%writefile app.py
import streamlit as st
import google.generativeai as genai
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import spacy
from sentence_transformers import SentenceTransformer
import numpy as np, faiss

# --- Page setup ---
# Kept first so that no other Streamlit command (e.g. the key prompt below) runs before
# st.set_page_config.
st.set_page_config(page_title="Next-Level RAG Demo", layout="wide")
st.title("🔍 Graph-RAG + Multi-Hop Reasoning App")

# --- Configuration ---
import os  # local import: only the key lookup below needs it

# The API key is never written into this file. It is read from the GOOGLE_API_KEY
# environment variable; when that is unset, the user can paste a key in the sidebar.
api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
if not api_key:
    api_key = st.sidebar.text_input("🔑 Gemini API key", type="password").strip()
if not api_key:
    st.warning("Set the GOOGLE_API_KEY environment variable or enter a Gemini API key in the sidebar to continue.")
    st.stop()

genai.configure(api_key=api_key)
model = genai.GenerativeModel("models/gemini-2.5-flash")


@st.cache_resource(show_spinner=False)
def _load_nlp():
    """Load the spaCy pipeline once per server process instead of on every script rerun."""
    return spacy.load("en_core_web_sm")


@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Load the sentence-embedding model once per server process instead of on every rerun."""
    return SentenceTransformer("all-MiniLM-L6-v2")


nlp = _load_nlp()
embedder = _load_embedder()

# --- Streamlit UI ---
uploaded_files = st.file_uploader("📂 Upload project files", type=["pdf", "docx", "txt"], accept_multiple_files=True)
texts = []

# --- Pipeline helpers ---
CHUNK_SIZE = 500  # characters per chunk
TOP_K = 3         # chunks retrieved for the baseline answer


def _extract_text(upload):
    """Return the plain text of one uploaded PDF, DOCX, or text file."""
    import fitz, docx

    name = upload.name.lower()  # "REPORT.PDF" must be parsed as a PDF, not decoded as text
    if name.endswith(".pdf"):
        with fitz.open(stream=upload.read(), filetype="pdf") as pdf:
            return "".join(page.get_text() for page in pdf)
    if name.endswith(".docx"):
        return "\n".join(p.text for p in docx.Document(upload).paragraphs)
    # Replace undecodable bytes rather than crashing on files that are not valid UTF-8.
    return upload.read().decode("utf-8", errors="replace")


def _chunk_text(text, size=CHUNK_SIZE):
    """Split text into consecutive, stripped, non-empty pieces of at most `size` characters."""
    pieces = (text[start:start + size].strip() for start in range(0, len(text), size))
    return [piece for piece in pieces if piece]


@st.cache_resource(show_spinner=False, max_entries=4)
def _build_index(chunk_key):
    """Embed the chunks and return a FAISS L2 index.

    Cached on the tuple of chunk strings so widget-triggered reruns do not re-embed
    an unchanged corpus. `chunk_key` must be non-empty.
    """
    vectors = np.ascontiguousarray(
        embedder.encode(list(chunk_key), convert_to_tensor=False), dtype=np.float32
    )
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx


@st.cache_resource(show_spinner=False, max_entries=4)
def _build_graph(chunk_key):
    """Build a directed graph linking each named entity to the next one found in the same chunk.

    Cached on the tuple of chunk strings so NER is not repeated on every rerun.
    """
    graph = nx.DiGraph()
    for piece in chunk_key:
        ents = [e.text for e in nlp(piece).ents]
        for src, dst in zip(ents, ents[1:]):
            graph.add_edge(src, dst, relation="related_to")
    return graph


def _ask(prompt):
    """Send `prompt` to Gemini; return the stripped answer, or None after reporting the failure."""
    try:
        return model.generate_content(prompt).text.strip()
    except Exception as exc:
        # Covers request failures and responses without usable text, where `.text` raises.
        st.error(f"Gemini request failed: {exc}")
        return None


if uploaded_files:
    texts.extend(_extract_text(f) for f in uploaded_files)
    st.success(f"✅ Loaded {len(texts)} documents.")

    # Chunk text
    chunks = [piece for t in texts for piece in _chunk_text(t)]
    st.write(f"📑 Total chunks: {len(chunks)}")
    if not chunks:
        # e.g. scanned PDFs without a text layer: nothing to embed, so stop before indexing.
        st.warning("No extractable text was found in the uploaded files.")
        st.stop()

    # Embeddings + FAISS, and entity graph (both cached per corpus)
    chunk_key = tuple(chunks)
    index = _build_index(chunk_key)
    G = _build_graph(chunk_key)
    st.write(f"🧠 Graph built: {len(G.nodes)} nodes, {len(G.edges)} edges.")

    # Query input
    query = st.text_input("🔎 Enter your question", "Which author proposed Method B and which dataset did they evaluate it on?")
    if st.button("Run Query"):
        if not query.strip():
            st.warning("Please enter a question.")
            st.stop()

        # --- Baseline Retrieval ---
        q_emb = np.ascontiguousarray(
            embedder.encode([query], convert_to_tensor=False), dtype=np.float32
        )
        # Never ask for more neighbours than there are chunks: FAISS pads missing results
        # with -1, which Python indexing would silently turn into the last chunk.
        _, hits = index.search(q_emb, min(TOP_K, len(chunks)))
        context = "\n".join(chunks[i] for i in hits[0] if i >= 0)

        baseline_prompt = f"Using the context below, answer the question precisely.\n\n{context}\n\nQuestion: {query}"
        baseline_answer = _ask(baseline_prompt)

        # --- Graph-RAG Retrieval ---
        qents = [e.text for e in nlp(query).ents]
        neighbor_names = {n for e in qents if e in G for n in G.neighbors(e)}
        # Each matching chunk is included once, in document order, instead of once per
        # (entity, neighbour) pair, which used to duplicate text in the prompt.
        neighborhood = [c for c in chunks if any(n in c for n in neighbor_names)]
        # Falls back to the full corpus when no query entity is present in the graph.
        graph_ctx = "\n".join(neighborhood or chunks)

        graph_prompt = f"Answer the question using graph reasoning:\n\n{graph_ctx}\n\nQuestion: {query}"
        graph_answer = _ask(graph_prompt)

        # --- Display results ---
        st.subheader("🧩 Baseline RAG Answer")
        if baseline_answer is not None:
            st.write(baseline_answer)
        st.subheader("🕸 Graph-RAG Answer")
        if graph_answer is not None:
            st.write(graph_answer)

        # --- Graph visualization ---
        fig, ax = plt.subplots(figsize=(6, 5))
        nx.draw(G, with_labels=False, node_color="skyblue", node_size=40, edge_color="gray", ax=ax)
        st.pyplot(fig)
        plt.close(fig)  # release the figure; otherwise one accumulates per query in the server process


Writing app.py


In [3]:
!streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false --server.port 8501



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.186.19.253:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m


In [None]:
# --- Install and import pyngrok ---
!pip install -q pyngrok streamlit

from pyngrok import ngrok
import time, os

# --- Authenticate ngrok with your token ---
ngrok.kill()  # close any previous tunnels
ngrok.set_auth_token("33RDPz5AZYmujipffSg0KqV0lvV_7LSnVw5jucbapPGL7NyCC")

# --- Create a public tunnel for Streamlit (port 8501) ---
public_url = ngrok.connect(8501).public_url
print("🌐 Public URL:", public_url)

# --- Run Streamlit app ---
!streamlit run app.py --server.port 8501 --server.enableCORS false --server.enableXsrfProtection false &
time.sleep(5)

print("✅ App is launching... click the link above to open it.")


🌐 Public URL: https://boyd-unribboned-noncontemptibly.ngrok-free.dev

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.186.19.253:8501[0m
[0m
2025-10-07 04:36:32.927548: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759811792.949104    2034 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759811792.955981    2034 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1759811792.972827