#### **Cell 1: Setup - Install Libraries and Import Dependencies**


In [2]:
!pip install -qqq transformers sentence-transformers faiss-cpu langchain streamlit pyngrok PyPDF2 langchain-community

import os
import torch
import warnings
import io # For handling file uploads in Streamlit

# Hugging Face and LangChain imports
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings # Changed from langchain.embeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline # Changed from langchain.llms

# Streamlit and Ngrok imports
import streamlit as st # This import is mainly for type hinting/IDE, actual app runs from app.py
from pyngrok import ngrok
import subprocess # To run Streamlit in the background

# PDF parsing
import PyPDF2

# Suppress specific warnings that might clutter output
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
warnings.filterwarnings("ignore", category=FutureWarning)

print("Libraries installed and dependencies imported.")

# --- Configuration Parameters ---
# Define chunking parameters
CHUNK_SIZE = 400  # Number of tokens/characters per chunk. Adjust based on document type.
CHUNK_OVERLAP = 40 # Overlap between chunks to maintain context.

# Model names
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2' # Recommended embedding model for good balance of speed/performance
LLM_MODEL_NAME = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0' # Small, accessible LLM for Colab free tier

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hLibraries installed and dependencies imported.


#### **Cell 2: Core RAG Functions**


In [6]:
# Cache the embedding model to avoid reloading on every Streamlit rerun
@st.cache_resource
def load_embedding_model(model_name):
    """Loads the HuggingFace embedding model."""
    print(f"Loading embedding model: {model_name}...")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    print("Embedding model loaded.")
    return embeddings

# Cache the LLM to avoid reloading on every Streamlit rerun
@st.cache_resource
def load_llm(model_name, device):
    """Loads the LLM and sets up the HuggingFace pipeline."""
    print(f"Loading tokenizer for LLM: {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("Tokenizer loaded.")

    print(f"Loading LLM: {model_name}...")
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        ).to(device)
        print("LLM loaded successfully.")

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id # Set pad_token_id to avoid warnings
        )
        llm = HuggingFacePipeline(pipeline=pipe)
        print("LLM configured via HuggingFacePipeline for LangChain.")
        return llm
    except Exception as e:
        print(f"ERROR: Could not load LLM '{model_name}'. This might be due to insufficient VRAM or model compatibility issues. Error details: {e}")
        class DummyLLM:
            def __call__(self, prompt, stop=None):
                return "Dummy answer: LLM failed to load. Please check your LLM configuration."
        return DummyLLM()

# Function to process the uploaded document and create the vector store
@st.cache_data
def process_document(uploaded_file_content_bytes, file_extension, _embeddings_model, chunk_size, chunk_overlap):
    """Processes document content, chunks it, and creates a FAISS vector store."""
    print("Processing document and creating chunks...")
    text_content = ""
    if file_extension == 'pdf':
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file_content_bytes))
            for page_num in range(len(pdf_reader.pages)):
                text_content += pdf_reader.pages[page_num].extract_text()
            print("Content loaded from PDF.")
        except Exception as e:
            st.error(f"Error reading PDF: {e}")
            return None
    elif file_extension == 'txt':
        text_content = uploaded_file_content_bytes.decode("utf-8")
        print("Content loaded from TXT.")
    else:
        st.error("Unsupported file type. Please upload a .txt or .pdf file.")
        return None

    if not text_content.strip():
        st.warning("Uploaded document is empty or could not be read.")
        return None

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True
    )
    chunks = text_splitter.create_documents([text_content])
    print(f"Number of chunks created: {len(chunks)}")

    if not chunks:
        st.warning("No chunks could be created from the document. It might be too short or unreadable.")
        return None

    print("Creating FAISS vector store...")
    vector_store = FAISS.from_documents(chunks, _embeddings_model)
    print("FAISS vector store created successfully.")
    return vector_store

print("Core RAG functions defined.")



Core RAG functions defined.


#### **Cell 3: Streamlit Application Code**

In [7]:
streamlit_app_code = f"""
import streamlit as st
import os
import torch
import warnings
import io

# Hugging Face and LangChain imports
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

# PDF parsing
import PyPDF2

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
warnings.filterwarnings("ignore", category=FutureWarning)

# --- Configuration Parameters (duplicated for app.py) ---
CHUNK_SIZE = {CHUNK_SIZE}
CHUNK_OVERLAP = {CHUNK_OVERLAP}
EMBEDDING_MODEL_NAME = '{EMBEDDING_MODEL_NAME}'
LLM_MODEL_NAME = '{LLM_MODEL_NAME}'

# Determine the device to use (GPU if available, otherwise CPU)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Cache the embedding model to avoid reloading on every Streamlit rerun
@st.cache_resource
def load_embedding_model(model_name):
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

# Cache the LLM to avoid reloading on every Streamlit rerun
@st.cache_resource
def load_llm(model_name, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        ).to(device)
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id
        )
        llm = HuggingFacePipeline(pipeline=pipe)
        return llm
    except Exception as e:
        st.error(f"Error loading LLM: {{e}}. Using a dummy LLM.")
        class DummyLLM:
            def __call__(self, prompt, stop=None):
                return "Dummy answer: LLM failed to load."
        return DummyLLM()

# Function to process the uploaded document and create the vector store
@st.cache_data
def process_document(uploaded_file_content_bytes, file_extension, _embeddings_model, chunk_size, chunk_overlap):
    text_content = ""
    if file_extension == 'pdf':
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file_content_bytes))
            for page_num in range(len(pdf_reader.pages)):
                text_content += pdf_reader.pages[page_num].extract_text()
        except Exception as e:
            st.error(f"Error reading PDF: {{e}}")
            return None
    elif file_extension == 'txt':
        text_content = uploaded_file_content_bytes.decode("utf-8")
    else:
        st.error("Unsupported file type. Please upload a .txt or .pdf file.")
        return None

    if not text_content.strip():
        st.warning("Uploaded document is empty or could not be read.")
        return None

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True
    )
    chunks = text_splitter.create_documents([text_content])

    if not chunks:
        st.warning("No chunks could be created from the document. It might be too short or unreadable.")
        return None

    vector_store = FAISS.from_documents(chunks, _embeddings_model)
    return vector_store

# --- Streamlit UI ---
st.set_page_config(page_title="RAG QA System", layout="wide")

st.title("📄 Mini LLM-Powered QA System (RAG)")
st.markdown("Upload a document, ask a question, and get answers powered by an open-source LLM!")

# Load models (cached)
embeddings = load_embedding_model(EMBEDDING_MODEL_NAME)
llm = load_llm(LLM_MODEL_NAME, DEVICE)

# File uploader
uploaded_file = st.file_uploader("Upload your document (.txt or .pdf)", type=["txt", "pdf"])

vector_store = None
if uploaded_file is not None:
    file_extension = uploaded_file.name.split('.')[-1].lower()
    # Process document and create vector store
    with st.spinner("Processing document and building knowledge base..."):
        # Pass the file content as bytes
        vector_store = process_document(uploaded_file.read(), file_extension, embeddings, CHUNK_SIZE, CHUNK_OVERLAP)
    if vector_store:
        st.success("Document processed and knowledge base ready!")
    else:
        st.error("Failed to process document. Please check the file content and type.")

# Query interface
if vector_store:
    st.subheader("Ask a Question")
    query = st.text_area("Enter your question here:", height=100)

    if st.button("Get Answer"):
        if query:
            with st.spinner("Retrieving answer..."):
                retriever = vector_store.as_retriever(search_kwargs={{"k": 3}})
                qa_chain = RetrievalQA.from_chain_type(
                    llm=llm,
                    chain_type="stuff",
                    retriever=retriever,
                    return_source_documents=True
                )
                response = qa_chain({{"query": query}})

            st.subheader("Generated Answer:")
            st.write(response["result"])

            st.subheader("Source Documents:")
            if response["source_documents"]:
                for i, doc in enumerate(response["source_documents"]):
                    st.markdown(f"**Chunk {{i+1}} (Source Index: {{doc.metadata.get('start_index', 'N/A')}}):**")
                    st.info(doc.page_content)
            else:
                st.write("No relevant source documents found.")
        else:
            st.warning("Please enter a question.")
else:
    st.info("Please upload a document to get started.")

st.markdown("---")
st.markdown("Built for Wundrsight SWE Intern Technical Assignment.")
"""

with open("app.py", "w") as f:
    f.write(streamlit_app_code)

print("Streamlit app code written to 'app.py'.")

Streamlit app code written to 'app.py'.


#### **Cell 4: Run Streamlit App with Ngrok**

In [9]:
NGROK_AUTH_TOKEN = "2z2QXqdZspDDxtlmAXuITrNtfVe_4SS7Juw42gtFAQG3TTDzn"

if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("\nWARNING: Please replace 'YOUR_NGROK_AUTH_TOKEN' in the code with your actual ngrok authentication token.")
    print("You can get it from https://dashboard.ngrok.com/get-started/your-authtoken after signing up.")
else:
    # Authenticate ngrok
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("Ngrok authentication token set.")

    # Kill any running ngrok processes to avoid conflicts
    # Use pkill for a more forceful kill
    !pkill ngrok

    # Start ngrok tunnel for Streamlit (port 8501 is default for Streamlit)
    public_url = ngrok.connect(8501)
    print(f"Streamlit App Tunnel URL: {public_url}")
    print("Click the URL above to open your Streamlit app in a new tab.")

    # Run the Streamlit app in the background
    # We use subprocess.Popen to keep the Colab cell running and display the output
    try:
        # Start Streamlit in a separate process
        # --server.port 8501: Ensures Streamlit runs on the port ngrok is tunneling
        # --server.headless true: Prevents Streamlit from trying to open a browser on the Colab server
        process = subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501", "--server.headless", "true"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Give it a moment to start
        import time
        time.sleep(5)
        print("\nStreamlit app is running in the background. Check the ngrok URL above.")
        print("You can monitor Streamlit logs by running `!cat ~/.streamlit/logs.txt` in a new cell if needed.")

    except Exception as e:
        print(f"Error starting Streamlit: {e}")
        print("Please ensure 'app.py' was created successfully in the previous cell and ngrok token is valid.")

Ngrok authentication token set.
Streamlit App Tunnel URL: NgrokTunnel: "https://a44f212ffc36.ngrok-free.app" -> "http://localhost:8501"
Click the URL above to open your Streamlit app in a new tab.

Streamlit app is running in the background. Check the ngrok URL above.
You can monitor Streamlit logs by running `!cat ~/.streamlit/logs.txt` in a new cell if needed.
