**DEPENDENCIES** **INSTALLATION**

In [3]:
!pip install -q streamlit sentence-transformers qdrant-client transformers pdfplumber rank_bm25 nest-asyncio pyngrok
!npm install -g localtunnel
!pip install PyPDF2


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m95.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m306.7/306.7 kB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m111.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m93.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m104.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m90.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

**CODE** **HERE**

In [4]:
import os
import nest_asyncio
import subprocess
from pyngrok import ngrok
from sklearn.metrics.pairwise import cosine_similarity
import re
import PyPDF2

# Patch asyncio so an event loop can be re-entered — presumably needed because
# the notebook kernel already runs its own loop (TODO confirm it is still required).
nest_asyncio.apply()

streamlit_script = """\
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models
from transformers import AutoModelForCausalLM, AutoTokenizer
import PyPDF2
import io
import torch
import asyncio
from rank_bm25 import BM25Okapi
import numpy as np
from typing import List
from sklearn.metrics.pairwise import cosine_similarity
import re

@st.cache_resource
def _load_models_cached():
    '''Load the embedder, tokenizer and language model and cache them.

    Any loading error propagates to the caller. st.cache_resource stores only
    returned values, so a failed attempt is not cached and the next call
    retries the load.
    '''
    import os

    llm_name = "meta-llama/Llama-3.2-3B-Instruct"
    # Prefer the HF_TOKEN environment variable so the secret does not have to
    # live in the source; the placeholder literal remains the fallback.
    hf_token = os.environ.get("HF_TOKEN", "INCLUDE YOUR HF TOKEN HERE ")

    embedder = SentenceTransformer('BAAI/bge-m3')
    tokenizer = AutoTokenizer.from_pretrained(llm_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        llm_name,
        device_map="auto",
        torch_dtype=torch.float16,
        attn_implementation="eager",
        token=hf_token,
    )
    return embedder, tokenizer, model

def load_models():
    '''Return the (embedder, tokenizer, model) triple used by the app.

    Returns:
        The three loaded objects, or (None, None, None) when loading fails.
        A failure is reported in the Streamlit UI and is not cached, so a
        later call tries again.
    '''
    try:
        return _load_models_cached()
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return None, None, None

def process_document(file) -> List[str]:
    '''Extract the text of an uploaded PDF or text file and split it into chunks.

    Args:
        file: Uploaded file object exposing a name attribute and read().

    Returns:
        The overlapping text chunks, or an empty list when the document
        yields no chunks or any step fails (the error is shown in the UI).
    '''
    try:
        raw_bytes = file.read()
        # Compare the extension case-insensitively so a name such as
        # REPORT.PDF is parsed as a PDF instead of being decoded as text.
        if file.name.lower().endswith('.pdf'):
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(raw_bytes))
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next; pages for which no text
            # comes back contribute an empty string.
            text = "\\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        else:
            text = raw_bytes.decode('utf-8')
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=700,
            chunk_overlap=200,
            separators=["\\n\\n", "\\n", ". ", "! ", "? ", "。", "．"]
        )
        chunks = splitter.split_text(text)
        return chunks if chunks else []
    except Exception as e:
        st.error(f"Document Error: {e}")
        return []

@st.cache_resource
def init_qdrant():
    '''Create an in-memory Qdrant client holding a fresh doc_chunks collection.

    Returns:
        The client, or None when setup fails (the error is shown in the UI).
    '''
    collection = "doc_chunks"
    try:
        qdrant = QdrantClient(":memory:")
        # 1024 presumably matches the embedder's output width — TODO confirm.
        vector_params = models.VectorParams(
            size=1024,
            distance=models.Distance.COSINE,
        )
        qdrant.recreate_collection(
            collection_name=collection,
            vectors_config=vector_params,
        )
    except Exception as e:
        st.error(f"Qdrant Error: {e}")
        return None
    return qdrant

def embed_and_store(chunks: List[str], embedder, client):
    '''Embed the chunks and make them the sole contents of the doc_chunks collection.

    Args:
        chunks: Text chunks of the current document.
        embedder: Object whose encode() turns a list of strings into vectors.
        client: Qdrant client that owns the doc_chunks collection.

    Returns:
        The embeddings produced by embedder.encode, one row per chunk.
    '''
    embeddings = embedder.encode(chunks, batch_size=32, show_progress_bar=False)
    if not chunks:
        return embeddings

    # Point ids are just 0..n-1, so upserting a shorter document over a longer
    # one would leave the old document's tail in the collection. Recreate the
    # collection first so searches only ever see the current document.
    client.recreate_collection(
        collection_name="doc_chunks",
        vectors_config=models.VectorParams(
            size=len(embeddings[0]),
            distance=models.Distance.COSINE,
        ),
    )
    client.upsert(
        collection_name="doc_chunks",
        points=[
            models.PointStruct(id=i, vector=e.tolist(), payload={"text": c})
            for i, (e, c) in enumerate(zip(embeddings, chunks))
        ],
    )
    return embeddings

async def retrieve_chunks(query: str, embedder, client, chunks: List[str], embeddings: np.ndarray, k=5) -> List[str]:
    '''Return up to k context chunks for the query using dense + BM25 retrieval.

    Dense hits (Qdrant search) and sparse hits (BM25 over whitespace tokens)
    are interleaved by rank, exact duplicates are dropped, and a candidate is
    skipped when its cosine similarity to an already selected chunk is 0.85
    or higher.

    The function is declared async but contains no awaits.

    Args:
        query: User question.
        embedder: Object whose encode() turns a list of strings into vectors.
        client: Qdrant client holding the doc_chunks collection.
        chunks: All chunks of the current document.
        embeddings: Precomputed chunk embeddings, row i belonging to chunks[i].
        k: Maximum number of chunks to return.

    Returns:
        The selected chunk texts, best ranked first.
    '''
    from itertools import zip_longest

    if not chunks or k <= 0:
        return []

    query_embedding = embedder.encode([query])[0]
    dense_results = client.search(
        collection_name="doc_chunks",
        query_vector=query_embedding.tolist(),
        limit=k * 3
    )
    dense_chunks = [p.payload["text"] for p in dense_results]

    bm25 = BM25Okapi([c.split() for c in chunks])
    sparse_scores = bm25.get_scores(query.split())
    sparse_chunks = [chunks[i] for i in np.argsort(sparse_scores)[::-1][:k * 3]]

    # Interleave both rankings and de-duplicate while keeping rank order;
    # a set would shuffle the candidates and make the final cut arbitrary.
    candidates = list(dict.fromkeys(
        text
        for pair in zip_longest(dense_chunks, sparse_chunks)
        for text in pair
        if text is not None
    ))

    # Reuse the precomputed embeddings; only texts that are not part of
    # chunks are encoded, and in a single batch.
    index_of = {}
    for i, text in enumerate(chunks):
        index_of.setdefault(text, i)
    known = {
        text: embeddings[index_of[text]]
        for text in candidates
        if text in index_of and index_of[text] < len(embeddings)
    }
    unknown = [text for text in candidates if text not in known]
    if unknown:
        known.update(zip(unknown, embedder.encode(unknown)))

    unique_chunks = []
    unique_vectors = []
    for text in candidates:
        vector = known[text]
        if unique_vectors and cosine_similarity([vector], unique_vectors)[0].max() >= 0.85:
            continue
        unique_chunks.append(text)
        unique_vectors.append(vector)
        if len(unique_chunks) == k:
            break
    return unique_chunks

def validate_response(response: str) -> str:
    '''Tidy generated text: collapse stuttered words and drop blank lines.

    A word is removed only when it immediately repeats the previous word
    (compared case-insensitively), e.g. "the the" becomes "the". Words that
    merely occur again later in a line are kept, so ordinary sentences stay
    intact. Punctuation that followed the removed repeat is preserved. Runs
    of whitespace inside a line are normalised to single spaces.

    Args:
        response: Raw text produced by the model.

    Returns:
        The cleaned text with one non-empty line per output line.
    '''
    trailing_punctuation = ".,;:!?"
    cleaned_lines = []
    for line in response.splitlines():
        kept = []
        for word in line.split():
            core = word.rstrip(trailing_punctuation)
            # Only alphabetic words count as stutter; repeated numbers or
            # symbols may be meaningful and are left alone.
            is_repeat = (
                bool(kept)
                and core.isalpha()
                and kept[-1].casefold() == core.casefold()
            )
            if is_repeat:
                # Drop the repeat but keep the punctuation attached to it.
                kept[-1] += word[len(core):]
            else:
                kept.append(word)
        if kept:
            cleaned_lines.append(" ".join(kept))
    return "\\n".join(cleaned_lines)

def generate_response(query: str, context_chunks: List[str], tokenizer, model) -> str:
    '''Generate an answer to the query grounded in the given context chunks.

    Args:
        query: User question.
        context_chunks: Retrieved document chunks placed into the prompt.
        tokenizer: Tokenizer matching the model.
        model: Causal language model exposing generate().

    Returns:
        The generated answer after clean-up by validate_response.
    '''
    # Prefix every chunk, including the first, so the context is a uniform list.
    context_str = "\\n".join(f"- {chunk}" for chunk in context_chunks)
    prompt = (
        "You are a professional document analyst tasked with answering questions based solely on the provided document context. Do not use external knowledge or make assumptions beyond the context. Use the following context:\\n"
        f"{context_str}\\n\\n"
        f"Question: {query}\\n"
        "Instructions:\\n"
        "1. Provide a detailed and elaborate explanation based only on the given context\\n"
        "2. Explain each concept thoroughly\\n"
        "3. Use multiple paragraphs if needed\\n"
        "4. Include specific numbers/dates/percentages when available\\n"
        "5. Format each paragraph as a plain text bullet WITHOUT MARKDOWN:\\n"
        "   - Start each bullet with a dash '-'\\n"
        "   - Put exactly 1 empty line between bullets\\n"
        "   - Never use •, *, or any other markdown symbols\\n"
        "6. After the bullet points, provide a concise summary paragraph that encapsulates the key points\\n"
        "7. Do not use markdown syntax (e.g., no asterisks or hashes)\\n"
        "8. Avoid repeating the same point multiple times\\n"
        "9. If the context lacks sufficient information, state that clearly and do not speculate\\n"
        "Answer:"
    )
    # Send the inputs to wherever the model's weights were placed.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=2000,
        temperature=0.3,
        top_p=0.7,
        repetition_penalty=1.2,
        do_sample=True
    )
    # The output sequence starts with the prompt tokens. Decode only what was
    # generated after them, so an answer that itself contains the text
    # "Answer:" is not cut short.
    prompt_length = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return validate_response(answer)

# ---- Streamlit page: upload a document, then ask questions about it ----
st.title("Document Analysis Chatbot")

uploaded_file = st.file_uploader("Upload PDF/TXT", type=["pdf", "txt"])
if uploaded_file:
    with st.spinner("Processing..."):
        chunks = process_document(uploaded_file)
        if chunks:
            embedder, tokenizer, model = load_models()
            client = init_qdrant()
            if embedder and client:
                embeddings = embed_and_store(chunks, embedder, client)
                st.session_state.update(chunks=chunks, embeddings=embeddings)
                st.success("Ready for questions!")

query = st.text_input("Ask about the document:")
if query and 'chunks' in st.session_state:
    # Fetch the models and client here instead of relying on names bound in
    # the upload branch: that branch is skipped when the file has been removed
    # or failed to process, while chunks may still be in session state.
    embedder, tokenizer, model = load_models()
    client = init_qdrant()
    if embedder is None or tokenizer is None or model is None or client is None:
        st.error("Models or vector store are unavailable; cannot answer the question.")
    else:
        with st.spinner("Analyzing..."):
            # asyncio.run creates a fresh event loop and closes it afterwards,
            # so no loop is left open after each query.
            context_chunks = asyncio.run(
                retrieve_chunks(query, embedder, client,
                                st.session_state['chunks'],
                                st.session_state['embeddings'])
            )

            response = generate_response(query, context_chunks, tokenizer, model)
            st.markdown(f"*Answer:*\\n{response}")
"""

# Write the generated app with an explicit encoding: the script contains
# non-ASCII separator characters, so the platform default must not be relied on.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_script)

# Prefer the NGROK_AUTH_TOKEN environment variable so the secret does not have
# to be pasted into the notebook; the placeholder literal remains the fallback.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "INCLUDE YOUR NGROK AUTH TOKEN HERE")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Keep the process handle so the server can later be stopped with
# streamlit_process.terminate().
# NOTE(review): CORS and XSRF protection are switched off while the app is
# exposed on a public URL — confirm this is acceptable beyond a demo.
streamlit_process = subprocess.Popen([
    "streamlit", "run", "app.py",
    "--server.port", "8501",
    "--server.enableCORS", "false",
    "--server.enableXsrfProtection", "false",
])

public_url = ngrok.connect(8501, bind_tls=True)
print(f"App URL: {public_url.public_url}")

App URL: https://d966-34-125-198-225.ngrok-free.app


In [None]:
# Stop the ngrok process managed by pyngrok, which closes the public tunnel.
# NOTE: this call does not touch the Streamlit subprocess started earlier.
ngrok.kill()