In [None]:
!pip install boto3 faiss-cpu sentence-transformers transformers accelerate huggingface_hub pypdf PyPDF2 PyCryptodome qdrant-client streamlit

import io, uuid, torch
import numpy as np
from google.colab import userdata
import boto3
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, FieldCondition, Filter, MatchValue

# ------------------- AWS / S3 -------------------
# Credentials are read from Colab's `userdata` store rather than being written
# into the notebook.
AWS_ACCESS_KEY = userdata.get("AWS_ACCESS_KEY")
AWS_SECRET_KEY = userdata.get("AWS_SECRET_KEY")
S3_BUCKET = "rag-vector-db-poc"

s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY
)

# ------------------- Qdrant -------------------
QDRANT_HOST = "54.147.169.138"
QDRANT_PORT = 6333
COLLECTION = "docs_chunks"
# Vector size the collection is created with.
# NOTE(review): must equal the output size of the embedding model used for
# ingestion — confirm if the model is ever swapped.
EMBED_DIM = 384

client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

# `recreate_collection` is deprecated in qdrant-client; do the same thing with
# the explicit exists / delete / create calls.
# WARNING: this drops every point already stored in COLLECTION each time the
# cell is run.
if client.collection_exists(COLLECTION):
    client.delete_collection(COLLECTION)

client.create_collection(
    collection_name=COLLECTION,
    vectors_config=VectorParams(size=EMBED_DIM, distance=Distance.COSINE)
)

Collecting boto3
  Downloading boto3-1.39.2-py3-none-any.whl.metadata (6.6 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting pypdf
  Downloading pypdf-5.7.0-py3-none-any.whl.metadata (7.2 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting PyCryptodome
  Downloading pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting qdrant-client
  Downloading qdrant_client-1.14.3-py3-none-any.whl.metadata (10 kB)
Collecting botocore<1.40.0,>=1.39.2 (from boto3)
  Downloading botocore-1.39.2-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.14.0,>=0.13.0 (from boto3)
  Downloading s3transfer-0.13.0-py3-none-any.whl.metadata (1.7 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant-client)
  Downloading portalocker-2

  client.recreate_collection(


True

In [None]:
def download_pdf_from_s3(bucket, key):
    """Fetch one S3 object and return its bytes wrapped in an in-memory stream.

    Args:
        bucket: Name of the S3 bucket.
        key: Key of the object inside the bucket.

    Returns:
        io.BytesIO holding the full object body.
    """
    s3_object = s3.get_object(Bucket=bucket, Key=key)
    # The whole body is read into memory before being wrapped.
    payload = s3_object['Body'].read()
    buffer = io.BytesIO(payload)
    return buffer

def list_pdf_keys(bucket, prefix=""):
    """Return the keys of all PDF objects in an S3 bucket.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Only keys starting with this prefix are considered
            (default: no restriction).

    Returns:
        List of object keys whose name ends in ".pdf", matched
        case-insensitively so that e.g. "REPORT.PDF" is not skipped,
        in the order the paginator yields them.
    """
    paginator = s3.get_paginator("list_objects_v2")
    return [
        obj["Key"]
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix)
        # A page may carry no "Contents" entry; treat that as an empty page.
        for obj in page.get("Contents", [])
        if obj["Key"].lower().endswith(".pdf")
    ]

def extract_text_from_pdf(pdf_io):
    """Extract the text of every page of a PDF and join the pages with newlines.

    Args:
        pdf_io: Binary file-like object containing the PDF.

    Returns:
        A single string with the text of each page separated by "\\n".
        Pages for which no text is extracted are left out, so the result is
        "" when nothing could be extracted from any page.
    """
    reader = PdfReader(pdf_io)
    # Extract each page exactly once; the previous version ran the extraction
    # twice per page (once for the filter, once for the value).
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)

def chunk_with_metadata(text, source_id, chunk_size=500, chunk_overlap=100):
    """Split text into chunks and tag each chunk with its source.

    Args:
        text: The text to split.
        source_id: Identifier stored under "source" on every chunk.
        chunk_size: Passed to the splitter as `chunk_size`.
        chunk_overlap: Passed to the splitter as `chunk_overlap`.

    Returns:
        List of dicts of the form {"text": <chunk>, "source": source_id}.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    records = []
    for piece in text_splitter.split_text(text):
        records.append({"text": piece, "source": source_id})
    return records

# Sentence-embedding model shared by the ingestion helpers in this cell;
# it is loaded once, when the cell runs.
# NOTE(review): its output size presumably has to match the vector size the
# Qdrant collection is created with (384) — confirm before changing the model.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_chunks(chunks):
    """Embed the "text" field of each chunk with the shared embedding model.

    Args:
        chunks: Sequence of dicts that each have a "text" key.

    Returns:
        Whatever `embed_model.encode` returns for the list of texts
        (one embedding per chunk, in the same order).
    """
    texts = []
    for chunk in chunks:
        texts.append(chunk["text"])
    vectors = embed_model.encode(texts, show_progress_bar=True)
    return vectors

def upload_chunks_to_qdrant(chunks, embeddings):
    """Upsert chunks and their embeddings into the Qdrant collection.

    Each chunk becomes one point with a fresh random UUID as id, the
    embedding as vector and {"text", "source"} as payload.

    Args:
        chunks: Sequence of dicts with "text" and "source" keys.
        embeddings: Sequence of vectors, paired with `chunks` by position.

    Returns:
        None. Nothing is sent when there are no points to upload.
    """
    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            # Hand Qdrant a plain list of Python floats: numpy rows (as
            # produced by the embedding model) are not guaranteed to pass
            # PointStruct validation.
            vector=embedding.tolist() if hasattr(embedding, "tolist") else list(embedding),
            payload={"text": chunk["text"], "source": chunk["source"]}
        )
        for chunk, embedding in zip(chunks, embeddings)
    ]
    if not points:
        # Avoid issuing an upsert request with an empty point list.
        return
    client.upsert(collection_name=COLLECTION, points=points)

def process_and_upload_pdfs(bucket, keys):
    """Download, chunk, embed and upload every PDF in `keys`.

    Processing is best-effort: a failure on one key is printed and the loop
    moves on to the next key.

    Args:
        bucket: Name of the S3 bucket holding the PDFs.
        keys: Iterable of object keys to ingest.

    Returns:
        None. Progress and failures are reported via print().
    """
    for key in keys:
        try:
            print(f"📄 Processing: {key}")
            pdf_io = download_pdf_from_s3(bucket, key)
            text = extract_text_from_pdf(pdf_io)
            chunks = chunk_with_metadata(text, source_id=key)
            if not chunks:
                # No text could be extracted, so there is nothing to embed
                # or upload; say so instead of reporting a bogus
                # upload/failure.
                print(f"⚠️ Skipped (no extractable text): {key}")
                continue
            embeddings = embed_chunks(chunks)
            upload_chunks_to_qdrant(chunks, embeddings)
            print(f"✅ Uploaded: {key}")
        except Exception as e:
            # Deliberately broad: one bad document must not abort the batch.
            print(f"❌ Failed: {key}\n{e}")

# Collect the keys of all PDFs in the bucket (no prefix filter) ...
pdf_keys = list_pdf_keys(S3_BUCKET)

# ... and ingest them into the Qdrant collection one by one.
process_and_upload_pdfs(S3_BUCKET, pdf_keys)

In [1]:
%%writefile app.py
import streamlit as st
import uuid, time
import numpy as np
from collections import defaultdict
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
from openai import AzureOpenAI

# ------------------- CONFIG -------------------
import os  # needed here to read secrets from the environment

QDRANT_HOST = "54.147.169.138"
QDRANT_PORT = 6333
COLLECTION = "docs_chunks"
EMBED_DIM = 384

# Azure OpenAI config.
# This file runs as its own Streamlit script, where `userdata` is not defined
# (it was never imported here), so the secrets are taken from environment
# variables instead.
# NOTE(review): in Colab, export them from the notebook kernel before
# launching the app, e.g.
#   os.environ["AZURE_OPENAI_API_KEY"] = userdata.get("AZURE_OPENAI_API_KEY")
# — processes started with `!` are expected to inherit them; confirm.
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_DEPLOYMENT_NAME = "gpt-4o"

if not AZURE_OPENAI_ENDPOINT or not AZURE_OPENAI_API_KEY:
    # Fail early with an actionable message rather than an opaque error on
    # the first request.
    raise RuntimeError(
        "Set the AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY environment "
        "variables before starting the app."
    )

# ------------------- LOAD MODELS -------------------
@st.cache_resource
def load_embedder():
    """Build the sentence-embedding model used to embed queries.

    Wrapped in `st.cache_resource` so the model object is created once and
    reused instead of being rebuilt on each script run.
    """
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    return embedder

@st.cache_resource
def load_azure_client():
    """Build the Azure OpenAI client from the module-level configuration.

    Wrapped in `st.cache_resource` so the client object is created once and
    reused instead of being rebuilt on each script run.
    """
    client_settings = {
        "api_key": AZURE_OPENAI_API_KEY,
        "api_version": "2023-05-15",
        "azure_endpoint": AZURE_OPENAI_ENDPOINT,
    }
    return AzureOpenAI(**client_settings)

embed_model = load_embedder()
azure_client = load_azure_client()

# ------------------- QDRANT -------------------
@st.cache_resource
def load_qdrant_client():
    """Create the Qdrant client once and reuse it across script reruns."""
    return QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

client = load_qdrant_client()

# ------------------- MONITORING -------------------
# Streamlit re-executes this script from the top on every interaction, so a
# plain module-level dict would be reset before each query and the sidebar
# statistics would never accumulate. Keeping the dict in session state makes
# it survive reruns for the lifetime of the browser session.
if "monitoring" not in st.session_state:
    st.session_state["monitoring"] = {
        "access_count": defaultdict(int),  # source -> number of times cited
        "latencies": [],                   # per-query latency in ms
        "query_log": []                    # one dict per answered query
    }
monitoring = st.session_state["monitoring"]

# ------------------- RAG -------------------
def search_qdrant(query, k=3):
    """Return the `k` stored chunks closest to `query`.

    Args:
        query: The user's question as a string.
        k: Maximum number of hits requested from Qdrant.

    Returns:
        List of {"text": ..., "source": ...} dicts built from the payload of
        each hit, in the order Qdrant returned them.
    """
    query_vector = embed_model.encode([query])[0]
    hits = client.search(COLLECTION, query_vector=query_vector, limit=k)
    matches = []
    for hit in hits:
        matches.append({"text": hit.payload["text"], "source": hit.payload["source"]})
    return matches

def build_prompt(query, top_chunks):
    """Assemble the LLM prompt and the citation table for a question.

    Every chunk is numbered from 1 and rendered as
    "[i] (<source>):\\n<text>" followed by a blank line.

    Args:
        query: The user's question.
        top_chunks: Iterable of dicts with "text" and "source" keys.

    Returns:
        (prompt, source_refs) where `prompt` is the full prompt string and
        `source_refs` maps each citation label ("[1]", "[2]", ...) to the
        source of the corresponding chunk.
    """
    source_refs = {}
    sections = []
    for position, item in enumerate(top_chunks, start=1):
        label = f"[{position}]"
        sections.append(f"{label} ({item['source']}):\n{item['text']}\n\n")
        source_refs[label] = item["source"]
    context = "".join(sections)

    prompt = (
        "You are a helpful assistant. Use the following context to answer the question. "
        "Cite sources using [1], [2], etc.\n"
        "\n"
        "Context:\n"
        f"{context}\n"
        "\n"
        f"Question: {query}\n"
        "\n"
        "Answer:"
    )
    return prompt, source_refs

def rag_query(query, k=3):
    """Answer `query` with retrieval-augmented generation and log statistics.

    Retrieves up to `k` chunks from Qdrant, builds a cited prompt, asks the
    Azure OpenAI deployment for an answer and records the cited sources and
    the latency in the `monitoring` dict.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve as context.

    Returns:
        (answer, refs, latency) where `answer` is the stripped model reply
        ("" if the model returned no content), `refs` maps citation labels
        to sources, and `latency` is the elapsed time in milliseconds
        rounded to two decimals.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    chunks = search_qdrant(query, k)
    prompt, refs = build_prompt(query, chunks)

    response = azure_client.chat.completions.create(
        model=AZURE_DEPLOYMENT_NAME,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
        max_tokens=512
    )

    # `message.content` is optional in the API response; guard so a missing
    # reply yields an empty answer instead of an AttributeError on .strip().
    content = response.choices[0].message.content
    answer = (content or "").strip()
    latency = round((time.perf_counter() - start) * 1000, 2)

    # One increment per retrieved chunk, so a source contributing several
    # chunks is counted several times.
    for ref in refs.values():
        monitoring["access_count"][ref] += 1
    monitoring["latencies"].append(latency)
    monitoring["query_log"].append({"query": query, "sources": list(refs.values()), "latency_ms": latency})

    return answer, refs, latency

# ------------------- UI -------------------
st.set_page_config(page_title="RAG Chat with Citations", layout="wide")
st.title("💬 RAG Assistant with Qdrant + Azure OpenAI")

st.markdown("Ask questions based on the preloaded document index.")

query = st.text_input("Enter your question here:")

if query:
    # Everything below is rendered while the spinner context is active.
    with st.spinner("Searching and generating response..."):
        answer, refs, latency = rag_query(query)

        st.markdown("### 🧠 Answer")
        st.write(answer)

        st.markdown("### 📎 Citations")
        for label in refs:
            st.write(f"{label}: {refs[label]}")

        st.markdown("### ⏱️ Latency")
        st.write(f"{latency} ms")

# ------------------- Sidebar: Monitoring -------------------
sidebar = st.sidebar
sidebar.title("📊 Monitoring")

query_total = len(monitoring['query_log'])
sidebar.write(f"Total queries: {query_total}")

latency_history = monitoring["latencies"]
if latency_history:
    mean_latency = np.mean(latency_history)
    sidebar.write(f"Average latency: {mean_latency:.2f} ms")
    sidebar.write("Top documents accessed:")
    # Most frequently cited sources first.
    top_sources = list(monitoring["access_count"].items())
    top_sources.sort(key=lambda entry: entry[1], reverse=True)
    for origin, hits in top_sources:
        sidebar.write(f"• {origin}: {hits}x")


Writing app.py


In [None]:
# Fetch and print the response of ipv4.icanhazip.com (quiet mode, output to stdout).
# NOTE(review): presumably this is the machine's public IPv4 address, which
# localtunnel asks for as the tunnel password — confirm.
!wget -q -O - ipv4.icanhazip.com
# Start the Streamlit app in the background and open a localtunnel to port 8501.
# NOTE(review): 8501 is assumed to be the port Streamlit listens on — confirm.
! streamlit run app.py & npx localtunnel --port 8501