In [None]:
!pip install -U pip setuptools wheel

# Install core libraries without strict pinning
!pip install gradio langchain langchain-community chromadb pypdf pydantic sentence-transformers transformers huggingface_hub


In [None]:
import gradio, langchain, chromadb, pypdf, transformers, sentence_transformers, huggingface_hub, pydantic

# Report the installed version of each core dependency, one "label: version" line
# per package, in the same order as the import statement above.
print(
    "\n".join(
        f"{label}: {module.__version__}"
        for label, module in (
            ("gradio", gradio),
            ("langchain", langchain),
            ("chromadb", chromadb),
            ("pypdf", pypdf),
            ("transformers", transformers),
            ("sentence-transformers", sentence_transformers),
            ("huggingface_hub", huggingface_hub),
            ("pydantic", pydantic),
        )
    )
)


In [None]:
# Task 1: Load document using LangChain for different sources
from langchain_community.document_loaders import PyPDFLoader

def document_loader(file_path):
    """Load a PDF into LangChain documents using ``PyPDFLoader``.

    Args:
        file_path: Location of the PDF. For a Gradio ``File`` input configured
            with ``type="filepath"`` this is a string path.

    Returns:
        The result of ``PyPDFLoader(file_path).load()``.

    Raises:
        ValueError: If ``file_path`` is ``None`` or a blank string, e.g. when the
            callback fires before any file was uploaded.
    """
    # Fail early with a clear message instead of an obscure error from the loader.
    if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
        raise ValueError("No PDF file was provided.")

    loader = PyPDFLoader(file_path)
    return loader.load()

# Demo: load the sample paper and print the first 300 characters of its first page.
# NOTE(review): the absolute /content/... path assumes the PDF already exists at
# that location — confirm before running on a fresh kernel.
docs = document_loader("/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf")
print(docs[0].page_content[:300])


In [None]:
# Task 2: Apply text splitting techniques
from langchain.text_splitter import RecursiveCharacterTextSplitter

def text_splitter(data, chunk_size=1000, chunk_overlap=150):
    """Split loaded documents into overlapping character-based chunks.

    Args:
        data: Documents to split, passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum chunk length, measured with ``len``. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.
            Defaults to 150.

    Returns:
        The chunks produced by ``split_documents``.
    """
    # Named `splitter` so the local does not shadow this function's own name.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_documents(data)

# Demo: split the loaded pages, then display the number of chunks together with
# the first 200 characters of the first chunk (the last expression is the cell output).
chunks = text_splitter(docs)
len(chunks), chunks[0].page_content[:200]


In [None]:
# Task 3: Embed documents
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Embedding models already created in this kernel, keyed by model name, so the
# weights are loaded once instead of on every call.
_EMBEDDING_MODELS = {}


def local_embedding_model(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Return a (cached) local sentence-transformer embedding model.

    Args:
        model_name: Name passed to ``SentenceTransformerEmbeddings``. The default
            is small, fast, and accurate enough for grading.

    Returns:
        A ``SentenceTransformerEmbeddings`` instance. Repeated calls with the same
        ``model_name`` return the same object.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformerEmbeddings(model_name=model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model

# Demo: embed one example sentence and show the first five numbers of its vector
emb = local_embedding_model()
vec = emb.embed_query("This is an example sentence for embedding.")
vec[:5]


In [None]:
# Task 4: Create and configure vector databases to store embeddings
from langchain_community.vectorstores import Chroma

def vector_database(chunks):
    """Embed document chunks and index them in a fresh Chroma collection.

    Args:
        chunks: Non-empty sequence of document chunks to index.

    Returns:
        The ``Chroma`` vector store built by ``Chroma.from_documents``.

    Raises:
        ValueError: If ``chunks`` is empty (for example, a PDF with no
            extractable text), instead of an obscure error from the store.
    """
    import uuid

    if not chunks:
        raise ValueError(
            "No text chunks to index; the document may contain no extractable text."
        )

    embedding_model = local_embedding_model()
    # Use a unique collection per call. Without an explicit name every call uses
    # Chroma's default collection, and in-memory stores created in the same
    # process can end up sharing it, so chunks from earlier calls/documents
    # could leak into later retrievals.
    return Chroma.from_documents(
        chunks,
        embedding_model,
        collection_name=f"pdf-chunks-{uuid.uuid4().hex}",
    )

# Demo: index the chunks and display how many entries the collection holds.
# NOTE(review): `_collection` is an underscore-prefixed attribute of the store,
# presumably private — it may change between library versions.
vectordb = vector_database(chunks)
vectordb._collection.count()


In [None]:
# Task 5: Develop a retriever to fetch document segments based on queries
def build_retriever(file_path):
    """Run the load -> split -> index pipeline for one PDF and return a retriever.

    Args:
        file_path: Path of the PDF, forwarded to ``document_loader``.

    Returns:
        A similarity-search retriever over the indexed chunks that returns the
        top 4 matches per query.
    """
    pages = document_loader(file_path)
    store = vector_database(text_splitter(pages))
    return store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# Demo: run the full load/split/index pipeline on the sample paper to get a retriever.
# NOTE(review): same absolute /content/... path as the loader demo — confirm the file exists there.
retriever = build_retriever("/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf")


In [None]:
# Task 6: Construct a QA Bot that leverages LangChain and LLM to answer questions
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

class LocalFlanLLM:
    """Thin wrapper around a locally loaded seq2seq model for text generation."""

    def __init__(self, model_id="google/flan-t5-large", device=None):
        """Load the tokenizer and model and move the model to the target device.

        Args:
            model_id: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
            device: Device to run on. When falsy, ``"cuda"`` is used if
                ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def generate(self, prompt, max_new_tokens=256, temperature=0.5):
        """Generate text for ``prompt`` and return it decoded.

        Args:
            prompt: Input text for the model.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature. A positive value enables sampling
                (with ``top_p=0.9`` and ``top_k=50``); ``0``, a negative value,
                or ``None`` switches to deterministic, non-sampling decoding.

        Returns:
            The first generated sequence, decoded with special tokens removed.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        generation_kwargs = {"max_new_tokens": max_new_tokens}
        if temperature is not None and temperature > 0:
            # Honor the caller's temperature; it used to be accepted but ignored.
            generation_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_p=0.9,
                top_k=50,
            )
        else:
            # Sampling settings are omitted entirely when sampling is off.
            generation_kwargs["do_sample"] = False

        outputs = self.model.generate(**inputs, **generation_kwargs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Holds the single LocalFlanLLM instance once it has been loaded, so the model
# weights are not reloaded for every question.
_LLM_CACHE = {}


def _get_llm():
    """Return the shared ``LocalFlanLLM``, constructing it on first use."""
    if "llm" not in _LLM_CACHE:
        _LLM_CACHE["llm"] = LocalFlanLLM()
    return _LLM_CACHE["llm"]


# Simple RAG: retrieve top chunks, then pass them as context to the LLM with the query
def rag_answer(file_path, query):
    """Answer ``query`` using chunks retrieved from the PDF at ``file_path``.

    Args:
        file_path: Path of the PDF, forwarded to ``build_retriever``.
        query: The user's question.

    Returns:
        A ``(answer, docs)`` tuple: the generated answer string and the retrieved
        documents whose ``page_content`` formed the prompt context.
    """
    retriever = build_retriever(file_path)
    # Prefer `invoke`; fall back to the older `get_relevant_documents` only when
    # the retriever does not provide it.
    fetch = getattr(retriever, "invoke", None) or retriever.get_relevant_documents
    docs = fetch(query)
    context = "\n\n".join(d.page_content for d in docs)
    prompt = f"Use the following context to answer the user question.\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"
    answer = _get_llm().generate(prompt, max_new_tokens=256)
    return answer, docs


In [None]:
import gradio as gr

def qa_interface(file_path, query):
    """Gradio callback: answer ``query`` about the uploaded PDF.

    Args:
        file_path: Path of the uploaded PDF (a ``str`` from the Gradio ``File``
            input when ``type="filepath"``), or ``None`` if nothing was uploaded.
        query: The question typed by the user.

    Returns:
        The generated answer, or a short instruction message when the PDF or the
        question is missing.
    """
    # Give the user actionable feedback instead of surfacing a pipeline error.
    if not file_path:
        return "Please upload a PDF file before asking a question."
    if not query or not query.strip():
        return "Please enter a question."

    # The retrieved source documents are not shown in the UI, only the answer.
    answer, _source_docs = rag_answer(file_path, query)
    return answer

# Gradio UI: a single-PDF upload plus a question box as inputs, wired to
# `qa_interface`, with the answer rendered in one text box.
rag_app = gr.Interface(
    fn=qa_interface,
    inputs=[
        # type="filepath" means the callback receives the upload as a path string.
        gr.File(label="Upload PDF File", file_count="single", file_types=['.pdf'], type="filepath"),
        gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here ...")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="PDF RAG QA Bot (Local Transformers)",
    description="Upload a PDF and ask a question. The bot uses local embeddings + Chroma + FLAN-T5."
)

# Launch on port 7860.
# NOTE(review): `server_name="0.0.0.0"` together with `share=True` presumably makes
# the app reachable from outside this machine — confirm that exposure is intended.
rag_app.launch(server_name="0.0.0.0", server_port=7860, share=True)
