In [1]:
!pip install langchain openai chromadb tiktoken unstructured pypdf gradio python-dotenv

Collecting chromadb
  Downloading chromadb-1.0.12-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting unstructured
  Downloading unstructured-0.17.2-py3-none-any.whl.metadata (24 kB)
Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-4.2.0-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_o

In [2]:
# Read the OpenAI API key from Colab's user secrets so the key itself never
# appears in the notebook source.
from google.colab import userdata

openai_api_key = userdata.get('openai_api_key')

In [8]:
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

In [10]:
import os

# Make sure the working directories exist; `exist_ok=True` keeps this cell
# safe to re-run.
for directory in ("docs", "chroma_store"):
    os.makedirs(directory, exist_ok=True)

In [30]:
from tempfile import NamedTemporaryFile
def process_uploaded_file(file_obj):
    """Build a RetrievalQA chain over the contents of one uploaded PDF.

    Args:
        file_obj: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute holds the
            path of the file on disk.

    Returns:
        A ``RetrievalQA`` chain whose retriever returns the 3 most similar
        chunks of the PDF for each query.

    Raises:
        ValueError: If no file was provided, or if the PDF yields no text
            chunks to index.
    """
    # Local imports keep this function usable regardless of which other
    # cells have been executed.
    import os
    import uuid

    if file_obj is None:
        raise ValueError("No file provided. Please select a PDF to upload.")

    # Accept both plain paths and file-like wrappers exposing `.name`.
    # (Path objects are handled explicitly because `Path.name` is only the
    # basename, not the full path.)
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name

    # Load and split PDF
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    if not docs:
        # e.g. a scanned PDF without a text layer; fail with a clear message
        # instead of handing an empty batch to the vector store.
        raise ValueError("No text could be extracted from the PDF.")

    # Vector DB + retriever.
    # Each upload gets its own collection: with the default collection name,
    # Chroma's in-process store can be shared between calls, which would let
    # chunks from a previously uploaded PDF show up in this retriever.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectordb = Chroma.from_documents(
        docs,
        embedding=embeddings,
        collection_name=f"pdf_{uuid.uuid4().hex}",
    )
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})

    # QA Chain (temperature=0 for the most deterministic answers)
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=openai_api_key)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    return qa_chain

def handle_upload(file_obj):
    """Process an uploaded PDF and report the outcome as a status string.

    On success the freshly built QA chain is stored in the module-level
    ``qa_chain_global``. On any failure the previous value (if any) is left
    untouched and the error text is returned instead of being raised.
    """
    global qa_chain_global

    try:
        new_chain = process_uploaded_file(file_obj)
    except Exception as exc:
        return f"❌ Error: {str(exc)}"

    # Only publish the chain once processing has fully succeeded.
    qa_chain_global = new_chain
    return "✅ PDF uploaded and processed. You can now ask questions!"

def handle_question(question):
    """Answer a question using the QA chain built from the last uploaded PDF.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer, or a "❌ ..." hint when no PDF has been processed
        yet or the question is blank.
    """
    # `qa_chain_global` is only created by a successful upload, so look it up
    # defensively: a bare reference would raise NameError before the first
    # upload instead of reaching the friendly message below.
    chain = globals().get("qa_chain_global")
    if chain is None:
        return "❌ Please upload a PDF first."

    # Don't spend an LLM call on an empty prompt.
    if not question or not question.strip():
        return "❌ Please enter a question."

    return chain.run(question)



In [31]:
# `gr` is used throughout this cell, so import it here to keep the cell
# runnable on a fresh kernel.
import gradio as gr

with gr.Blocks(title="📚 RAG Chatbot (PDF Q&A)") as demo:
    gr.Markdown("## 📄 AI Chatbot for PDF Question Answering\nUpload a PDF and ask questions about its content.")

    # Row 1: choose a PDF, then click "Upload" to build the QA chain.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_status = gr.Textbox(label="Status", interactive=False)
        upload_button = gr.Button("Upload")
        upload_button.click(fn=handle_upload, inputs=file_input, outputs=upload_status)

    # Row 2: ask a question against the most recently processed PDF.
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(label="Answer", lines=4)
        ask_button = gr.Button("Ask")
        ask_button.click(fn=handle_question, inputs=question, outputs=answer)

# share=True requests a temporary public URL in addition to the local server.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://72cfce83369b66627b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


