## Install Required Packages

In [18]:
!pip install langchain langchain-community langchain-groq langchain-text-splitters chromadb pymupdf gradio



## Import Libraries

In [19]:
import getpass
import os
import warnings

import fitz
import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings

## Load PDF



In [20]:
def load_pdf_with_ocr(pdf_path):
    """Load a PDF into documents with ``PyMuPDFLoader``, best-effort.

    Despite the function name, no OCR step is performed here: the result is
    exactly what ``PyMuPDFLoader(pdf_path).load()`` returns.

    Args:
        pdf_path: Path of the PDF file to load.

    Returns:
        The list returned by the loader, or an empty list if loading raised.
        In the failure case a ``RuntimeWarning`` describing the error is
        emitted so the problem is not hidden.
    """
    try:
        return PyMuPDFLoader(pdf_path).load()
    except Exception as exc:
        # Keep the best-effort contract (callers still get a list), but report
        # why loading failed instead of swallowing the error silently.
        warnings.warn(
            f"Could not load PDF {pdf_path!r}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return []

# Location of the book to index. Set the PDF_PATH environment variable to use
# a different file; the default is the original upload path.
pdf_path = os.environ.get("PDF_PATH", "/content/Learning_Python.pdf")
docs = load_pdf_with_ocr(pdf_path)

# An empty result leaves nothing to split or index, so flag it right away.
if not docs:
    print(f"Warning: no pages were loaded from {pdf_path!r}.")

## Split into Chunks

In [21]:
# Splitter settings passed to RecursiveCharacterTextSplitter below.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)
# Break the loaded documents into the chunks that get embedded later.
chunks = text_splitter.split_documents(docs)

## Create Vector DB

In [22]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Name of the embedding model handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# Build the Chroma vector store from the chunks using the embeddings above.
db = Chroma.from_documents(documents=chunks, embedding=embeddings)

## Define LLM (Groq)

In [23]:
# Keep the key out of the notebook: read it from the GROQ_API_KEY environment
# variable, and prompt for it (without echo) only when that is unset or empty.
groq_api_key = os.environ.get("GROQ_API_KEY") or getpass.getpass("Enter your Groq API key: ")
llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)


## Create Prompt



In [24]:
# Prompt text for the QA chain; {context} and {question} are filled in per call.
QA_TEMPLATE = """
You are a strict QA system that ONLY answers from the given book context.

Context from the book:
{context}

Question: {question}

Rules:
- If the answer is in the context, answer clearly in simple language.
- If the answer is NOT in the context, reply strictly with:
  "I don't know."
- Do not answer from your own knowledge.
- Ignore irrelevant questions not related to the book.
Answer:
"""

prompt = PromptTemplate(
    template=QA_TEMPLATE,
    input_variables=["context", "question"],
)

# Pair the prompt with the Groq model defined earlier.
chain = LLMChain(prompt=prompt, llm=llm)

## Function to Answer

In [25]:
def answer_from_book_only(question, k=3):
    """Answer a question using only text retrieved from the vector store.

    Args:
        question: The user's question. ``None``, empty, or whitespace-only
            input is answered with "I don't know." without querying the
            vector store or the LLM.
        k: Number of chunks to request from ``db.similarity_search``.
            Defaults to 3.

    Returns:
        The stripped ``"text"`` field of the chain result, or
        "I don't know." when there is no question or no retrieved context.
    """
    # Nothing to search for: skip the embedding lookup and the LLM call.
    if not question or not question.strip():
        return "I don't know."

    matches = db.similarity_search(question, k=k)
    context = "\n\n".join(match.page_content for match in matches)
    if not context.strip():
        return "I don't know."

    result = chain.invoke({"context": context, "question": question})
    return result["text"].strip()

## Gradio UI

In [27]:
import gradio as gr

def qa_gradio(question, history):
    """Gradio callback: answer ``question`` and append the turn to the chat.

    Args:
        question: Text submitted by the user.
        history: Existing chat turns, or ``None`` (treated as an empty chat).
            The list passed in is not modified.

    Returns:
        A 2-tuple holding the same updated history list twice, each turn
        being a ``(question, answer)`` pair.
    """
    # Work on a copy so a None history is tolerated and the caller's list is
    # never mutated in place.
    history = list(history or [])
    try:
        answer = answer_from_book_only(question)
    except Exception as e:
        # Show the failure in the chat instead of breaking the UI callback.
        answer = f"Error: {e}"
    history.append((question, answer))
    return history, history

# Chat UI: a chatbot pane, a one-line question box and a clear button.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## Ask Questions from Python Book (Book-Based Answers Only)")

    chatbot = gr.Chatbot(label="Chat with the Book", height=400)
    question_inp = gr.Textbox(
        label="Type your question and press Enter",
        placeholder="e.g. What is a variable?",
        lines=1
    )
    clear_btn = gr.Button("Clear Chat")

    # qa_gradio returns the history twice, so the chatbot is listed twice to
    # match. Once the answer is shown, empty the textbox for the next question.
    question_inp.submit(
        fn=qa_gradio,
        inputs=[question_inp, chatbot],
        outputs=[chatbot, chatbot],
    ).then(lambda: "", None, question_inp)

    # Reset to an empty list (not None) so the chat value stays a list.
    clear_btn.click(lambda: [], None, chatbot, queue=False)

demo.launch()

  chatbot = gr.Chatbot(label="Chat with the Book", height=400)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5419f8eebf703f3e9e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


