In [None]:
!pip install langchain-community
!pip install langchain
! pip install pypdf
!pip install -U langchain-community
!pip install sentence-transformers
!pip install faiss-cpu
!pip install gradio

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.66 (from langchain-community)
  Downloading langchain_core-0.3.69-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain<1.0.0,>=0.3.26 (from langchain-community)
  Downloading langchain-0.3.26-py3-none-any.whl.metadata (7.8 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain-community)
  Downloading sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting langsmith>=0.1.125 (from langchain-community)
  Downloading langsmith-0.4.6-py3-none-any.whl.metadata (15 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-commu

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import pipeline, AutoTokenizer
from huggingface_hub import login
import gradio as gr
import os
import torch
from functools import lru_cache
import psutil
import socket
from contextlib import closing
import logging


In [None]:
# Set up logging for debugging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Authentication
# The token is read from the environment instead of being hardcoded in the
# notebook, and an already-exported token is never overwritten with an empty
# string. Export HUGGINGFACE_HUB_TOKEN (or HF_TOKEN) before starting the kernel.
hf_token = (
    os.environ.get("HUGGINGFACE_HUB_TOKEN") or os.environ.get("HF_TOKEN") or ""
).strip()
if hf_token:
    login(token=hf_token)
else:
    # The models used in this notebook are loaded by public model id, so the
    # notebook continues without logging in.
    logger.warning("No Hugging Face token found in the environment; skipping login")


In [None]:
# Find an available port
def find_free_port(start_port=7860, max_attempts=10):
    """Return the first TCP port at or above ``start_port`` that can be bound.

    Ports ``start_port`` .. ``start_port + max_attempts - 1`` are probed in
    order by binding a throwaway socket on all interfaces. The probe socket is
    closed before returning, so the port is only known to have been free at
    the moment of the check.

    Args:
        start_port: First port number to try.
        max_attempts: How many consecutive ports to try.

    Returns:
        The first port number in the range that could be bound.

    Raises:
        OSError: If none of the probed ports could be bound.
    """
    stop_port = start_port + max_attempts
    for candidate in range(start_port, stop_port):
        # The socket is its own context manager and is closed on exit.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(("0.0.0.0", candidate))
            except OSError:
                # Port is taken (or otherwise unbindable); try the next one.
                continue
            return candidate
    raise OSError(f"No free ports found in range {start_port}-{start_port + max_attempts - 1}")


In [None]:
# Terminate processes using a port
def free_port(port):
    """Best-effort: terminate *other* processes that are listening on ``port``.

    Connections are skipped when they have no local address, are not in the
    LISTEN state, have no owning PID, or belong to the current process.
    A PID of ``None`` must never reach ``psutil.Process``: with no PID it
    refers to the current process, so this notebook's kernel would be
    terminated.

    Args:
        port: Local TCP port number to free.

    Returns:
        None. Failures to inspect or terminate are logged, not raised.
    """
    try:
        connections = psutil.net_connections()
    except psutil.AccessDenied as e:
        # Listing system-wide connections can require elevated privileges.
        logger.warning(f"Cannot inspect connections for port {port}: {e}")
        return

    own_pid = os.getpid()
    handled_pids = set()
    for conn in connections:
        if not conn.laddr or conn.laddr.port != port or conn.status != 'LISTEN':
            continue
        pid = conn.pid
        if pid is None or pid == own_pid or pid in handled_pids:
            # Unknown owner, ourselves, or already dealt with.
            continue
        handled_pids.add(pid)
        try:
            process = psutil.Process(pid)
            process.terminate()
            process.wait(timeout=3)
            logger.info(f"Terminated process {pid} using port {port}")
        except psutil.NoSuchProcess:
            # The process exited on its own between listing and terminating.
            pass
        except (psutil.AccessDenied, psutil.TimeoutExpired) as e:
            logger.warning(f"Could not terminate process {pid} using port {port}: {e}")


In [None]:
# Optimized PDF loading and preprocessing
@lru_cache(maxsize=1)
def _load_pdf_chunks(pdf_path):
    """Load ``pdf_path``, flatten newlines and split it into chunks.

    Raises on any failure so that ``lru_cache`` only memoizes successful loads.
    Returns a tuple so the cached value cannot be mutated by callers.
    """
    docs = PyPDFLoader(pdf_path).load()
    for doc in docs:
        doc.page_content = doc.page_content.replace("\n", " ").strip()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=400,
        chunk_overlap=50,
        separators=[". ", "! ", "? ", " ", ""]
    )
    chunks = tuple(text_splitter.split_documents(docs))
    logger.info(f"Loaded and split PDF into {len(chunks)} chunks")
    return chunks


def load_and_process_pdf(pdf_path="cse.pdf"):
    """Return the chunks of the PDF at ``pdf_path`` (cached after first success).

    A failed load is *not* cached: once the file is fixed or uploaded, the next
    call retries instead of returning the stale empty result until the kernel
    is restarted.

    Args:
        pdf_path: Path of the PDF to load. Defaults to ``"cse.pdf"``.

    Returns:
        A new list of document chunks, or ``[]`` if loading or splitting failed
        (the error is logged).
    """
    try:
        # Hand out a fresh list so callers cannot corrupt the cached chunks.
        return list(_load_pdf_chunks(pdf_path))
    except Exception as e:
        logger.error(f"PDF loading failed: {e}")
        return []


# Keep the cache-management helpers the lru_cache-decorated original exposed.
load_and_process_pdf.cache_clear = _load_pdf_chunks.cache_clear
load_and_process_pdf.cache_info = _load_pdf_chunks.cache_info


In [None]:
# Create vector store
@lru_cache(maxsize=1)
def _build_vector_store():
    """Embed the PDF chunks and index them with FAISS.

    Raises on any failure (including "nothing to index") so that ``lru_cache``
    only memoizes a successfully built store.
    """
    chunks = load_and_process_pdf()
    if not chunks:
        raise ValueError("No chunks available for vector store")
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
        encode_kwargs={"normalize_embeddings": True}
    )
    vector_store = FAISS.from_documents(chunks, embedding_model, distance_strategy="COSINE")
    logger.info("Vector store created successfully")
    return vector_store


def create_vector_store():
    """Return the FAISS vector store for the PDF (cached after first success).

    Failures are logged and reported as ``None`` but are not cached, so a later
    call retries. Building the embedding model is covered by the same error
    handling as building the index.

    Returns:
        The vector store, or ``None`` if there were no chunks or building failed.
    """
    try:
        return _build_vector_store()
    except Exception as e:
        logger.error(f"Vector store creation failed: {e}")
        return None


# Keep the cache-management helpers the lru_cache-decorated original exposed.
create_vector_store.cache_clear = _build_vector_store.cache_clear
create_vector_store.cache_info = _build_vector_store.cache_info


In [None]:
# Initialize LLM pipeline
@lru_cache(maxsize=1)
def _build_llm_pipeline():
    """Build the text2text-generation pipeline for ``google/flan-t5-small``.

    Raises on any failure so that ``lru_cache`` only memoizes a working pipeline.
    """
    model_id = "google/flan-t5-small"
    use_gpu = torch.cuda.is_available()
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # The original built a BitsAndBytesConfig here but never passed it to
    # pipeline(), so the model was never quantized. That dead code was removed.
    # NOTE(review): temperature/top_k are presumably only honoured when sampling
    # is enabled (do_sample=True) - confirm whether greedy decoding is intended.
    pipe = pipeline(
        "text2text-generation",  # Better suited for Q&A
        model=model_id,
        tokenizer=tokenizer,
        device=0 if use_gpu else -1,
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        max_new_tokens=50,
        temperature=0.1,
        top_k=40,
        pad_token_id=tokenizer.eos_token_id
    )
    logger.info("LLM pipeline initialized")
    return pipe


def get_llm_pipeline():
    """Return the generation pipeline (cached after first success).

    Tokenizer loading is covered by the same error handling as pipeline
    construction. Failures are logged and reported as ``None`` but are not
    cached, so a later call retries.

    Returns:
        The transformers pipeline, or ``None`` if initialization failed.
    """
    try:
        return _build_llm_pipeline()
    except Exception as e:
        logger.error(f"LLM pipeline initialization failed: {e}")
        return None


# Keep the cache-management helpers the lru_cache-decorated original exposed.
get_llm_pipeline.cache_clear = _build_llm_pipeline.cache_clear
get_llm_pipeline.cache_info = _build_llm_pipeline.cache_info


In [None]:
# Create QA chain
@lru_cache(maxsize=1)
def _build_qa_chain():
    """Assemble the RetrievalQA chain from the vector store and the LLM pipeline.

    Raises on any failure (including a missing vector store or pipeline) so
    that ``lru_cache`` only memoizes a usable chain.
    """
    vector_store = create_vector_store()
    if vector_store is None:
        raise RuntimeError("No vector store available")
    llm_pipeline = get_llm_pipeline()
    if llm_pipeline is None:
        # Without this guard a chain would be built around a None pipeline and
        # only fail later, at query time.
        raise RuntimeError("No LLM pipeline available")
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 2}
    )
    prompt_template = """Answer in one sentence using the context.

    Context: {context}

    Question: {question}

    Answer: """
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"]
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=HuggingFacePipeline(pipeline=llm_pipeline),
        retriever=retriever,
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=False
    )
    logger.info("QA chain created successfully")
    return qa_chain


def get_qa_chain():
    """Return the question-answering chain (cached after first success).

    Failures are logged and reported as ``None`` but are not cached, so the
    next call retries once the underlying problem is fixed.

    Returns:
        The RetrievalQA chain, or ``None`` if any component could not be built.
    """
    try:
        return _build_qa_chain()
    except Exception as e:
        logger.error(f"QA chain creation failed: {e}")
        return None


# Keep the cache-management helpers the lru_cache-decorated original exposed.
get_qa_chain.cache_clear = _build_qa_chain.cache_clear
get_qa_chain.cache_info = _build_qa_chain.cache_info


In [None]:
# Optimized answer function
def answer_question(query):
    """Answer ``query`` from the indexed PDF and return text for the UI.

    Args:
        query: The user's question. ``None``, empty and whitespace-only input
            is rejected up front instead of being sent through retrieval and
            generation.

    Returns:
        The text after the last ``"Answer:"`` marker in the chain's result,
        stripped, or a human-readable message when the query is empty, the
        chain is unavailable, no answer was produced, or processing failed.
    """
    question = "" if query is None else str(query).strip()
    if not question:
        return "Please enter a question."

    qa_chain = get_qa_chain()
    if qa_chain is None:
        logger.error("QA chain is not initialized")
        return "Error: Failed to initialize QA chain"
    try:
        logger.info(f"Processing query: {question}")
        response = qa_chain.invoke({"query": question})
        # Keep only what follows the last "Answer:" marker, if the model echoed it.
        answer = response["result"].split("Answer:")[-1].strip()
        if not answer:
            logger.warning("Empty answer generated")
            return "No relevant information found in the provided document."
        return answer
    except Exception as e:
        # logger.exception records the traceback in addition to the message.
        logger.exception(f"Query processing failed: {e}")
        return f"Error: {str(e)}"


In [None]:
# Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("⚡ Fast PDF Q&A - CSE Department,Comilla University")
    input_box = gr.Textbox(lines=2, placeholder="Ask about the CSE Department...")
    output_box = gr.Textbox()
    submit_btn = gr.Button("Submit")
    submit_btn.click(fn=answer_question, inputs=input_box, outputs=output_box)

# Launch server
# Try the first free port; if launching on it fails with OSError, retry once on
# the next free port above it. The original retry lived in an `except` handler
# that read `port` even when find_free_port() itself had raised (NameError).
# free_port() is deliberately not called: find_free_port() has just bound the
# port successfully, so terminating processes for it gains nothing.
# NOTE(review): server_name="0.0.0.0" listens on all interfaces - confirm that
# exposing the app beyond localhost is intended.
MAX_LAUNCH_ATTEMPTS = 2
next_start_port = 7860
for attempt in range(1, MAX_LAUNCH_ATTEMPTS + 1):
    port = find_free_port(start_port=next_start_port)
    try:
        interface.launch(
            server_name="0.0.0.0",
            server_port=port,
            share=False,
            quiet=True
        )
    except OSError as e:
        logger.error(f"Failed to launch Gradio: {e}")
        if attempt == MAX_LAUNCH_ATTEMPTS:
            raise
        next_start_port = port + 1
    else:
        logger.info(f"Gradio server running on port {port}")
        break

<IPython.core.display.Javascript object>