In [15]:
!pip install langchain PyPDF2 transformers torch
!pip install langchain
!pip install PyPDF2
!pip install transformers
!pip install torch
!pip install langchain_community
!pip install sentence-transformers
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [29]:
import PyPDF2
import os
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Load PDF and extract text
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text() or ""  # Ensure to handle None
    return text

# Load a specific PDF file
def load_pdf_document(pdf_path):
    if not os.path.isfile(pdf_path):
        raise FileNotFoundError(f"The file {pdf_path} does not exist.")
    text = extract_text_from_pdf(pdf_path)
    return [Document(page_content=text, metadata={"source": pdf_path})]

# Path to the PDF file
pdf_file_path = "/content/Physics Lab Viva QnA UVCE .pdf"  # Update this to your PDF file path
documents = load_pdf_document(pdf_file_path)

# Initialize embeddings model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create a vector store
vector_store = FAISS.from_documents(documents, embedding_model)

# Define the question-answering model
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Wrap the pipeline with HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=qa_model)

# Build the RetrievalQA chain
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

# Query the system
def query_pdf_system(query):
    # Get relevant documents for the query
    docs = retrieval_qa.retriever.get_relevant_documents(query)

    # Combine the context from the documents for the QA model
    context = " ".join([doc.page_content for doc in docs])

    # Ensure there is context to query
    if not context:
        return "No relevant context found for the query."

    # Get the answer from the model
    response = qa_model(question=query, context=context)
    return response['answer']

# Example usage
if __name__ == "__main__":
    user_query = "What is meant by inertia of a body?"
     # Replace with your query
    result = query_pdf_system(user_query)
    print("Query Result:", result)





Query Result: concentric	circular
