In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import pipeline
from dotenv import load_dotenv
import streamlit as st

# Populate the process environment via python-dotenv before anything reads it
load_dotenv()

# Module-level question-answering pipeline shared by every call to ask_question()
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-uncased-distilled-squad",
)

# Extract text from PDFs
def get_pdf_text(pdf_paths):
    """Concatenate the extracted text of every page of the given PDFs.

    Args:
        pdf_paths: Iterable of PDF sources (paths or uploaded file objects,
            whatever ``PdfReader`` accepts). ``None`` or an empty iterable
            produces an empty string.

    Returns:
        One string containing the text of all pages, in input and page order.
    """
    page_texts = []
    for path in pdf_paths or ():
        pdf_reader = PdfReader(path)
        for page in pdf_reader.pages:
            # A page without extractable text must not break concatenation,
            # so a falsy result (None or "") is treated as empty text.
            page_texts.append(page.extract_text() or "")
    # Join once instead of repeated `+=`, which is quadratic in the worst case.
    return "".join(page_texts)

# Split text into smaller parts (chunks)
def get_text_chunks(text, *, chunk_size=1000, chunk_overlap=200):
    """Split text into overlapping chunks suitable for embedding.

    Args:
        text: The full text to split. Empty or ``None`` yields ``[]``.
        chunk_size: Maximum chunk size passed to the splitter (default 1000).
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter (default 200).

    Returns:
        A list of text chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    if not text:
        # Nothing to split; skip constructing the splitter entirely.
        return []
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

# Create FAISS vector store
def get_vector_store(text_chunks):
    """Embed the text chunks and persist a FAISS index to ``faiss_index``.

    Args:
        text_chunks: Non-empty list of text chunks to embed and index.

    Raises:
        ValueError: If ``text_chunks`` is empty, e.g. because the PDFs
            contained no extractable text.
    """
    if not text_chunks:
        # Fail early with an actionable message instead of passing an empty
        # list into the embedding model and FAISS.
        raise ValueError(
            "No text chunks to index; the PDFs may contain no extractable text."
        )
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    # Saved under the same local directory name that ask_question() loads from.
    vector_store.save_local("faiss_index")

# Search relevant document chunks and answer the question
def ask_question(user_question):
    """Answer a question from the chunks stored in the local FAISS index.

    Loads the index saved under ``faiss_index``, retrieves the chunks most
    similar to the question, joins them into one context string and runs the
    module-level ``qa_pipeline`` on it.

    Args:
        user_question: The question text.

    Returns:
        The ``'answer'`` field of the question-answering pipeline response.

    Raises:
        FileNotFoundError: If the ``faiss_index`` directory does not exist,
            i.e. no PDFs have been processed yet.
    """
    if not os.path.isdir("faiss_index"):
        # Give a clear, actionable error instead of a low-level loader failure.
        raise FileNotFoundError(
            "FAISS index 'faiss_index' not found; process the PDF files first."
        )

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # data from disk. Acceptable only while the index is one this application
    # wrote itself; never point this at an index from an untrusted source.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    # The pipeline takes a single context string, so merge the retrieved chunks.
    context = " ".join(doc.page_content for doc in docs)

    response = qa_pipeline(question=user_question, context=context)

    return response['answer']


# Streamlit application
def streamlit_main():
    """Render the Streamlit UI for uploading PDFs and asking questions.

    The main area holds a question box whose answer comes from
    ``ask_question``. The sidebar holds the PDF uploader and a button that
    extracts the text, chunks it and builds the FAISS index.
    """
    st.set_page_config("Chat PDF - Ask Anything Mode")
    st.header("Chat with your PDF using DistilBERT 💬📄")

    user_question = st.text_input("Ask a Question from the PDF Files")

    if user_question:
        # The index only exists after a successful "Submit & Process";
        # asking earlier would fail while loading it.
        if os.path.isdir("faiss_index"):
            answer = ask_question(user_question)
            st.write(f"Reply: {answer}")
        else:
            st.warning("Please upload and process your PDF files before asking a question.")

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing uploaded: there is no text to index.
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        get_vector_store(text_chunks)
                        st.success("Processing Completed!")
                    else:
                        # e.g. image-only PDFs: no chunks means nothing to index.
                        st.error("No extractable text was found in the uploaded PDF files.")

if __name__ == "__main__":
    # `streamlit run app.py` also executes this file as "__main__", so the
    # interactive prompt must be skipped there and the UI rendered directly.
    from streamlit import runtime as _st_runtime

    if _st_runtime.exists():
        streamlit_main()
    else:
        mode = input("Type 'streamlit' to run in Streamlit, or 'terminal' to run in Terminal: ").strip().lower()

        if mode == 'streamlit':
            # Run this command in your terminal: streamlit run app.py
            print("Run the app using: streamlit run app.py")
        else:
            # terminal_main is not defined in the visible part of this file;
            # look it up defensively so a missing definition gives a clear
            # exit message rather than a NameError.
            _terminal_main = globals().get("terminal_main")
            if _terminal_main is None:
                raise SystemExit("Terminal mode is unavailable: terminal_main() is not defined in this file.")
            _terminal_main()


In [None]:
pip install langchain-google-genai