In [1]:
import streamlit as st
from AuxSearch.components.PDFTextExtractor import PDFConverter
from AuxSearch.components.TextChunkerEmbedder import TextProcessor
from langchain_community.vectorstores.faiss import FAISS
from AuxSearch.constants import *
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate

# Shared helper instances reused by later cells: `pdfconverter.pdf_to_text`
# extracts text from a PDF, `textprocessor.get_chunks` / `get_vector_store`
# split that text and build the vector store from the chunks.
pdfconverter = PDFConverter()
textprocessor = TextProcessor()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:


def user_input(question):
    """Answer ``question`` using the FAISS index stored locally as ``faiss_index``.

    The index is loaded with Google Generative AI embeddings, the chunks most
    similar to the question are retrieved, and the filled-in prompt is sent to
    the ``gemini-pro`` chat model.

    Args:
        question: The user's question. It is used both as the retrieval query
            and as the ``{question}`` value of the prompt.

    Returns:
        The model's reply as a plain string (the output of ``StrOutputParser``).
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): newer langchain_community releases require
    # `allow_dangerous_deserialization=True` here because the index is
    # pickled -- confirm against the installed version.
    vectorstore = FAISS.load_local("faiss_index", embeddings)
    # The number of chunks to fetch must be passed through `search_kwargs`;
    # a bare `k=3` keyword is not a retriever setting and has no effect.
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )

    prompt_template = """
        Answer the question to the point without description. If answer not found in context reply "Answer Not found"\n\n
        Context:\n {context}?\n
        Question: \n{question}\n

        Answer:
    """
    model = ChatGoogleGenerativeAI(model="gemini-pro",
                                temperature=0.3)
    prompt = PromptTemplate(template=prompt_template,input_variables=["context","question"])

    # The invoked string is fed to the retriever (-> "context") and passed
    # through unchanged as "question".
    rag_chain = (
        {
            "context":retriever,
            "question": RunnablePassthrough()
        }
        | prompt
        | model
        | StrOutputParser()
    )

    # Return the answer so that callers can display it; it used to be dropped.
    return rag_chain.invoke(question)


In [11]:
# Sample inputs for the manual walkthrough in the following cells.
question = "list down the skills"
pdf = "Omkar_Firame_Resume.pdf"  # passed to pdfconverter.pdf_to_text below

In [12]:
# Extract the PDF text, split it into chunks, and hand the chunks to
# `get_vector_store` (presumably this writes the "faiss_index" that later
# cells load with FAISS.load_local -- confirm in TextProcessor).
raw_text = pdfconverter.pdf_to_text(pdf)
text_chunks = textprocessor.get_chunks(raw_text)
textprocessor.get_vector_store(text_chunks)


In [13]:
# Preview only the first chunks instead of dumping the whole list into the
# notebook output.
text_chunks[:2]

['Omkar\nFirame\n+919404350615\n|\nE-Mail\n|\nLinkedin\n|\nGitHub\nSUMMAR Y:\nData\nScientist\nwith\nstrong\nMachine\nLearning\nfoundation\nand\n3+\nyears\nof\nexperience\nin\ndeveloping\nrobust\nML\nmodels\nusing\npredictive\ndata\nmodeling,\nanalyze\ndata\nto\ndeliver\ninsights\nand\nimplement\naction-oriented\nsolutions\nto\ncomplex\nbusiness\nproblems.\nTECHNICAL\nSKILLS:\nPython,\nR,\nMachine\nLearning,\nDeep\nLearning,\nPyTorch,\nIguazio,\nKubeFlow\nPipeline,\nAzure\nDatabricks,\nDask,\nWeb\nScraping\n(Selenium),\nGit,\nEXPERIENCE:\nData\nScientist\n-',
 'Pipeline,\nAzure\nDatabricks,\nDask,\nWeb\nScraping\n(Selenium),\nGit,\nEXPERIENCE:\nData\nScientist\n-\nWynum\nAutomation\nServices\nPvt.\nLtd\n|\nApril\n2023\n-\nPresent\n●\nManaged\na\nproject\nimplementing\nan\ninformation\nretrieval\nsystem\nfor\nextracting\nrelevant\npress\nreleases\nfrom\nonline\nsources.\n●\nApplied\nadvanced\ndata\npreprocessing\ntechniques\nto\ncleanse\nand\nstructure\ncontent,\nenhancing\ncategorizati

In [14]:
# Load the locally saved "faiss_index" and expose it as a similarity retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = FAISS.load_local("faiss_index", embeddings)
# The number of chunks to fetch must be passed through `search_kwargs`;
# a bare `k=3` keyword is not a retriever setting and has no effect.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)


In [15]:
# Display the retriever's repr to check how it was configured.
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000263806CDEB0>)

In [16]:

# Prompt text: answer tersely from the retrieved context, or reply
# "Answer Not found" when the context does not contain the answer.
prompt_template = """
    Answer the question to the point without description. If answer not found in context reply "Answer Not found"\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
"""
model = ChatGoogleGenerativeAI(temperature=0.6, model="gemini-pro")
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Chain: the invoked string goes to the retriever (-> "context") and is passed
# through unchanged as "question"; the filled prompt is sent to the model and
# the reply is parsed into a plain string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)


In [17]:
# Display the composed chain to verify its stages (inputs -> prompt -> model -> parser).
rag_chain

{
  context: VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000263806CDEB0>),
  question: RunnablePassthrough()
}
| PromptTemplate(input_variables=['context', 'question'], template='\n    Answer the question to the point without description. If answer not found in context reply "Answer Not found"\n\n\n    Context:\n {context}?\n\n    Question: \n{question}\n\n\n    Answer:\n')
| ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.6, client= genai.GenerativeModel(
     model_name='models/gemini-pro',
     generation_config={}.
     safety_settings={}
  ))
| StrOutputParser()

In [19]:

# Run the full chain on the sample question; the cell displays the model's answer.
rag_chain.invoke(question)

'- Python\n- Beautiful Soup\n- Requests\n- Azure Data Lake\n- Azure Databricks\n- Dask\n- Web Scraping (Selenium)\n- Git\n- Machine Learning\n- Chemical Composition\n- Python Scripts'

In [16]:

# Streamlit front end: upload one PDF, ask a question, index the PDF and
# answer the question from it.
st.set_page_config("Search In PDF")
st.header(" Search In PDF ")

pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit Button", accept_multiple_files=False)
user_question = st.text_input("Question")

if user_question:

    if st.button("Submit & Process"):
        if pdf_docs is None:
            # No file uploaded yet: do not hand None to the PDF converter.
            st.warning("Please upload a PDF file first.")
        else:
            with st.spinner("Processing..."):
                raw_text = pdfconverter.pdf_to_text(pdf_docs)
                text_chunks = textprocessor.get_chunks(raw_text)
                textprocessor.get_vector_store(text_chunks)
                answer = user_input(user_question)
                # Show the answer whenever `user_input` returns one.
                if answer is not None:
                    st.write(answer)
                st.success("Done")


In [None]:
# Removed code

def user_input():
    """Build the retrieval-and-prompt part of the RAG chain, without an LLM step.

    NOTE(review): this zero-argument redefinition replaces the earlier
    ``user_input(question)`` in the notebook namespace once the cell is run,
    after which calls such as ``user_input(user_question)`` raise a
    ``TypeError``. The cell is marked as removed code; consider deleting or
    renaming it.

    Returns:
        A runnable that, when invoked with a question string, retrieves
        context from the local ``faiss_index`` and renders the prompt with
        that context and the question.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectorstore = FAISS.load_local("faiss_index", embeddings)
    # The number of chunks to fetch must be passed through `search_kwargs`;
    # a bare `k=1` keyword is not a retriever setting and has no effect.
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 1},
    )

    prompt_template = """
        Answer the question to the point without description. If answer not found in context reply "Answer Not found"\n\n
        Context:\n {context}?\n
        Question: \n{question}\n

        Answer:
    """
    prompt = PromptTemplate(template=prompt_template,input_variables=["context","question"])

    # The chain stops at the prompt, so no chat model is constructed here.
    rag_chain = (
        {
            "context":retriever,
            "question": RunnablePassthrough()
        }
        | prompt
    )
    return rag_chain
