In [45]:
import os
import tempfile

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain.chains import create_history_aware_retriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
import streamlit as st


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id`` from Streamlit session state.

    Histories are kept in ``st.session_state.store``, a dict keyed by session
    id. Both the dict and the per-session ``ChatMessageHistory`` are created
    on first use, so the function works even when the caller has not
    initialised ``st.session_state.store`` beforehand.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The history object stored for ``session_id`` (the same object on
        every call with the same id).
    """
    # Attribute access on a missing session-state key raises, so create the
    # container before reading it.
    if "store" not in st.session_state:
        st.session_state.store = {}

    histories = st.session_state.store
    if session_id not in histories:
        histories[session_id] = ChatMessageHistory()
    return histories[session_id]


def get_docs_from_files(files):
    """Load uploaded PDF file object(s) into a flat list of documents.

    Each upload is spooled to a private temporary file (``PyPDFLoader`` is
    given a filesystem path), loaded, and the temporary file is removed again.
    Nothing is written to the working directory and the client-supplied file
    name is never used as a path.

    Args:
        files: An iterable of upload objects exposing ``.name`` and
            ``.getvalue()``, a single such object, or ``None``.

    Returns:
        All documents from all files, in input order. Each document's
        ``metadata["source"]`` is set to the upload's original ``name``.
        An empty list when ``files`` is ``None`` or empty.
    """
    if files is None:
        return []
    # A single upload object (not wrapped in a list) is treated as one file.
    if hasattr(files, "getvalue"):
        files = [files]

    documents = []
    for file in files:
        # delete=False so the file can be reopened by path after the handle is
        # closed (required on Windows); it is removed explicitly below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file.getvalue())
            tmp_path = tmp.name
        try:
            docs = PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)

        # Report the upload's own name as the source rather than the
        # throw-away temporary path.
        for doc in docs:
            doc.metadata["source"] = file.name
        documents.extend(docs)

    return documents


def get_vectorstore_from_files(files, HF_Embed_Model):
    """Build a FAISS vector store from PDF input.

    The loader is chosen from the type of ``files`` so the same function
    serves both workflows without editing the code:

    * a filesystem path (``str`` or ``os.PathLike``) is read directly with
      ``PyPDFLoader`` (notebook usage);
    * anything else is handed to ``get_docs_from_files`` (uploaded file
      objects, as in the Streamlit usage).

    Args:
        files: Path to a PDF, or uploaded file object(s).
        HF_Embed_Model: Model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        The store returned by ``FAISS.from_documents`` for the split chunks.
    """
    if isinstance(files, (str, os.PathLike)):
        pdf_docs = PyPDFLoader(os.fspath(files)).load()
    else:
        pdf_docs = get_docs_from_files(files)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    split_docs = splitter.split_documents(pdf_docs)
    embeddings = HuggingFaceEmbeddings(model_name=HF_Embed_Model)

    return FAISS.from_documents(split_docs, embeddings)




def get_history_aware_retriever(vectorstore, llm):
    """Create a retriever that first rewrites the question using chat history.

    The LLM is prompted to turn the latest user input plus ``chat_history``
    into a standalone question; that question is then sent to the vector
    store's retriever.

    Args:
        vectorstore: Object exposing ``as_retriever()``.
        llm: Chat model used for the reformulation step.

    Returns:
        The runnable produced by ``create_history_aware_retriever``.
    """
    retriever = vectorstore.as_retriever()

    # Adjacent string literals are concatenated verbatim, so every fragment
    # must carry its own trailing space.
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    return create_history_aware_retriever(llm, retriever, contextualize_q_prompt)




def get_rag_chain(history_aware_retriever, llm):
    """Assemble the retrieval-augmented answering chain.

    Retrieved documents are stuffed into the ``{context}`` slot of the system
    message; the chat history and the user's input follow it.

    Args:
        history_aware_retriever: Retriever runnable used to fetch documents.
        llm: Chat model that writes the answer.

    Returns:
        The runnable produced by ``create_retrieval_chain``.
    """
    answering_instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. Do not use any outside knowledge."
        "\n\n"
        "{context}"
    )
    prompt_messages = [
        ("system", answering_instructions),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]

    answer_chain = create_stuff_documents_chain(
        llm, ChatPromptTemplate.from_messages(prompt_messages)
    )
    return create_retrieval_chain(history_aware_retriever, answer_chain)


In [54]:
# Model identifiers: chat model passed to ChatGroq and embedding model
# passed to HuggingFaceEmbeddings.
LLM_Model = "Gemma2-9b-It"
HF_Embed_Model = "all-MiniLM-L6-v2"

# The API key is read from Streamlit secrets, not written into the notebook.
groq_api_key = st.secrets["GROQ_API_KEY"]

# In-memory chat histories keyed by session id
# (filled by get_session_history_notebook).
store = {}

# Session used by the query cells, and the config dict that carries it.
session_id = "Chat2"
config = {"configurable": {"session_id": session_id}}


def get_docs_from_files_notebook(filepath):
    """Load the PDF at ``filepath`` with ``PyPDFLoader`` and return the result of ``load()``."""
    pdf_loader = PyPDFLoader(filepath)
    return pdf_loader.load()
def get_session_history_notebook(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id`` from the module-level ``store``.

    A new ``ChatMessageHistory`` is created and stored the first time a
    session id is seen; later calls return that same object.
    """
    if session_id in store:
        return store[session_id]

    store[session_id] = ChatMessageHistory()
    return store[session_id]



# Chat model used both for question reformulation and for answering.
llm = ChatGroq(groq_api_key=groq_api_key, model_name=LLM_Model)

# Notebook run: index a local PDF path.
# (Streamlit variant: files = st.file_uploader('upload here'))
files = 'Data/i-485instr.pdf'
vectorstore = get_vectorstore_from_files(files=files, HF_Embed_Model=HF_Embed_Model)
history_aware_retriever = get_history_aware_retriever(vectorstore, llm)
rag_chain = get_rag_chain(history_aware_retriever, llm)

# Wrap the chain so every call uses the per-session history from `store`.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history_notebook,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

# First question of the session.
query = 'Provide the list of Evidences required'
response = conversational_rag_chain.invoke({"input": query}, config=config)
response['answer']



"Here is a list of evidences required, compiled from the text you provided:\n\n**For Form I-485:**\n\n* **Evidence of Haitian nationality:**\n    * Certificate of naturalization or certificate of citizenship issued by the Haitian government (if acquired outside of birth in Haiti).\n* **Evidence of battery or extreme cruelty:**\n    * Documents such as court documents, police reports, news articles, copies of reimbursement forms for travel to and from court, and affidavits of other witnesses or officials.\n\n**General Evidences (applicable to various aspects of the application):**\n\n* Two identical color passport-style photographs.\n* Photocopy of a government-issued identity document with photograph. \n* Proof of residence in the United States (affidavits with witness contact information preferred, but other supporting evidence encouraged).\n* Affidavits from credible witnesses with personal knowledge of the applicant's residence in the United States.\n\n**Other Potential Evidences:**

In [55]:
# Follow-up that only makes sense together with the previous turn in this session's history.
query = 'Are these all family based?'
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history_notebook,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
response = conversational_rag_chain.invoke({"input": query}, config=config)
response['answer']

"You are right to ask! While the provided text focuses heavily on family-based categories like VAWA and derivative family members, it also touches upon other categories like crime victim (U Nonimmigrant) which are not strictly family-based. \n\nSo, the evidence list isn't *exclusively* family-based. \n\n\nLet me know if you have more questions about specific categories or types of evidence. \n\n"

In [56]:
# Second follow-up, again sent with the same session config.
query = 'Provide only those which are family based in a list'
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history_notebook,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
response = conversational_rag_chain.invoke({"input": query}, config=config)
response['answer']

'Here are the family-based evidence requirements mentioned in the text:\n\n* **Evidence of Haitian nationality:** (for HRIFA dependents)\n* **Evidence that the qualifying relationship to the principal existed at the time the principal was granted adjustment of status and that the relationship still exists:** (for HRIFA dependents) \n* **Evidence of battery or extreme cruelty:** (Applicable to abused spouses and children)\n* **Marriage certificate (or divorce certificate or death certificate):** (For abused spouses and children under HRIFA, and other categories like K-1)\n* **Photocopy of your parents’ marriage certificate, your adoption certificate, or other proof of your parent-child relationship with the principal applicant:** (For derivative applicant children who are not refugees)\n\n\n\n\nLet me know if you have any more questions!\n'

In [60]:
# Display the module-level `store` dict, which get_session_history_notebook
# fills with one chat history per session id.
store

{'Chat2': InMemoryChatMessageHistory(messages=[HumanMessage(content='Provide the list of Evidences required'), AIMessage(content="Here is a list of evidences required, compiled from the text you provided:\n\n**For Form I-485:**\n\n* **Evidence of Haitian nationality:**\n    * Certificate of naturalization or certificate of citizenship issued by the Haitian government (if acquired outside of birth in Haiti).\n* **Evidence of battery or extreme cruelty:**\n    * Documents such as court documents, police reports, news articles, copies of reimbursement forms for travel to and from court, and affidavits of other witnesses or officials.\n\n**General Evidences (applicable to various aspects of the application):**\n\n* Two identical color passport-style photographs.\n* Photocopy of a government-issued identity document with photograph. \n* Proof of residence in the United States (affidavits with witness contact information preferred, but other supporting evidence encouraged).\n* Affidavits fro

In [59]:
# Switch to a different session id; get_session_history_notebook keeps a
# separate history per id.
# NOTE: this rebinds the notebook-level `session_id` and `config`, so any cell
# run afterwards uses 'Chat1' as well.
session_id = 'Chat1'
config = {"configurable": {"session_id": session_id}}

query = 'Are there any recent ones?'
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history_notebook,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
response = conversational_rag_chain.invoke({"input": query}, config=config)
response['answer']

'You are right! I missed a few. \n\nHere are the more recent dates:\n\n* **04/01/24:** This is the effective date for the Form I-485 Instructions.\n* **12/18/2023:** This is the date the document was drafted. \n\n\nLet me know if you have any other questions. \n'

In [52]:
from langchain_core.runnables.history import RunnableWithMessageHistory


def get_response(rag_chain, query, session_id):
    """Ask ``query`` within the chat session ``session_id`` and return the answer text.

    The chain is wrapped in ``RunnableWithMessageHistory`` with
    ``get_session_history_notebook`` as the history factory, invoked once,
    and the ``"answer"`` entry of the result is returned.

    Args:
        rag_chain: Chain taking ``input`` / ``chat_history`` and producing ``answer``.
        query: The user's question.
        session_id: Session whose history is used; passed to the history
            factory through the ``configurable`` config.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    chain_with_history = RunnableWithMessageHistory(
        rag_chain,
        get_session_history_notebook,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

    # The session id travels in the run config, not in the input payload.
    run_config = {"configurable": {"session_id": session_id}}
    result = chain_with_history.invoke({"input": query}, config=run_config)

    return result['answer']