In [137]:
import pdfplumber
import os
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from operator import itemgetter

# Load environment variables
load_dotenv()
hf_token = os.getenv('HF_TOKEN')
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

In [138]:
tmp_file_path = r"scr\test1.pdf"

In [139]:
with pdfplumber.open(tmp_file_path) as pdf:
    full_text = ""
    for page in pdf.pages:
        full_text += page.extract_text()

splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=20)
chunks = splitter.split_text(full_text)

In [140]:
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=model_name)
vectorstore = FAISS.from_texts(chunks, embeddings)
retriever = vectorstore.as_retriever()



In [141]:
# Initialize the model
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
model = HuggingFaceEndpoint(repo_id=repo_id, max_length=50, temperature=0.1, token=hf_token)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\navee\.cache\huggingface\token
Login successful


In [142]:
from langchain.prompts import PromptTemplate

template = """
You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
print(prompt.format(context="Here is some context", question="Here is a question"))


You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: Here is some context

Question: Here is a question



In [143]:

chain = prompt | model 

chain.invoke({
    "context": "Anna's sister is Susan", 
    "question": "Who is Susan's sister?"
})


'\nAnswer: Anna.'

In [144]:
chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
)

questions = [
     "What is Loan Agreement?",
    "What is Cash Credit Loan?",
    "What are the Documents Required for a Cash Credit Loan?"
]

for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({'question': question})}")
    print("*************************\n")

Question: What is Loan Agreement?
Answer: Answer: A loan agreement is a legally binding contract between a lender and a borrower outlining the terms and conditions of a loan.
*************************

Question: What is Cash Credit Loan?
Answer: Answer: A Cash Credit Loan is a short term financing option offered to businesses like small and medium enterprises, sole proprietorships, partnerships, and corporations.
*************************

Question: What are the Documents Required for a Cash Credit Loan?
Answer: 
Answer: Proof of business registration, financial documents, income proof documents such as salary slips, bank statements, and an employee ID card.
*************************



In [146]:

chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
)


question = "What is Loan Agreement?"

print(f"Question: {question}")
print(f"Answer: {chain.invoke({'question': question})}")
print("*************************\n")

Question: What is Loan Agreement?
Answer: Answer: A loan agreement is a legally binding contract between a lender and a borrower outlining the terms and conditions of a loan.
*************************



In [171]:
response = chain.invoke({'question': question})

In [175]:
response

'Answer: A loan agreement is a legally binding contract between a lender and a borrower outlining the terms and conditions of a loan.'