In [1]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
import os 

In [2]:
# Prompt text for the QA chain. It has two placeholders, {context} and
# {question}, which are declared as the input variables of the PromptTemplate
# built from this string further down in the notebook.
prompt_template = """
    Answer the questions based on the provided context only.
    Please provide the most accurate response based on the question.
    
    <context>
    {context}
    </context>
    Question:{question}
"""


In [3]:
# Point a TextLoader at the résumé text file (path is relative to the
# notebook's working directory) and load it into a list of Document objects.
loader = TextLoader(file_path='my_resume_data.txt')
text_document_from_txt = loader.load()

In [4]:
# Embedding model used to vectorise the document chunks.
# Runs on CPU and L2-normalises the resulting embeddings.
huggingface_embedddings = HuggingFaceBgeEmbeddings(
    model_name='BAAI/bge-small-en-v1.5',
    model_kwargs=dict(device='cpu'),
    encode_kwargs=dict(normalize_embeddings=True),
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Display the loaded documents to sanity-check what the loader returned.
text_document_from_txt

[Document(metadata={'source': 'my_resume_data.txt'}, page_content='Somenath Choudhury is a technology enthusiast with expertise in Java, Python, JavaScript, ExpressJS, FastAPI, MongoDB, React, NextJS, Numpy, Pandas, Matplotlib, Seaborn, Scikit-learn, TensorFlow, Docker, and Git. He completed his Bachelor of Technology in Computer Science and Engineering from Lovely Professional University, India (2020 - 2024) with a CGPA of 7.36. He completed his Class XII at Bardhaman Sri Ramkrishna Saradapith Uchcha Vidyalaya in 2019 and his Class X at St. Pauls Academy in 2017. Somenath has developed several projects, including a [Tomato Leaf Disease Classification web app](https://github.com/somenath203/tomato-leaf-disease-classification-ten-classes) using deep learning models implemented in TensorFlow, FastAPI for sending images from frontend to seven models, ExpressJS for server-side authentication, MongoDB for data storage, and a React frontend for client side authentication, image upload and re

In [6]:
# Split the loaded documents into overlapping chunks:
# at most 1000 characters per chunk, with 200 characters shared between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
document_chunks = text_splitter.split_documents(documents=text_document_from_txt)

In [7]:
# Build a FAISS vector store from the chunks, embedding each one with the
# HuggingFace BGE model configured earlier.
vector_db = FAISS.from_documents(
    embedding=huggingface_embedddings,
    documents=document_chunks,
)

In [8]:
# Expose the vector store as a retriever: similarity search, top 3 hits.
retriever = vector_db.as_retriever(
    search_kwargs=dict(k=3),
    search_type="similarity",
)

In [9]:
# Read the Groq API key from the environment rather than embedding it in the
# notebook, so the secret is never saved or committed together with the file.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail fast with an actionable message instead of a later auth error.
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the notebook kernel."
    )

# Chat model served by Groq that the QA chain uses to generate answers.
llm_model = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="gemma2-9b-it"
)

In [10]:
# Wrap the raw template string in a PromptTemplate, declaring the two
# placeholders it contains.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [11]:
# Assemble the retrieval QA chain from the LLM, the retriever and the custom
# prompt. Source documents are returned alongside the generated answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm_model,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt),
    return_source_documents=True,
)

In [12]:
# Question to put to the QA chain.
user_input = 'tell me about the skills of Somenath'

In [13]:
# Run the chain; the question is passed under the "query" key.
result = retrievalQA.invoke(
    input={"query": user_input},
)

In [14]:
# Print only the generated answer text from the chain's output mapping.
print(result["result"])

Somenath Choudhury is skilled in:

* **Programming Languages:** Java, Python, JavaScript
* **Frameworks/Libraries:** ExpressJS, FastAPI, React, NextJS, Numpy, Pandas, Matplotlib, Seaborn, Scikit-learn, TensorFlow, Pytesseract
* **Databases:** MongoDB
* **Other Tools:** Docker, Git 


He is also proficient in building web applications, using deep learning models, and handling image processing tasks. 

