In [8]:
# pip install langchain openai transformers[torch] faiss-cpu pypdf

# Resume RAG System using LLaMA
This notebook demonstrates a Retrieval-Augmented Generation (RAG) system that allows you to ask questions about your resume PDF
using a local LLaMA model.
- No API keys required
- All local and open-source
- Powered by LangChain, FAISS, and LLaMA

# Import the libraries

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document

import re


# Load and Read the sample resume

In [4]:
# Load PDF
loader = PyPDFLoader("Sagar_Mokashi_DS_Resume_.pdf")
documents = loader.load_and_split()

In [5]:
print(documents[0])

page_content='SAGAR MOKASHI       sagar.mokashi1995@gmail.com 
Data Scientist            +91 – 9987686276/7021780449 
 https://www.linkedin.com/in/sagar-mokashi-811343142/        7 Years 
PROFILE SUMMARY 
Results-driven Data Scientist with over 5 years of experience in Data Science and a total of 7 years in the IT 
industry. Passionate about developing high-quality machine learning models to solve complex problems requiring 
human intervention. Skilled in Machine Learning, NLP, Data Analysis, and Problem-Solving, with expertise in 
Python and AI-driven solutions. Committed to developing innovative solutions that address real-world problems 
and drive business value through data-driven insights. 
EDUCATION 
Education Start Date End Date %Score/CGPA 
BE in Information 
Technology 
12-08-2013 31-05-2017 8.3 
HSE 08-08-2011 30-04-2013 84.5% 
SSC 15-06-2001 30-04-2011 94.36% 
SKILLS 
HAVE GOOD KNOWLEDGE ABOUT: Machine Learning, Natural Language Processing, Exploratory Data Analysis, 
Chatbo

In [6]:
print(type(documents[0]))

<class 'langchain_core.documents.base.Document'>


# clean data before doing the split

In [7]:
def clean_pdf_data(text):
    text = re.sub(r'[\r\n]+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()
    return text

In [8]:
cleaned_docs = clean_pdf_data(documents[0].page_content) if documents else ""

In [9]:
cleaned_docs

'SAGAR MOKASHI sagar.mokashi1995@gmail.com Data Scientist +91 – 9987686276/7021780449 https://www.linkedin.com/in/sagar-mokashi-811343142/ 7 Years PROFILE SUMMARY Results-driven Data Scientist with over 5 years of experience in Data Science and a total of 7 years in the IT industry. Passionate about developing high-quality machine learning models to solve complex problems requiring human intervention. Skilled in Machine Learning, NLP, Data Analysis, and Problem-Solving, with expertise in Python and AI-driven solutions. Committed to developing innovative solutions that address real-world problems and drive business value through data-driven insights. EDUCATION Education Start Date End Date %Score/CGPA BE in Information Technology 12-08-2013 31-05-2017 8.3 HSE 08-08-2011 30-04-2013 84.5% SSC 15-06-2001 30-04-2011 94.36% SKILLS HAVE GOOD KNOWLEDGE ABOUT: Machine Learning, Natural Language Processing, Exploratory Data Analysis, Chatbot, Text Classification, ML Model Monitoring, Clustering,

# Splitting resume data into chunks

In [10]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

chunks = text_splitter.split_text(cleaned_docs)

In [11]:
print(len(chunks))

6


In [12]:
print(chunks[0])

SAGAR MOKASHI sagar.mokashi1995@gmail.com Data Scientist +91 – 9987686276/7021780449 https://www.linkedin.com/in/sagar-mokashi-811343142/ 7 Years PROFILE SUMMARY Results-driven Data Scientist with over 5 years of experience in Data Science and a total of 7 years in the IT industry. Passionate about developing high-quality machine learning models to solve complex problems requiring human intervention. Skilled in Machine Learning, NLP, Data Analysis, and Problem-Solving, with expertise in Python


# Create vector embeddings and FAISS index

In [14]:
from langchain.schema import Document

chunked_docs = [Document(page_content=chunk) for chunk in chunks]

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
vectorstore = FAISS.from_documents(chunked_docs, embedding=embedding_model)

# Load local Llama model

In [22]:
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="Downloads/llama-2-7b-chat.Q4_K_M.gguf",  # Update path as needed
    n_ctx=2048,
    max_tokens=512,
    temperature=0.3,
    top_p=0.95,
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from Downloads/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32      

# Build RAG pipeline

In [23]:
from langchain.chains import RetrievalQA

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)



In [25]:
# Inferencing with RAG Pipeline

question = "What skills do I have?"
answer = qa_chain.invoke(question)
print('Answer:', answer)

Llama.generate: 503 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =  514974.17 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =  845204.48 ms /    28 runs   (30185.87 ms per token,     0.03 tokens per second)
llama_perf_context_print:       total time =  875076.69 ms /    29 tokens


Answer: {'query': 'What skills do I have?', 'result': ' You have good knowledge about Machine Learning, Natural Language Processing, Exploratory Data Analysis, Chatbot, and Text.'}


In [26]:
answer["result"]

' You have good knowledge about Machine Learning, Natural Language Processing, Exploratory Data Analysis, Chatbot, and Text.'

In [1]:
# question1 = "Which companies have I worked for?"
# answer1 = qa_chain.invoke(question1)
# print('Answer:', answer1)