In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# setup gemini api key
api_key = os.environ["GOOGLE_API_KEY"]

# setup llm
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)

In [2]:
# load resume and split
loader = PyPDFLoader("RESUME_RVirtus.pdf")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
print("text splitter:", text_splitter)
print("documents:", docs)

text splitter: <langchain_text_splitters.character.RecursiveCharacterTextSplitter object at 0x0000021FE2515CD0>
documents: [Document(metadata={'producer': 'Skia/PDF m140 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'resume_v2.docx', 'source': 'RESUME_RVirtus.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Reymund  M.  Virtus  reymundvirtus@gmail.com •  +639105312080  •  LinkedIn •  GitHub •  reymundvirtus.com  \nTECHNICAL  SKILLS__________________________________________________________  \n \nProgramming  Languages:  Python,  TypeScript,  JavaScript,  Dart,  PHP,  GO,  C++  Frameworks:  Next.js,  React,  Node.js,  Flask,  Django,  Laravel,  Flutter,  Serverless  Framework  Databases:  MySQL,  PostgreSQL,  DynamoDB,  MongoDB,  Amazon  Neptune,  Neo4j  Cloud  Platforms:  Amazon  Web  Services  (AWS),  Microsoft  Azure,  Google  Firebase  Tools:  Docker,  Git,  GitHub,  Terraform,  CloudFormation,  Bitbucket,  Jira  \n \nRELEVANT  EXPERIENCE

In [3]:
# create embeddings and index intro FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)
print("embeddings:", embeddings)
print("vectorstore:", vectorstore)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


embeddings: client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='sentence-transformers/all-MiniLM-L6-v2' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False show_progress=False
vectorstore: <langchain_community.vectorstores.faiss.FAISS object at 0x0000021F92116870>


In [4]:
# setup retriever chain
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    retriever = retriever,
)
print("retriever:", retriever)
print("qa_chain:", qa_chain)

retriever: tags=['FAISS', 'HuggingFaceEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000021F92116870> search_kwargs={}
qa_chain: verbose=False combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]), llm=ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), cli