In [17]:
import torch

# Print the results of the MPS backend's is_available() and is_built() checks,
# one per line, in that order.
mps_backend = torch.backends.mps
for probe in (mps_backend.is_available, mps_backend.is_built):
    print(probe())


True
True


In [28]:
# Read every .docx resume from a directory using python-docx
import os
from docx import Document

resumes = []
resumes_dir = "resumes"
# os.listdir returns entries in arbitrary order; sort so that resumes[0]
# refers to the same file on every run.
for filename in sorted(os.listdir(resumes_dir)):
    # Skip Word's "~$..." owner/lock files (they end in .docx but are not real
    # documents) and accept the extension regardless of case.
    if filename.startswith("~$") or not filename.lower().endswith(".docx"):
        continue
    doc = Document(os.path.join(resumes_dir, filename))
    # One string per resume: the text of doc.paragraphs joined with newlines.
    text = "\n".join(para.text for para in doc.paragraphs)
    resumes.append(text)
print(f"Loaded {len(resumes)} resumes.")

Loaded 1 resumes.


In [29]:
# Display the first 1500 characters of the first resume
print(resumes[0][:1500])

Highly skilled and experienced Argentine-Italian IT professional with a strong technical background and over 7 years of leadership experience. Proven track record of facilitating discussions with senior leadership, addressing technical trade-offs, implementing best practices, and mitigating risks. Thrives in large, dynamic, and culturally diverse environments. Seeking a challenging role in an adaptable and forward-thinking company, where I can leverage my expertise to drive innovation, deliver exceptional solutions, and contribute to the company's growth and success. 

Work History
2022 - Present
Solution Architect, Data & Analytics / ExxonMobil, Houston, TX
Shape strategic vision and roadmaps for data and analytics technologies within the Unconventional Upstream domain. Define future states and spearhead innovative solutions to optimize operations and decision-making processes.
Collaborate closely with cross-functional teams, including product owners, data engineers, and data scientis

In [30]:
# Print the whole `resumes` list (list repr, so newlines appear as "\n").
# NOTE(review): this dumps every resume in full; consider a truncated preview
# before sharing the notebook.
print(resumes)

["Highly skilled and experienced Argentine-Italian IT professional with a strong technical background and over 7 years of leadership experience. Proven track record of facilitating discussions with senior leadership, addressing technical trade-offs, implementing best practices, and mitigating risks. Thrives in large, dynamic, and culturally diverse environments. Seeking a challenging role in an adaptable and forward-thinking company, where I can leverage my expertise to drive innovation, deliver exceptional solutions, and contribute to the company's growth and success. \n\nWork History\n2022 - Present\nSolution Architect, Data & Analytics / ExxonMobil, Houston, TX\nShape strategic vision and roadmaps for data and analytics technologies within the Unconventional Upstream domain. Define future states and spearhead innovative solutions to optimize operations and decision-making processes.\nCollaborate closely with cross-functional teams, including product owners, data engineers, and data 

In [39]:
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# Read the API key from the variables defined in the .env file
import os
from dotenv import load_dotenv

from langchain_text_splitters import RecursiveCharacterTextSplitter



# Load the variables from .env into the environment, then read the Pinecone key.
load_dotenv()
pinecone_api_key = os.getenv("PINECONE_API_KEY")

# 1. Create the Pinecone client
pc = Pinecone(api_key=pinecone_api_key)

# 2. Make sure the index exists (it is only created on the first run)
index_name = "resume-embeddings-2"

index_exists = index_name in pc.list_indexes().names()
if not index_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # depends on the embedding model
        metric="cosine",  # or "euclidean", "dotproduct"
        spec=serverless_spec,
    )

# 3. Connect to the index
index = pc.Index(index_name)




In [40]:
# Chunking settings passed to RecursiveCharacterTextSplitter when the resume
# text is split (presumably measured in characters -- TODO confirm).
chunk_size, chunk_overlap = 500, 50

In [41]:
# 4. Load the embedding model and split the resume text into chunks
model = SentenceTransformer('all-MiniLM-L6-v2')  # 384 dimensions

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
# Chunk every loaded resume rather than only resumes[0]; with a single resume
# this yields exactly the same list, and an empty `resumes` yields [] instead
# of raising IndexError.
texts = [chunk for resume in resumes for chunk in text_splitter.split_text(resume)]
print(texts)


['Highly skilled and experienced Argentine-Italian IT professional with a strong technical background and over 7 years of leadership experience. Proven track record of facilitating discussions with senior leadership, addressing technical trade-offs, implementing best practices, and mitigating risks. Thrives in large, dynamic, and culturally diverse environments. Seeking a challenging role in an adaptable and forward-thinking company, where I can leverage my expertise to drive innovation, deliver', "my expertise to drive innovation, deliver exceptional solutions, and contribute to the company's growth and success.", 'Work History\n2022 - Present\nSolution Architect, Data & Analytics / ExxonMobil, Houston, TX\nShape strategic vision and roadmaps for data and analytics technologies within the Unconventional Upstream domain. Define future states and spearhead innovative solutions to optimize operations and decision-making processes.', 'Collaborate closely with cross-functional teams, inclu

In [42]:



# 6. Embed all chunks in one batch and upsert them into the index.
embeddings = model.encode(texts)

# One record per chunk: a positional id, the embedding as a plain list, and the
# chunk text stored under the "text" metadata key.
vectors_to_upsert = [
    dict(id=f"doc-{chunk_no}", values=vector.tolist(), metadata=dict(text=chunk))
    for chunk_no, (chunk, vector) in enumerate(zip(texts, embeddings))
]

index.upsert(vectors=vectors_to_upsert)


UpsertResponse(upserted_count=15, _response_info={'raw_headers': {'date': 'Wed, 26 Nov 2025 00:32:03 GMT', 'content-type': 'application/json', 'content-length': '20', 'connection': 'keep-alive', 'x-pinecone-request-lsn': '1', 'x-pinecone-request-logical-size': '28724', 'x-pinecone-request-latency-ms': '602', 'x-pinecone-request-id': '6557173867556376823', 'x-envoy-upstream-service-time': '454', 'grpc-status': '0', 'server': 'envoy'}})

In [43]:

# 7. Search for similar texts
query_text = "What is the education?"
query_embedding = model.encode(query_text).tolist()

# Ask for the five closest vectors and include their stored metadata.
search_params = dict(vector=query_embedding, top_k=5, include_metadata=True)
results = index.query(**search_params)

for hit in results['matches']:
    print(f"Score: {hit['score']}, Text: {hit['metadata']['text']}")

Score: 0.2211833, Text: Highly skilled and experienced Argentine-Italian IT professional with a strong technical background and over 7 years of leadership experience. Proven track record of facilitating discussions with senior leadership, addressing technical trade-offs, implementing best practices, and mitigating risks. Thrives in large, dynamic, and culturally diverse environments. Seeking a challenging role in an adaptable and forward-thinking company, where I can leverage my expertise to drive innovation, deliver
Score: 0.187477127, Text: Nov 2004
Computer Science Engineer / Universidad Nacional del Sur, Bahía Blanca
Bachelor’s Degree
Certifications
SCRUM Product Owner I (PSPO) (2023)
AWS Solutions Architect Associate (2022)
AWS Cloud Practitioner (2021)
TOGAF Foundation (2020)
SA 4.0 SAFe 4 Agilist (2018)
RHCE Red Hat Certified Engineer (2010)
Languages
English – Bilingual
Spanish – Native
French – Elementary
Score: 0.168674469, Text: my expertise to drive innovation, deliver exce

In [52]:
import os
from random import randint

import langchain
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.embeddings import Embeddings
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_groq import ChatGroq
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

In [None]:
# Instantiate the Groq chat model used by the LangChain pipeline.
# NOTE(review): confirm this model id is still served by Groq before running.
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    # other params...
)


class SentenceTransformerEmbeddings(Embeddings):
    """Expose a SentenceTransformer model through LangChain's Embeddings interface."""

    def __init__(self, st_model):
        # st_model: an object with an `encode` method returning array-likes
        # (here the notebook's SentenceTransformer instance).
        self._st_model = st_model

    def embed_documents(self, texts):
        """Embed a list of strings; returns one list of floats per input."""
        return self._st_model.encode(list(texts)).tolist()

    def embed_query(self, text):
        """Embed a single query string; returns a list of floats."""
        return self._st_model.encode(text).tolist()


# Build the retriever over the Pinecone index populated earlier in the notebook.
# The same embedding model is reused for queries, and text_key="text" matches
# the metadata key the chunks were upserted with. k=5 mirrors the top_k used in
# the manual similarity query.
vectorstore = PineconeVectorStore(
    index=index,
    embedding=SentenceTransformerEmbeddings(model),
    text_key="text",
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Incorporate the retriever into a question-answering chain.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# The chat model is bound to `llm` above; the previous reference to an
# undefined `chat` raised NameError.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for `session_id`, creating it on first use.

    A new ChatMessageHistory is stored in the module-level `store` the first
    time a session id is seen; later calls return that same object. The
    session id is printed on every call.
    """
    try:
        history = store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
    print("session_id", session_id)
    return history


# Wrap the RAG chain with per-session message history. The history object for a
# call is obtained from get_session_history; the keyword arguments name the
# input key ("input"), the prompt placeholder that receives the history
# ("chat_history"), and the output key to record ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

def get_completion(usr_txt, session_id):
    """Run one conversational RAG turn and return the answer.

    Args:
        usr_txt: The user's message, passed to the chain under the "input" key.
        session_id: Identifier passed as the configurable "session_id", which
            selects the chat history used for this turn.

    Returns:
        The value stored under the "answer" key of the chain's result.
    """
    result = conversational_rag_chain.invoke(
        {"input": usr_txt},
        # session_id selects (and on first use creates) the history in `store`
        config={"configurable": {"session_id": session_id}},
    )
    # Return the answer from the chain's own output rather than discarding it
    # and re-reading the last message from the global `store`.
    return result["answer"]
