In [2]:
from getpass import getpass
from os import environ

from langchain_openai import ChatOpenAI

In [None]:
# Credentials: never hardcode (or blank out) the API key in the notebook.
# Reuse the key already exported in the environment and only prompt for it,
# without echoing, when it is missing or empty.
if not environ.get('OPENAI_API_KEY'):
    environ['OPENAI_API_KEY'] = getpass("OpenAI API key: ")

# Endpoint the OpenAI-compatible client should talk to.
environ['OPENAI_BASE_URL'] = 'https://api.ai.it.cornell.edu'

# Chat model shared by the rest of the notebook; no key/URL is passed explicitly,
# so the client is expected to pick both up from the environment variables above.
llm = ChatOpenAI(
    model="openai.gpt-4o",
    temperature=0.2,  # low temperature keeps answers close to deterministic
)

## Load Source Text

In [4]:
from langchain_community.document_loaders import PyPDFLoader
# Load the resume PDF from the notebook's working directory (relative path).
# NOTE(review): presumably yields one Document per PDF page — the metadata displayed
# further down reports 'total_pages': 2 and 'page': 0 for the first entry; confirm.
loader = PyPDFLoader("Resume Maria Chang.pdf")
documents = loader.load()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 27 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)


In [5]:
# Inspect the metadata attached to the first loaded document.
documents[0].metadata

{'producer': 'macOS Version 26.0 (Build 25A354) Quartz PDFContext',
 'creator': 'PyPDF',
 'creationdate': "D:20250923015544Z00'00'",
 'moddate': "D:20250923015544Z00'00'",
 'source': 'Resume Maria Chang.pdf',
 'total_pages': 2,
 'page': 0,
 'page_label': '1'}

In [6]:
# Print the raw text of the first loaded document to check the PDF extraction.
# NOTE(review): the printed text contains personal contact details; clear this
# cell's output before sharing or committing the notebook.
print(documents[0].page_content)

Maria Susana Chang Vegas Ithaca, NY 14850 | (607) 379-3864 | mc2995@cornell.edu | LinkedIn | Portfolio 
EDUCATION Cornell University, Ithaca, NY  May 2026 Master of Professional Studies in Information Science  Relevant Coursework: HCI design, Applied LLM, Innovation Management  Politecnico di Milano, Milan, Italy Sep 2025 Full-time International Master of Business Administration Relevant Coursework: Technology Management, Strategy, Project Management, Market Research  Universidad de Piura, Piura, Peru Dec 2019 Bachelor of Science in Industrial & Systems Engineering  SKILLS • Computer: SQL, Python, HTML/CSS, BigQuery, Amplitude, Figma, Miro, Jira, Looker Studio, Tableau, PowerBi, Wix, Framer, VTEX, Azure, Visual Studio Code, Github • Languages: Spanish and Italian  WORK EXPERIENCE IT Strategy Consultant Apr 2024-Jan 2025 Brachitek, Milan, Italy • Developed a responsive, user-friendly website, enhancing the company's online presence and credibility for B2B IT hardware/software solutions.

## Split the document

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [8]:
# Splitter settings; both values are handed to RecursiveCharacterTextSplitter below.
# NOTE(review): sizes are presumably counted in characters (the splitter's default
# length function) — confirm against the splitter documentation.
chunk_size = 500  # upper bound on the size of a single chunk
chunk_overlap = 50  # amount of text repeated between neighbouring chunks

In [9]:
# Build the splitter from the chunk size and overlap chosen above.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

In [10]:
# Split every loaded document; `chunks` is what gets embedded and indexed later on.
chunks = text_splitter.split_documents(documents)

In [11]:
# Show each chunk followed by a divider line so the split boundaries are easy to spot.
for chunk in chunks:
    print(chunk.page_content, "-----", sep="\n")

Maria Susana Chang Vegas Ithaca, NY 14850 | (607) 379-3864 | mc2995@cornell.edu | LinkedIn | Portfolio
-----
EDUCATION Cornell University, Ithaca, NY  May 2026 Master of Professional Studies in Information Science  Relevant Coursework: HCI design, Applied LLM, Innovation Management  Politecnico di Milano, Milan, Italy Sep 2025 Full-time International Master of Business Administration Relevant Coursework: Technology Management, Strategy, Project Management, Market Research  Universidad de Piura, Piura, Peru Dec 2019 Bachelor of Science in Industrial & Systems Engineering  SKILLS • Computer: SQL,
-----
& Systems Engineering  SKILLS • Computer: SQL, Python, HTML/CSS, BigQuery, Amplitude, Figma, Miro, Jira, Looker Studio, Tableau, PowerBi, Wix, Framer, VTEX, Azure, Visual Studio Code, Github • Languages: Spanish and Italian  WORK EXPERIENCE IT Strategy Consultant Apr 2024-Jan 2025 Brachitek, Milan, Italy • Developed a responsive, user-friendly website, enhancing the company's online presen

## Index chunks into a vector db (ChromaDB)

In [12]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [13]:
# Embed every chunk and index the vectors in a Chroma store.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(model="openai.text-embedding-3-large"),
)

## Test Similarity Search

In [14]:
# Sanity check: fetch the chunks most similar to an ad-hoc query
# (no `k` is given, so the library's default number of results applies).
vectorstore.similarity_search("product manager advice")

[Document(id='24146878-8454-4762-8e0d-a3bfcea50987', metadata={'page': 0, 'producer': 'macOS Version 26.0 (Build 25A354) Quartz PDFContext', 'total_pages': 2, 'creator': 'PyPDF', 'moddate': "D:20250923015544Z00'00'", 'creationdate': "D:20250923015544Z00'00'", 'source': 'Resume Maria Chang.pdf', 'page_label': '1'}, page_content='design principles to optimize NLP algorithms for enhanced job matching systems. Associate Product Manager Jan 2022-Sep 2023 Rappi, Lima, Peru • Developed and optimized Rappi Ads advertising products, including in-app placements and automated billing flows, reducing refunds and improving monetization. • Led CRM digitalization projects, managing the complete product lifecycle from design to adoption, achieving a 92% usage rate, and training 50+ cross-functional team members. • Made data-driven'),
 Document(id='c8bb5084-4f63-4371-a946-c7a64645b510', metadata={'page_label': '1', 'total_pages': 2, 'page': 0, 'source': 'Resume Maria Chang.pdf', 'moddate': "D:202509230

In [15]:
# Same query, but every hit comes back as a (Document, score) pair.
# NOTE(review): for Chroma the score is presumably a distance, i.e. lower means
# more similar — confirm before using it as a relevance threshold.
vectorstore.similarity_search_with_score("product manager advice")

[(Document(id='24146878-8454-4762-8e0d-a3bfcea50987', metadata={'page_label': '1', 'source': 'Resume Maria Chang.pdf', 'total_pages': 2, 'moddate': "D:20250923015544Z00'00'", 'creationdate': "D:20250923015544Z00'00'", 'creator': 'PyPDF', 'producer': 'macOS Version 26.0 (Build 25A354) Quartz PDFContext', 'page': 0}, page_content='design principles to optimize NLP algorithms for enhanced job matching systems. Associate Product Manager Jan 2022-Sep 2023 Rappi, Lima, Peru • Developed and optimized Rappi Ads advertising products, including in-app placements and automated billing flows, reducing refunds and improving monetization. • Led CRM digitalization projects, managing the complete product lifecycle from design to adoption, achieving a 92% usage rate, and training 50+ cross-functional team members. • Made data-driven'),
  1.0092642307281494),
 (Document(id='c8bb5084-4f63-4371-a946-c7a64645b510', metadata={'page': 0, 'creator': 'PyPDF', 'page_label': '1', 'moddate': "D:20250923015544Z00'

## Setup retrieval

In [16]:
# Wrap the vector store in a retriever that returns the 5 most similar chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [17]:
def format_docs(docs):
    """Concatenate the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [18]:
# Try the retriever end to end: fetch chunks for a sample query and join them
# into one context string.
format_docs(retriever.invoke("product manager experience"))

'design principles to optimize NLP algorithms for enhanced job matching systems. Associate Product Manager Jan 2022-Sep 2023 Rappi, Lima, Peru • Developed and optimized Rappi Ads advertising products, including in-app placements and automated billing flows, reducing refunds and improving monetization. • Led CRM digitalization projects, managing the complete product lifecycle from design to adoption, achieving a 92% usage rate, and training 50+ cross-functional team members. • Made data-driven\n\n• Co-wrote investment pitch and proposed strategic initiatives for expansion into new consumer markets. • Created digital business cards based on Bootstrap for wearable devices, improving client information exchange and networking capabilities. • Managed stakeholder input and requirements while delivering scalable web solutions. Product Manager Intern May 2024-Sep 2024 Jobrapido, Milan, Italy • Designed and tested ML models for keyword extraction from job postings, achieving 80% accuracy\n\ncro

In [19]:
from langchain_core.prompts import PromptTemplate

# Question-answering prompt. `{question}` and `{context}` are placeholders that are
# filled in when the prompt is formatted. The indentation inside the triple-quoted
# string is part of the text that is sent to the model.
template = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    
    Question: {question} 
    
    Context: {context} 
    
    Answer:
"""
# Turn the raw string into a reusable prompt template object.
prompt = PromptTemplate.from_template(template)

## Alternatives: RAG Workflow without LangGraph

In [20]:
# --- Alternative: Manual RAG without LangGraph ---
# Minimal workflow: retrieve top-k chunks, build a compact prompt, call the LLM, and show sources.

from langchain_chroma import Chroma
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import OpenAIEmbeddings

# Reuse the index built earlier in the notebook. Calling Chroma.from_documents(...)
# a second time would embed every chunk again and append the copies to the same
# default in-memory collection, so searches would start returning duplicated chunks.
# The index is only built here when this cell is run without the earlier one.
if "vectorstore" not in globals():
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(model="openai.text-embedding-3-large"),
    )

def format_docs(docs):
    """Merge document texts into one context string with a visible divider.

    Note: this redefinition shadows the ``format_docs`` declared earlier in the
    notebook, which joined the texts with a blank line only.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        ``---`` line surrounded by blank lines; an empty string for no documents.
    """
    divider = "\n\n---\n\n"
    return divider.join(item.page_content for item in docs)

# Query to answer and the number of chunks to retrieve for it.
question = "product management experience"
k = 5

# 1) Retrieve the k chunks most similar to the question.
#    `k` is passed through so that changing the setting above actually takes effect.
docs = vectorstore.similarity_search(question, k=k)

# 2) Build a concise instruction with the retrieved context
context = format_docs(docs)
system_instructions = (
    "You are an experienced career advisor. Your role is to provide personalized career guidance based on the documents provided to you.\n"
    "Use the provided context and analyze the information to provide actionable advice to help the person achieve their objectives. Focus on being specific, practical, and encouraging in your recommendations\n"
    "Base your advice solely on the information provided in the documents. If important information is missing, acknowledge this and work with what you have or ask clarifying questions.\n"
    "Keep your advice concise and focused. Prioritize the most important insights rather than being exhaustive. Aim for clarity and impact over comprehensiveness.\n"
    f"Context:\n{context}"
)

# 3) Ask the model: the instructions plus context go in the system message,
#    the user's question in the human message.
response = llm.invoke([
    SystemMessage(content=system_instructions),
    HumanMessage(content=question),
])

# 4) Display answer + sources (one numbered line per retrieved chunk)
print("Answer:\n", response.content, "\n")
print("Sources:")
for i, d in enumerate(docs, 1):
    # Fall back to a placeholder when a chunk carries no "source" metadata.
    src = d.metadata.get("source", "(no source)")
    print(f"[{i}] {src}")


Answer:
 Based on your experience, you have a solid foundation in product management with a focus on digital products, data-driven decision-making, and cross-functional collaboration. Here’s a breakdown of your product management experience:

### Key Strengths:
1. **Product Development and Optimization**:
   - At Rappi, you developed and optimized advertising products, including in-app placements and automated billing flows, which improved monetization and reduced refunds.
   - At Superfood Holding, you managed digital products and delivered scalable web solutions.

2. **Data-Driven Decision Making**:
   - Proficient in SQL and Power BI, you conducted A/B testing and used data insights to inform product decisions.
   - You achieved a 75% average goal achievement rate by aligning PRDs with strategic objectives.

3. **Cross-Functional Collaboration**:
   - Partnered with engineering, design, and business teams to deliver scalable solutions for diverse workflows.
   - Trained 50+ cross-fu