In [1]:
! pip install --quiet langchain langchain_cohere langchain-openai tiktoken langchainhub chromadb langgraph

#### API Keys

In [2]:
import os

os.environ["OPENAI_API_KEY"] = ""

#### Anonymizing Emails & Phone No.s on Resume

In [3]:
import PyPDF2
import re

def anonymize_resume(text):
    text = re.sub(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        'EMAIL_REDACTED',
        text
    )

    phone_pattern = re.compile(
        r'(\+?\d{1,3}[\s.-]?)?'
        r'(\(?\d{3}\)?[\s.-]?)'           
        r'\d{3}[\s.-]?\d{4}'              
    )
    
    text = re.sub(
        r'(\+?\d{1,3}[\s.-]?)?(\(?\d{3}\)?[\s.-]?)\d{3}\s*[\-.\s]\s*\d{4}',
        'PHONE_REDACTED',
        text
    )

    text = re.sub(r'https?://\S+', 'URL_REDACTED', text)

    return text

In [4]:
pdf_file = open("SUPER RESUME.pdf", "rb")
reader = PyPDF2.PdfReader(pdf_file)
resume_text = ""
for page in reader.pages:
    resume_text += page.extract_text() + "\n"

pdf_file.close()

safe_text = anonymize_resume(resume_text)

#### Extracting metadata from resume

In [6]:
from openai import OpenAI
import json

client = OpenAI()

prompt = f"""
Extract the following metadata from this anonymized resume as JSON:
- skills
- projects
- tech_stack
- tags
- experience_level
- education

Resume text:
{safe_text}
"""

#### Pydantic Model for Resume metadata and its parser

In [7]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel
from langchain.chat_models import ChatOpenAI
from typing import List, Union, Optional

class ResumeMetadata(BaseModel):
    skills: Optional[List[str]] = None
    projects: Optional[List[str]] = None
    tech_stack: Optional[List[str]] = None
    tags: Optional[List[str]] = None
    experience_level: Optional[str] = None
    education: Optional[List[Union[str, dict]]] = None

metadata_parser = PydanticOutputParser(pydantic_object=ResumeMetadata)



#### Converting Metadata object into string for Chroma 

In [8]:
def convert_metadata_for_chroma(metadata: ResumeMetadata) -> dict:
    def list_to_str(lst):
        if not lst:
            return ""
        out = []
        for item in lst:
            if isinstance(item, dict):
                degree = item.get("degree", "")
                institution = item.get("institution", "")
                duration = item.get("duration", "")
                out.append(f"{degree} at {institution} ({duration})".strip())
            else:
                out.append(str(item))
        return ", ".join(out)

    return {
        "skills": list_to_str(metadata.skills),
        "projects": list_to_str(metadata.projects),
        "tech_stack": list_to_str(metadata.tech_stack),
        "tags": list_to_str(metadata.tags),
        "experience_level": metadata.experience_level or "",
        "education": list_to_str(metadata.education)
    }

In [9]:
llm = ChatOpenAI(model_name="gpt-4o-mini")

response = llm.predict(f"Extract metadata from the resume text:\n{safe_text}\n{parser.get_format_instructions()}")
metadata = metadata_parser.parse(response)

  llm = ChatOpenAI(model_name="gpt-4o-mini")
  response = llm.predict(f"Extract metadata from the resume text:\n{safe_text}\n{parser.get_format_instructions()}")


#### Splitting Resume data into chunks, making documents and creating the vector db

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100
)

chunks = text_splitter.split_text(safe_text)

In [12]:
from langchain.schema import Document

metadata_dict = convert_metadata_for_chroma(metadata)  
documents = []

for chunk in chunks:
    doc = Document(
        page_content=chunk,
        metadata=metadata_dict
    )
    documents.append(doc)

In [13]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [14]:
from langchain.vectorstores import Chroma

vector_db = Chroma.from_documents(documents, embeddings, collection_name="resumes")

#### Basic prompt for RAG with question and context

In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt_template = """
You are a helpful assistant. Use the following resume information to answer the user's question.

Resume Information:
{context}

Question:
{question}

Answer in a concise and professional manner.
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

In [17]:
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt
)


  llm_chain = LLMChain(


#### Simple Usage examples

In [18]:
job_description = """ 
About the job
Inclusion without Exception:

Tata Consultancy Services (TCS) is an equal opportunity employer, and embraces diversity in race, nationality, ethnicity, gender, age, physical ability, neurodiversity, and sexual orientation, to create a workforce that reflects the societies we operate in. Our continued commitment to Culture and Diversity is reflected in our people stories across our workforce and implemented through equitable workplace policies and processes.

TCS is an IT services, consulting, and business solutions organization that has been partnering with many of the world’s largest businesses in their transformation journeys for over 55 years. Its consulting-led, cognitive-powered portfolio of business, technology, and engineering services and solutions is delivered through its unique Location Independent Agile™ delivery model, recognized as a benchmark of excellence in software development. A part of the Tata group, India's largest multinational business group, TCS operates in 55 countries and employs over 612607,000 of the world’s best-trained consultants highly skilled individuals in 55 countries, including more than 10,000 in Canada. The company generated consolidated revenues of US $29 30 billion in the fiscal year ended March 31, 20254 and is listed on the BSE and the NSE in India. TCS' proactive stance on climate change and award-winning work with communities across the world have earned it a place in leading sustainability indices such as the MSCI Global Sustainability Index and the FTSE4Good Emerging Index.


Skills and Responsibilities:

•Must Have Technical Functional Skills Artificial Intelligence, Machine Learning Data Science professional with experience in enterprise architecture, AI product security, and digital transformation. 

•Strong hands-on expertise in cloud-native architecture (AWSGCP), threat modeling, secure AI system development, and AI governance. 

•Led mission-critical initiatives with cross-functional teams, aligning product innovation with cybersecurity best practices and compliance standards.


Roles Responsibilities-

•Strong Knowledge on AIML security, Web, API, JS Security, Mobile, Cloud Security-Engineer An Engineer with proven strong technical competence in building and implementing in AIML. 

•You will be part of a fast-paced team responsible for developing and delivering products and components focused on application security. 

•You will be challenged with identifying innovative ideas and proof of concepts to deliver against the existing and future needs. 

•Strong AI ML Engineer-Security Architecture-Development Experience Web, API, Mobile, Full Stack
•This role primarily involves performing DevOps activity in the team which manages high scalable enterprise applications. 

•You will be working on integrating off-the-shelf Application Security solutions into the environment. -OWASP, ASVS, NIST
 
Tata Consultancy Services Canada Inc. is committed to meeting the accessibility needs of all individuals in accordance with the Accessibility for Ontarians with Disabilities Act (AODA) and the Ontario Human Rights Code (OHRC). Should you require accommodations during the recruitment and selection process, please inform Human Resources.
 
Thank you for your interest in TCS. Candidates that meet the qualifications for this position will be contacted within a 2-week period. We invite you to continue to apply for other opportunities that match your profile.
"""

In [19]:
query = "Am i a good fit for this job desc?"

context_text = vector_db.similarity_search(query, k=3)  # top 3 matching documents

if job_description.strip():  # JD is not empty
    llm_input = f"Job Description:\n{job_description}\n\nResume Info:\n{context_text}"
else: 
    llm_input = f"Resume Info:\n{context_text}"
    
response = llm_chain.run(context=llm_input, question=query)
print(response)

  response = llm_chain.run(context=llm_input, question=query)


Based on your resume information, you appear to be a good fit for the job description at Tata Consultancy Services (TCS). Your experience in Artificial Intelligence and Machine Learning aligns well with the required technical skills, particularly in areas such as AI product security, cloud-native architecture, and data science. Your hands-on expertise with Python, TensorFlow, PyTorch, and cloud platforms like Google Cloud aligns with TCS's focus on innovative solutions and digital transformation.

Additionally, your projects demonstrate a strong background in developing scalable applications and working with real-time data, which is essential for the role. Your knowledge of security practices in AI and your experience with cross-functional collaboration further enhance your suitability for the position. 

Overall, you possess the relevant skills and experience that match the responsibilities outlined in the job description, making you a promising candidate for this role.


In [20]:
query = "Apart from the projects in my resume, what other project can i do that might be good for my resume for these types of jobs?"

context_text = vector_db.similarity_search(query, k=3)  # top 3 matching documents

if job_description.strip():  # JD is not empty
    llm_input = f"Job Description:\n{job_description}\n\nResume Info:\n{context_text}"
else: 
    llm_input = f"Resume Info:\n{context_text}"
    
response = llm_chain.run(context=llm_input, question=query)
print(response)

To enhance your resume for IT services roles focused on AI and machine learning, consider undertaking a project that integrates security measures with AI applications. Here are a few project ideas:

1. **AI-Powered Cyber Threat Detection**: Develop a system that uses machine learning to analyze network traffic and detect anomalies indicative of cyber threats. This could involve using supervised learning techniques to classify normal versus malicious traffic.

2. **Secure AI Model Deployment**: Create a project that focuses on deploying a machine learning model with emphasis on security best practices. You could explore topics like model encryption, secure APIs, and access management.

3. **Privacy-Preserving Machine Learning**: Implement a project that uses federated learning or differential privacy techniques to enable machine learning without compromising user data privacy.

4. **Vulnerability Scanning Tool**: Build a web application that scans for common vulnerabilities in web appli

In [22]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature = 0,
)


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [23]:
metadata_summary = f"""
Projects: {metadata_dict['projects']}
Skills: {metadata_dict['skills']}
Tech Stack: {metadata_dict['tech_stack']}
Tags: {metadata_dict['tags']}
Experience Level: {metadata_dict['experience_level']}
Education: {metadata_dict['education']}
"""

#### Summarizing Job description

In [24]:
jd_summary = []

summarize_prompt = PromptTemplate(
    input_variables=["job_description"],
    template="""
You are an assistant that processes job descriptions. 
Your task is to keep all important and relevant information (skills, technologies, responsibilities, qualifications, and role-specific details) 
and remove anything unnecessary, repetitive, or generic.

Here is the job description:
{job_description}

Return a clean, concise summary that captures only the essential information for understanding the role and matching a resume.
""",
)

chain = summarize_prompt | llm
jd_summary = chain.invoke({"job_description": job_description})
print(jd_summary.content)

**Job Title:** AI/ML Security Engineer

**Company:** Tata Consultancy Services (TCS)

**Location:** Canada

**About TCS:**
TCS is an IT services, consulting, and business solutions organization with over 55 years of experience, operating in 55 countries. The company focuses on transformation journeys for large businesses and emphasizes diversity and sustainability.

**Key Responsibilities:**
- Develop and deliver products focused on application security.
- Identify innovative ideas and proof of concepts to meet current and future needs.
- Perform DevOps activities for high-scalable enterprise applications.
- Integrate off-the-shelf Application Security solutions (e.g., OWASP, ASVS, NIST).

**Required Skills:**
- Expertise in Artificial Intelligence (AI), Machine Learning (ML), and Data Science.
- Strong hands-on experience in cloud-native architecture (AWS, GCP).
- Knowledge of AIML security, Web, API, JavaScript, Mobile, and Cloud Security.
- Experience in AI product security and digi

#### Routing to either vector store, web or llm based on user query

In [25]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

routing_prompt = PromptTemplate(
    input_variables=["user_question", "metadata_summary", "jd_summary"],
    template="""
You are an expert assistant who decides where to route a user's question.

The user's resume contains information about the following:
{metadata_summary}

The user also provided a job description summary:
{jd_summary}

Routing rules (priority order):
1. If the question is about the user's resume (projects, skills, experience, education) 
   or how the resume relates to the job description → respond with VECTORSTORE.
2. If the question is about very recent or ongoing current events (e.g., 2025 or later), 
   live news, or real-time trends → respond with WEBSEARCH.
3. Otherwise (including historical events, general knowledge, greetings, or casual conversation) → respond with LLM.

Respond ONLY with one of: VECTORSTORE, WEBSEARCH, or LLM.

User Question: {user_question}
"""
)

routing_chain = LLMChain(llm=llm, prompt=routing_prompt)

In [26]:
# Example
route = routing_chain.run({
    "user_question": "Who won the olympics recently?",
    "metadata_summary": metadata_summary,
    "jd_summary": jd_summary if jd_summary else "No job description provided."
})

print("ROUTED TO:", route.strip())

ROUTED TO: LLM


### LangFlow

In [69]:
os.environ['TAVILY_API_KEY'] = ""
os.environ["OPENAI_API_KEY"] = ""
from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults()

#### Basic RAG prompt

In [70]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

prompt_template = """
You are a helpful assistant. Use the following resume information to answer the user's question.

Resume Information:
{context}

Question:
{question}

Answer in a concise and professional manner.
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

In [71]:
retriever = vector_db.as_retriever()

#### Functions for nodes in our Langgraph

In [156]:
def retrieve(state):
    """Retrieve from vectorstore (resume info)"""
    question = state["question"]
    docs = vector_db.similarity_search(question)
    return {"question": question, "documents": docs}

In [157]:
def generate_from_docs(state):
    """Generate answer using top resume matches + optional JD"""
    query = state["question"]
    
    # Retrieve top 3 matching docs from vectorstore
    context_docs = vector_db.similarity_search(query, k=3)
    context_text = "\n".join([d.page_content for d in context_docs])
    
    jd = state.get("job_description", "")
    if hasattr(jd, "content"):  # AIMessage -> get the string
        jd = jd.content
    jd = jd.strip() if jd else ""

    # Build LLM input
    if jd:
        llm_input = f"Job Description:\n{jd}\n\nResume Info:\n{context_text}"
    else:
        llm_input = f"Resume Info:\n{context_text}"
    
    # Run LLM chain
    response = llm_chain.run(context=llm_input, question=query)
    
    return {"question": query, "generation": response}


In [158]:
def llm_fallback(state):
    """LLM fallback for greetings/general questions"""
    question = state["question"]
    generation = llm.predict(f"Answer the question/ greeting/ small talk concisely : {question}")
    return {"question": question, "generation": generation}

def web_search(state):
    """Web search for live events/trends"""
    question = state["question"]
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    prompt = f"""
    Question: {question}
    Context (from web): {web_results}

    Answer concisely.
    """
    generation = llm.predict(prompt)
    return {"question": question, "generation": generation}

#### Defining GrapgState

In [167]:
from typing import List

from typing_extensions import TypedDict

class GraphState(TypedDict):
    """|
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
    """

    question: str
    generation: str
    documents: List[str]
    job_description: str 

#### Pydantic model and parser for Grader

In [168]:
from langchain.chat_models import ChatOpenAI
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser

class GradeAnswer(BaseModel):
    binary_score: str = Field(
        description="Answer addresses the question: 'yes' or 'no'"
    )
    hallucination: str = Field(
        description="Does the answer contain hallucinations or information not in the context? 'yes' or 'no'"
    )

# Create parser
parser = PydanticOutputParser(pydantic_object=GradeAnswer)

In [161]:
def grade_generation_v_documents_and_question(state):
    question = state["question"]
    generation = state["generation"]
    context = "\n".join([d.page_content for d in state.get("documents", [])])  # optional
    
    prompt_text = f"""
You are a grader assessing an AI answer.

1. Check if the answer addresses/resolves the user's question.  
2. Check if the answer contains hallucinations (information not present in the context/resume provided).  

Respond ONLY in JSON using this schema:
{parser.get_format_instructions()}

User Question: {question}
AI Answer: {generation}
Context (resume/docs): {context}
"""

    response = llm.predict(prompt_text)
    graded = parser.parse(response)

    if graded.binary_score.lower() == "yes" and graded.hallucination.lower() == "no":
        return "useful"
    elif graded.binary_score.lower() == "yes" and graded.hallucination.lower() == "yes":
        return "not supported"
    else:
        return "not useful"

In [162]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

routing_prompt = PromptTemplate(
    input_variables=["user_question", "metadata_summary", "jd_summary"],
    template="""
You are an expert assistant who decides where to route a user's question.

The user's resume contains information about the following:
{metadata_summary}

The user also provided a job description summary:
{jd_summary}

Routing rules (priority order):
1. If the question is about the user's resume (projects, skills, experience, education) 
   or how the resume relates to the job description → respond with VECTORSTORE.
2. If the question is about very recent or ongoing current events (e.g., 2025 or later), 
   live news, or real-time trends → respond with WEBSEARCH.
3. Otherwise (including historical events, general knowledge, greetings, or casual conversation) → respond with LLM.

Respond ONLY with one of: VECTORSTORE, WEBSEARCH, or LLM.

User Question: {user_question}
"""
)

routing_chain = LLMChain(llm=llm, prompt=routing_prompt)

In [163]:
def route_question(state):
    question = state["question"].lower()
    jd_summary = state.get("job_description", "")
    if hasattr(jd_summary, "content"):
        jd_summary = jd_summary.content
    jd_summary = jd_summary.lower() if jd_summary else ""

    if jd_summary and any(word in question for word in ["company", "role", "position", "salary", "location"]):
        return "retrieve"  # VECTORSTORE / generate_from_docs node

    # Otherwise use LLM chain routing
    route = routing_chain.run({
        "user_question": state["question"],
        "metadata_summary": state.get("metadata_summary", ""),
        "jd_summary": state.get("job_description", "")
    }).strip().lower()

    if route == "vectorstore":
        return "retrieve"
    elif route == "websearch":
        return "web_search"
    else:
        return "llm_fallback"


In [164]:
from langgraph.graph import END, StateGraph, START 

workflow = StateGraph(GraphState)

In [165]:
workflow = StateGraph(GraphState)

workflow.add_node("web_search", web_search)
workflow.add_node("retrieve", retrieve)
workflow.add_node("generate", generate_from_docs)
workflow.add_node("llm_fallback", llm_fallback)

workflow.add_conditional_edges(
    START,
    route_question,
    {
        "web_search": "web_search",
        "retrieve": "retrieve",
        "llm_fallback": "llm_fallback",
    },
)

workflow.add_edge("retrieve", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "useful": END,
        "not useful": "web_search",
        "not supported": "generate",
    },
)

workflow.add_edge("llm_fallback", END)

app = workflow.compile()


In [166]:
import pprint

inputs = {
    "question": "What chnages can I make to my resume to better fit the job description?",
    "metadata_summary": metadata_summary,
    "job_description": jd_summary
}
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint.pprint(f"Node '{key}':")
        # print full state at each node
    pprint.pprint("\n---\n")

# Final generation
pprint.pprint(value["generation"])

"Node 'retrieve':"
'\n---\n'
"Node 'generate':"
'\n---\n'
('To better align your resume with the AI/ML Security Engineer position at '
 'Tata Consultancy Services, consider the following changes:\n'
 '\n'
 '1. **Highlight Relevant Experience:**\n'
 '   - Emphasize any experience related to application security, particularly '
 'in the context of AI/ML. If you have worked on security aspects of your '
 'projects, make sure to detail those contributions.\n'
 '\n'
 '2. **Incorporate Key Responsibilities:**\n'
 '   - Add specific examples of how you have developed or delivered products '
 'focused on application security. Mention any innovative ideas or proof of '
 'concepts you have created that align with security needs.\n'
 '\n'
 '3. **Showcase DevOps Experience:**\n'
 '   - If you have experience with DevOps practices, particularly in '
 'high-scalable enterprise applications, include that in your skills or '
 'project descriptions.\n'
 '\n'
 '4. **Emphasize Cloud-Native Architecture:*