In [1]:
%pip install langchain-openai langchain-community langchain-text-splitters langchain-core faiss-cpu python-dotenv pypdf langchain-openai langsmith

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import shutil
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader, PyPDFLoader
from dotenv import load_dotenv
# from google.colab import userdata

# Load API key
# os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
load_dotenv()
print("‚úì Setup complete")
print("API Key:", os.environ.get("OPENAI_API_KEY", "Not set")[:20] + "...")

# Create folders if they don't exist
os.makedirs("./resumes", exist_ok=True)
os.makedirs("./resume_db", exist_ok=True)
print("‚úì Folders created: ./resumes and ./resume_db")


  from .autonotebook import tqdm as notebook_tqdm


‚úì Setup complete
API Key: sk-proj-2D_k1B8OV3MW...
‚úì Folders created: ./resumes and ./resume_db


### 1. Define your functions as LangChain Tools

LangChain's `@tool` decorator allows you to expose Python functions to an LLM agent. The docstring of the function is crucial as the agent uses it to understand what the tool does and what arguments it expects.

In [3]:
from langchain.tools import tool

# Wrap the existing functions as tools

@tool
def ingest_resumes_tool():
    """Ingest new resumes from the './resumes' folder into the vector database. Use this tool when new resumes need to be processed or the database needs to be updated."""
    return ingest_resumes()

@tool
def list_resumes_tool():
    """List all the unique resume file names currently stored in the vector database. Use this tool to see what resumes have been ingested."""
    return list_resumes()

@tool
def search_resumes_tool(skills: str):
    """Search for candidates whose resumes match the given skills. Input should be a comma-separated string of required skills (e.g., 'Python, Machine Learning, Docker'). Use this tool to find candidates for a job opening."""
    return search_resumes(skills)

@tool
def clear_resumes_tool():
    """Clear all resumes from the vector database. This will delete the entire resume database. Use this tool to start fresh or remove all stored resume data."""
    return clear_resumes()

# Note: generate_resume and save_resume are not included as direct agent tools here
# because they typically involve structured input (dictionaries) that are harder for a generic agent to construct directly from natural language.
# However, you could create a more complex tool that takes simpler inputs and then constructs the dictionary internally.


# Create a list of all available tools
tools = [
    ingest_resumes_tool,
    list_resumes_tool,
    search_resumes_tool,
    clear_resumes_tool
]

### 2. Set up the LangChain Agent

Now, you'll need an LLM to act as the agent's 'brain' and an `AgentExecutor` to run the agent with the defined tools.

In [4]:
import os
import shutil
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langchain.agents import create_agent
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader, PyPDFLoader

# Initialize embeddings globally
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Define the actual resume management functions

def ingest_resumes():
    """Load resumes from ./resumes folder and add to vector database"""
    print("üì• Ingesting resumes...")
    # Load text files
    txt_loader = DirectoryLoader("./resumes", glob="**/*.txt", loader_cls=TextLoader)
    txt_docs = txt_loader.load()
    # Load PDF files
    pdf_loader = DirectoryLoader("./resumes", glob="**/*.pdf", loader_cls=PyPDFLoader)
    pdf_docs = pdf_loader.load()
    all_docs = txt_docs + pdf_docs
    
    if not all_docs:
        print("‚ùå No resumes found in ./resumes folder")
        return "No resumes found in ./resumes folder"
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(all_docs)
    
    # Check if FAISS index file exists (not just folder)
    db_file_exists = os.path.exists("./resume_db/index.faiss")
    
    if db_file_exists:
        # Load existing and add new documents
        vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
        vectorstore.add_documents(chunks)
        print(f"‚úì Added {len(chunks)} chunks from {len(all_docs)} resumes")
        result = f"Added {len(chunks)} chunks from {len(all_docs)} resumes to existing database"
    else:
        # Create new vector store
        vectorstore = FAISS.from_documents(chunks, embeddings)
        print(f"‚úì Created new database with {len(chunks)} chunks from {len(all_docs)} resumes")
        result = f"Created new database with {len(chunks)} chunks from {len(all_docs)} resumes"
    
    vectorstore.save_local("./resume_db")
    print("‚úì Database saved successfully")
    return result

def list_resumes():
    """List all resumes stored in vector database"""
    print("üìã Listing resumes...")
    if not os.path.exists("./resume_db/index.faiss"):
        print("‚ùå No database found. Please ingest resumes first.")
        return "No database found. Please ingest resumes first."
    
    vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
    # Get all documents
    all_docs = vectorstore.docstore._dict
    
    # Extract unique sources
    sources = set()
    for doc in all_docs.values():
        if hasattr(doc, 'metadata') and 'source' in doc.metadata:
            sources.add(os.path.basename(doc.metadata['source']))
    
    result = f"Found {len(sources)} resumes in database:\n"
    for i, source in enumerate(sorted(sources), 1):
        result += f"{i}. {source}\n"
        print(f" {i}. {source}")
    
    return result

def search_resumes(skills):
    """Search resumes by skills and return best matches"""
    print(f"üîç Searching for candidates with skills: {skills}")
    if not os.path.exists("./resume_db/index.faiss"):
        print("‚ùå No database found. Please ingest resumes first.")
        return "No database found. Please ingest resumes first."
    
    vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
    
    # Search for relevant resume chunks
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    docs = retriever.invoke(skills)
    
    # Create context from retrieved documents
    context = "\n\n".join([f"Resume {i+1}:\n{doc.page_content}" for i, doc in enumerate(docs)])
    
    # Create prompt for LLM
    prompt = f"""You are a recruiter assistant. Based on the following resume excerpts, identify and rank the best candidates for the required skills.

Required Skills: {skills}

Resume Excerpts:
{context}

Please provide a quick summary for the top 3 best matching candidates. For each candidate, include their relevant skills, why they are a good fit, and a matching percentage. The response should be concise.

Answer:"""
    
    # Get LLM response
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    response = llm.invoke(prompt)
    
    print("\n" + "="*60)
    print("üéØ SEARCH RESULTS")
    print("="*60)
    print(response.content)
    print("="*60)
    
    return response.content

def clear_resumes():
    """Clear all resumes from vector database"""
    print("üóëÔ∏è Clearing resume database...")
    if os.path.exists("./resume_db"):
        shutil.rmtree("./resume_db")
        print("‚úì Database cleared successfully")
        return "Database cleared successfully"
    else:
        print("‚ùå No database found")
        return "No database found"

# Now wrap the functions as tools

@tool
def ingest_resumes_tool():
    """Ingest new resumes from the './resumes' folder into the vector database. Use this tool when new resumes need to be processed or the database needs to be updated."""
    return ingest_resumes()

@tool
def list_resumes_tool():
    """List all the unique resume file names currently stored in the vector database. Use this tool to see what resumes have been ingested."""
    return list_resumes()

@tool
def search_resumes_tool(skills: str):
    """Search for candidates whose resumes match the given skills. Input should be a comma-separated string of required skills (e.g., 'Python, Machine Learning, Docker'). Use this tool to find candidates for a job opening."""
    return search_resumes(skills)

@tool
def clear_resumes_tool():
    """Clear all resumes from the vector database. This will delete the entire resume database. Use this tool to start fresh or remove all stored resume data."""
    return clear_resumes()

# Create a list of all available tools
tools = [
    ingest_resumes_tool,
    list_resumes_tool,
    search_resumes_tool,
    clear_resumes_tool
]

# Initialize the LLM
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Create the agent
agent_executor = create_agent(llm, tools)

print("‚úì Agent and tools set up successfully.")


‚úì Agent and tools set up successfully.


### 3. Use the Agent with Natural Language Queries

Now you can interact with your agent using natural language, and it will decide which tool (or tools) to use.

In [5]:
from langchain_core.messages import HumanMessage

# Example 1: List existing resumes
print("\n--- Agent Query: List resumes ---")
response = agent_executor.invoke({
    "messages": [HumanMessage(content="What resumes do I have?")]
})
print("Agent Response:", response["messages"][-1].content)

# Example 2: Search for candidates
print("\n--- Agent Query: Search for candidates with Angular and NestJS ---")
response = agent_executor.invoke({
    "messages": [HumanMessage(content="Find candidates who are good in Angular and NestJS.")]
})
print("Agent Response:", response["messages"][-1].content)

# Example 3: Ingest resumes (if you had new files in ./resumes folder)
# Make sure there are new files in the './resumes' directory before running this example
# print("\n--- Agent Query: Ingest new resumes ---")
# response = agent_executor.invoke({
#     "messages": [HumanMessage(content="Please process any new resumes.")]
# })
# print("Agent Response:", response["messages"][-1].content)

# Example 4: Clear the database
# print("\n--- Agent Query: Clear all resume data ---")
# response = agent_executor.invoke({
#     "messages": [HumanMessage(content="Delete all stored resumes.")]
# })
# print("Agent Response:", response["messages"][-1].content)



--- Agent Query: List resumes ---
üìã Listing resumes...
 1. mantu_nigam_resume.txt
 2. raj_patel_resume.txt
 3. sarah_johnson_resume.txt
 4. vinod_malik_resume.txt
Agent Response: You have the following resumes in the database:

1. mantu_nigam_resume.txt
2. raj_patel_resume.txt
3. sarah_johnson_resume.txt
4. vinod_malik_resume.txt

--- Agent Query: Search for candidates with Angular and NestJS ---
üîç Searching for candidates with skills: Angular, NestJS

üéØ SEARCH RESULTS
### Top 3 Candidates

1. **Sarah Johnson**
   - **Relevant Skills:** Angular, NestJS, TypeScript, Node.js
   - **Why They Are a Good Fit:** Sarah has direct experience as a Full Stack Developer, specifically building microservices with NestJS and developing UIs with Angular. Her educational background in Computer Science from Stanford adds to her qualifications.
   - **Matching Percentage:** 100%

2. **Mantu Nigam**
   - **Relevant Skills:** Angular, Nest, React, HTML, CSS
   - **Why They Are a Good Fit:** Mant

In [22]:
import shutil
import os

# Remove corrupted database
if os.path.exists("./resume_db"):
    shutil.rmtree("./resume_db")
    print("‚úì Cleaned up old database")

# Recreate folder
os.makedirs("./resume_db", exist_ok=True)
print("‚úì Ready for fresh start")


‚úì Cleaned up old database
‚úì Ready for fresh start


In [24]:
def generate_resume(data):
    """Generate a text resume from data dictionary"""
    resume = []

    # Header
    resume.append(data['name'].upper())
    resume.append(f"{data['email']} | {data['phone']} | {data['location']}")
    resume.append("")

    # Skills
    resume.append("SKILLS")
    resume.append(", ".join(data['skills']))
    resume.append("")

    # Experience
    resume.append("EXPERIENCE")
    for exp in data['experiences']:
        resume.append(f"{exp['title']} | {exp['company']} | {exp['duration']}")
        for resp in exp['responsibilities']:
            resume.append(f"- {resp}")
        resume.append("")

    # Education
    resume.append("EDUCATION")
    edu = data['education']
    resume.append(f"{edu['degree']} | {edu['institution']} | {edu['year']}")

    return "\n".join(resume)


def save_resume(data, filepath):
    """Save resume to file"""
    with open(filepath, 'w') as f:
        f.write(generate_resume(data))


# Example usage
if __name__ == "__main__":
    resume_data = {
        "name": "Vinod Malik",
        "email": "vinod.malik@email.com",
        "phone": "+91-9876543210",
        "location": "Bangalore",
        "skills": ["Python", "LangChain", "VectorDB", "Google Cloud", "Docker"],
        "experiences": [
            {
                "title": "Senior AI Engineer",
                "company": "TechCorp",
                "duration": "2021-Present",
                "responsibilities": [
                    "Built Full stack Gen AI App",
                    "Developed Mobile App Using Material UI"
                ]
            },
            {
                "title": "Software Engineer",
                "company": "TCS",
                "duration": "2022-2025",
                "responsibilities": [
                    "Created ML models and REST APIs with Python"
                ]
            }
        ],
        "education": {
            "degree": "MCA",
            "institution": "IPU Delhi",
            "year": "2011"
        }
    }

    # Generate and print
    print(generate_resume(resume_data))

    # Save to file
    save_resume(resume_data, "resumes/vinod_malik_resume.txt")


VINOD MALIK
vinod.malik@email.com | +91-9876543210 | Bangalore

SKILLS
Python, LangChain, VectorDB, Google Cloud, Docker

EXPERIENCE
Senior AI Engineer | TechCorp | 2021-Present
- Built Full stack Gen AI App
- Developed Mobile App Using Material UI

Software Engineer | TCS | 2022-2025
- Created ML models and REST APIs with Python

EDUCATION
MCA | IPU Delhi | 2011


In [None]:
# Add resumes to database
ingest_resumes()


üì• Ingesting resumes...
‚úì Created new database with 2 chunks from 2 resumes
‚úì Database saved successfully


In [None]:
# Search for candidates with specific skills
skills = "front end, angular, react, microservice using nestjs"  # Change this to your required skills
search_resumes(skills)

üîç Searching for candidates with skills: front end, angular, react, microservice using nestjs

üéØ SEARCH RESULTS
### Top Candidates Summary

**1. Mantu Nigam**  
- **Relevant Skills:** React, Angular, Nest, Full stack development  
- **Why They Are a Good Fit:** Mantu has direct experience building full stack applications using Angular and Nest, which aligns perfectly with the required skills. His background in both front-end and back-end technologies makes him a strong candidate for roles involving microservices.  
- **Matching Percentage:** 90%

**2. Vinod Malik**  
- **Relevant Skills:** (Limited relevant skills)  
- **Why They Are a Good Fit:** While Vinod has experience as a Senior AI Engineer, his resume does not mention any front-end technologies like Angular or React, nor does it indicate experience with microservices using Nest. His skills are more focused on AI and Python, making him less suitable for the required role.  
- **Matching Percentage:** 40%

### Summary
Mantu 

'### Top Candidates Summary\n\n**1. Mantu Nigam**  \n- **Relevant Skills:** React, Angular, Nest, Full stack development  \n- **Why They Are a Good Fit:** Mantu has direct experience building full stack applications using Angular and Nest, which aligns perfectly with the required skills. His background in both front-end and back-end technologies makes him a strong candidate for roles involving microservices.  \n- **Matching Percentage:** 90%\n\n**2. Vinod Malik**  \n- **Relevant Skills:** (Limited relevant skills)  \n- **Why They Are a Good Fit:** While Vinod has experience as a Senior AI Engineer, his resume does not mention any front-end technologies like Angular or React, nor does it indicate experience with microservices using Nest. His skills are more focused on AI and Python, making him less suitable for the required role.  \n- **Matching Percentage:** 40%\n\n### Summary\nMantu Nigam is the clear top candidate due to his relevant experience with both Angular and Nest, while Vinod

In [6]:
# Search for candidates with specific skills
skills = "python, ML, Gen AI"  # Change this to your required skills
search_resumes(skills)

üîç Searching for candidates with skills: python, ML, Gen AI

üéØ SEARCH RESULTS
### Top 3 Candidates

1. **Vinod Malik**
   - **Relevant Skills:** Python, Machine Learning, Gen AI (LangChain)
   - **Why They Are a Good Fit:** Vinod has extensive experience in building Gen AI applications using Python and LangChain, along with a solid background in developing ML models. His current role as a Senior AI Engineer aligns well with the required skills.
   - **Matching Percentage:** 95%

2. **Raj Patel**
   - **Relevant Skills:** Python, Docker, Machine Learning (implied through API development)
   - **Why They Are a Good Fit:** While Raj's experience is more focused on backend development, he has strong Python skills and experience with Docker. However, he lacks direct experience in Gen AI and ML, which slightly lowers his fit.
   - **Matching Percentage:** 75%

3. **Mantu Nigam**
   - **Relevant Skills:** Python, Machine Learning (implied through API development)
   - **Why They Are a Go

"### Top 3 Candidates\n\n1. **Vinod Malik**\n   - **Relevant Skills:** Python, Machine Learning, Gen AI (LangChain)\n   - **Why They Are a Good Fit:** Vinod has extensive experience in building Gen AI applications using Python and LangChain, along with a solid background in developing ML models. His current role as a Senior AI Engineer aligns well with the required skills.\n   - **Matching Percentage:** 95%\n\n2. **Raj Patel**\n   - **Relevant Skills:** Python, Docker, Machine Learning (implied through API development)\n   - **Why They Are a Good Fit:** While Raj's experience is more focused on backend development, he has strong Python skills and experience with Docker. However, he lacks direct experience in Gen AI and ML, which slightly lowers his fit.\n   - **Matching Percentage:** 75%\n\n3. **Mantu Nigam**\n   - **Relevant Skills:** Python, Machine Learning (implied through API development)\n   - **Why They Are a Good Fit:** Mantu has Python skills and experience in creating ML mode