In [1]:
%pip install langchain-openai langchain-community langchain-text-splitters langchain-core faiss-cpu python-dotenv pypdf


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import shutil
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader, PyPDFLoader
from dotenv import load_dotenv

# Load API key
load_dotenv()
print("‚úì Setup complete")
print("API Key:", os.environ.get("OPENAI_API_KEY", "Not set")[:20] + "...")

# Create folders if they don't exist
os.makedirs("./resumes", exist_ok=True)
os.makedirs("./resume_db", exist_ok=True)
print("‚úì Folders created: ./resumes and ./resume_db")


  from pydantic.v1.fields import FieldInfo as FieldInfoV1


‚úì Setup complete
API Key: sk-proj-2D_k1B8OV3MW...
‚úì Folders created: ./resumes and ./resume_db


In [7]:
import shutil
import os

# Remove corrupted database
if os.path.exists("./resume_db"):
    shutil.rmtree("./resume_db")
    print("‚úì Cleaned up old database")

# Recreate folder
os.makedirs("./resume_db", exist_ok=True)
print("‚úì Ready for fresh start")


‚úì Cleaned up old database
‚úì Ready for fresh start


In [10]:
# Initialize embeddings globally
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

def ingest_resumes():
    """Load resumes from ./resumes folder and add to vector database"""
    print("üì• Ingesting resumes...")
    
    # Load text files
    txt_loader = DirectoryLoader("./resumes", glob="**/*.txt", loader_cls=TextLoader)
    txt_docs = txt_loader.load()
    
    # Load PDF files
    pdf_loader = DirectoryLoader("./resumes", glob="**/*.pdf", loader_cls=PyPDFLoader)
    pdf_docs = pdf_loader.load()
    
    all_docs = txt_docs + pdf_docs
    
    if not all_docs:
        print("‚ùå No resumes found in ./resumes folder")
        return
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(all_docs)
    
    # Check if FAISS index file exists (not just folder)
    db_file_exists = os.path.exists("./resume_db/index.faiss")
    
    if db_file_exists:
        # Load existing and add new documents
        vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
        vectorstore.add_documents(chunks)
        print(f"‚úì Added {len(chunks)} chunks from {len(all_docs)} resumes")
    else:
        # Create new vector store
        vectorstore = FAISS.from_documents(chunks, embeddings)
        print(f"‚úì Created new database with {len(chunks)} chunks from {len(all_docs)} resumes")
    
    vectorstore.save_local("./resume_db")
    print("‚úì Database saved successfully")


def list_resumes():
    """List all resumes stored in vector database"""
    print("üìã Listing resumes...")
    
    if not os.path.exists("./resume_db/index.faiss"):
        print("‚ùå No database found. Please ingest resumes first.")
        return
    
    vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
    
    # Get all documents
    all_docs = vectorstore.docstore._dict
    
    # Extract unique sources
    sources = set()
    for doc in all_docs.values():
        if hasattr(doc, 'metadata') and 'source' in doc.metadata:
            sources.add(os.path.basename(doc.metadata['source']))
    
    print(f"\n‚úì Found {len(sources)} resumes in database:")
    for i, source in enumerate(sorted(sources), 1):
        print(f"  {i}. {source}")


def search_resumes(skills):
    """Search resumes by skills and return best matches"""
    print(f"üîç Searching for candidates with skills: {skills}")
    
    if not os.path.exists("./resume_db/index.faiss"):
        print("‚ùå No database found. Please ingest resumes first.")
        return
    
    vectorstore = FAISS.load_local("./resume_db", embeddings, allow_dangerous_deserialization=True)
    
    # Search for relevant resume chunks
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    docs = retriever.invoke(skills)
    
    # Create context from retrieved documents
    context = "\n\n".join([f"Resume {i+1}:\n{doc.page_content}" for i, doc in enumerate(docs)])
    
    # Create prompt for LLM
    prompt = f"""You are a recruiter assistant. Based on the following resume excerpts, identify and rank the best candidates for the required skills.

Required Skills: {skills}

Resume Excerpts:
{context}

Please provide:
1. Top 3 best matching candidates
2. Their relevant skills and experience
3. Why they are a good fit

Answer:"""
    
    # Get LLM response
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    response = llm.invoke(prompt)
    
    print("\n" + "="*60)
    print("üéØ SEARCH RESULTS")
    print("="*60)
    print(response.content)
    print("="*60)
    
    return response.content


def clear_resumes():
    """Clear all resumes from vector database"""
    print("üóëÔ∏è  Clearing resume database...")
    
    if os.path.exists("./resume_db"):
        shutil.rmtree("./resume_db")
        print("‚úì Database cleared successfully")
    else:
        print("‚ùå No database found")

print("‚úì Agent functions loaded successfully")


‚úì Agent functions loaded successfully


In [11]:
# Add resumes to database
ingest_resumes()


üì• Ingesting resumes...
‚úì Created new database with 2 chunks from 2 resumes
‚úì Database saved successfully


In [12]:
# Search for candidates with specific skills
skills = "Python, Machine Learning, LangChain"  # Change this to your required skills
search_resumes(skills)


üîç Searching for candidates with skills: Python, Machine Learning, LangChain

üéØ SEARCH RESULTS
Based on the provided resume excerpts and the required skills of Python, Machine Learning, and LangChain, here are the top candidates ranked by their relevance to the required skills:

### 1. Mantu Nigam
**Relevant Skills:**
- Python
- LangChain
- Machine Learning (implied through experience with ML models)

**Experience:**
- **Senior AI Engineer at TechCorp (2021-Present):** Built RAG applications with LangChain and OpenAI, showcasing direct experience with LangChain and Python.
- **Software Engineer at DataMinds (2019-2021):** Created ML models and REST APIs with Python, demonstrating proficiency in machine learning.

**Why They Are a Good Fit:**
Mantu has direct experience with all three required skills. His current role involves using LangChain and Python in a practical application, and he has a solid background in machine learning. This makes him the strongest candidate for the posi

'Based on the provided resume excerpts and the required skills of Python, Machine Learning, and LangChain, here are the top candidates ranked by their relevance to the required skills:\n\n### 1. Mantu Nigam\n**Relevant Skills:**\n- Python\n- LangChain\n- Machine Learning (implied through experience with ML models)\n\n**Experience:**\n- **Senior AI Engineer at TechCorp (2021-Present):** Built RAG applications with LangChain and OpenAI, showcasing direct experience with LangChain and Python.\n- **Software Engineer at DataMinds (2019-2021):** Created ML models and REST APIs with Python, demonstrating proficiency in machine learning.\n\n**Why They Are a Good Fit:**\nMantu has direct experience with all three required skills. His current role involves using LangChain and Python in a practical application, and he has a solid background in machine learning. This makes him the strongest candidate for the position.\n\n---\n\n### 2. Vinod Malik\n**Relevant Skills:**\n- Python\n- Machine Learning

In [14]:
# Simple interactive menu
def run_agent():
    while True:
        print("\n" + "="*60)
        print("üìÑ RESUME AGENT - MENU")
        print("="*60)
        print("1. Ingest Resumes")
        print("2. List Resumes")
        print("3. Search by Skills")
        print("4. Clear Database")
        print("5. Exit")
        print("="*60)
        
        choice = input("\nEnter your choice (1-5): ")
        
        if choice == "1":
            ingest_resumes()
        elif choice == "2":
            list_resumes()
        elif choice == "3":
            skills = input("Enter required skills (comma-separated): ")
            search_resumes(skills)
        elif choice == "4":
            confirm = input("Are you sure? (yes/no): ")
            if confirm.lower() == "yes":
                clear_resumes()
        elif choice == "5":
            print("üëã Goodbye!")
            break
        else:
            print("‚ùå Invalid choice. Please try again.")
