In [1]:
!pip install openai chromadb sentence-transformers pdfplumber



In [1]:
import os

import chromadb
import openai
import pdfplumber
from google.colab import files  # For file uploads
from sentence_transformers import SentenceTransformer

# Initialize OpenAI API Key
# Prefer the OPENAI_API_KEY environment variable so the real key is never saved
# inside the notebook; the placeholder string is kept only as a fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your api")

# Load Embedding Model (used for both resumes and job descriptions)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize ChromaDB (Vector Database)
# The client is pointed at ./resume_db and the "resumes" collection is reused
# when it already exists, otherwise created.
chroma_client = chromadb.PersistentClient(path="./resume_db")
collection = chroma_client.get_or_create_collection(name="resumes")

print("✅ Setup Complete")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Setup Complete


In [4]:
# Upload one or more PDF resumes through the Colab file-upload widget.
# The returned mapping is keyed by the saved filename, which is what
# extract_text_from_pdfs() opens below.
uploaded_files = files.upload()

# Function to extract text from multiple PDFs
def extract_text_from_pdfs(uploaded_files):
    """Extract the text of every uploaded PDF.

    Args:
        uploaded_files: Mapping whose keys are paths of PDF files that can be
            opened from the working directory (e.g. the result of
            ``files.upload()``). Only the keys are used.

    Returns:
        dict: ``{filename: text}`` where ``text`` is the text of every page that
        yielded any, joined by single spaces. A PDF with no extractable text
        maps to ``""``.
    """
    resume_texts = {}  # filename -> extracted text
    for filename in uploaded_files:
        with pdfplumber.open(filename) as pdf:
            # Run the extraction once per page; using it both as the filter and
            # as the value would parse every page twice.
            page_texts = [page.extract_text() for page in pdf.pages]
        text = " ".join(page_text for page_text in page_texts if page_text)
        resume_texts[filename] = text
        if text:
            print(f"✅ Extracted text from {filename}")
        else:
            # Do not report success when nothing could be read from the file.
            print(f"⚠️ No extractable text found in {filename}")
    return resume_texts

# Build the {filename: extracted text} mapping for every uploaded resume;
# later cells embed and store these texts.
resume_texts = extract_text_from_pdfs(uploaded_files)


Saving Bilvika resume.pdf to Bilvika resume.pdf
Saving Harini y_resume_CSE.pdf to Harini y_resume_CSE (1).pdf
Saving Shreya A-Resume-CSE.pdf to Shreya A-Resume-CSE (1).pdf
Saving Monikap_resume.pdf to Monikap_resume (2).pdf
✅ Extracted text from Bilvika resume.pdf
✅ Extracted text from Harini y_resume_CSE (1).pdf
✅ Extracted text from Shreya A-Resume-CSE (1).pdf
✅ Extracted text from Monikap_resume (2).pdf


In [5]:
# Function to Add Resumes to Vector DB
def add_resumes_to_db(resume_texts):
    """Embed each resume and store it in the module-level Chroma ``collection``.

    Args:
        resume_texts: Mapping ``{filename: text}`` as produced by
            ``extract_text_from_pdfs``.

    Returns:
        None. Each resume with non-blank text is written to the collection
        under its filename as id, with ``{"name": <filename without .pdf>}``
        as metadata. Resumes whose text is blank are skipped with a warning.
    """
    suffix = ".pdf"
    for filename, text in resume_texts.items():
        # Strip only a trailing ".pdf"; str.replace would also remove the
        # substring from the middle of a name.
        if filename.lower().endswith(suffix):
            candidate_name = filename[:-len(suffix)]
        else:
            candidate_name = filename

        # A blank document has no meaningful embedding and would only pollute
        # search results.
        if not text or not text.strip():
            print(f"⚠️ Skipped {candidate_name}: no text to embed")
            continue

        embedding = embedding_model.encode(text).tolist()
        # The filename is a stable, unique key and upsert overwrites an existing
        # entry, so re-running the cell or adding a later batch neither collides
        # with nor duplicates earlier records (positional ids restart at "0"
        # for every batch).
        collection.upsert(
            ids=[filename],
            embeddings=[embedding],
            documents=[text],
            metadatas=[{"name": candidate_name}],
        )
        print(f"📂 Stored {candidate_name} in Vector DB ✅")

# Embed every extracted resume and store it in the Chroma collection.
add_resumes_to_db(resume_texts)


📂 Stored Bilvika resume in Vector DB ✅
📂 Stored Harini y_resume_CSE (1) in Vector DB ✅
📂 Stored Shreya A-Resume-CSE (1) in Vector DB ✅
📂 Stored Monikap_resume (2) in Vector DB ✅


In [6]:
# Function to Retrieve Matching Resumes
def search_resumes(job_description, n_results=3):
    """Return the stored resume texts that best match a job description.

    Args:
        job_description: Free-text description of the role to fill.
        n_results: Maximum number of resumes to return (default 3).

    Returns:
        list[str]: Resume texts in the order the collection query returns
        them; empty when the collection holds no resumes or ``n_results`` is
        less than 1.
    """
    # Never request more neighbours than are stored, and skip the query
    # entirely when there is nothing to search.
    limit = min(n_results, collection.count())
    if limit < 1:
        return []

    job_embedding = embedding_model.encode(job_description).tolist()
    results = collection.query(query_embeddings=[job_embedding], n_results=limit)
    documents = results["documents"]
    # One query embedding was sent, so the first inner list holds its matches.
    return documents[0] if documents else []

# Demo query: look up the closest resumes for a sample job description.
job_desc = "Looking for a Python developer with experience in machine learning."
matching_resumes = search_resumes(job_desc)

print("\n🔍 Top Matching Resumes Found:")
for rank, resume_text in enumerate(matching_resumes, start=1):
    # Only the first 200 characters are shown as a preview.
    preview = resume_text[:200]
    print(f"{rank}. {preview}...")



🔍 Top Matching Resumes Found:
1. YELLA. HARINI
HBeAngRaIlNurIu, Karnataka | p: +91 9381367081 | harini0704.y@gmail.com
EDUCATION
Alliance University of Engineering
Computer Science Engineering Bengaluru, Karnataka
Specialization: Art...
2. Monika P
Bachelor of Technology - Computer Science and Engineering
Bengaluru, Karnataka-562107 | p: 6362722581
mail: monikap0522@gmail.com
EDUCATION SKILLS
Alliance College of Engineering, Bengaluru P...
3. Mellamputi Bilvika
Bengaluru, Karnataka • bilvikanaidu@gmail.com • +91 9493755895
Education
ALLIANCE UNIVERSITY Bengaluru, Karnataka
Computer Science and Engineering. 2021-present
Specialization in Ar...


In [7]:
import time

# Function to Generate AI Match Summary
def match_candidates(job_description, model="gpt-3.5-turbo", max_retries=3, retry_delay=60):
    """Ask the chat model to match a job description against the top resumes.

    Args:
        job_description: Free-text description of the role to fill.
        model: Name of the chat model to call.
        max_retries: How many times to retry after a rate-limit error before
            giving up.
        retry_delay: Seconds to wait before each retry.

    Returns:
        str: The text of the model's reply.

    Raises:
        openai.RateLimitError: If the request is still rate-limited after
            ``max_retries`` retries.
    """
    relevant_resumes = search_resumes(job_description)
    context = "\n".join(relevant_resumes)
    messages = [
        {"role": "system", "content": "You are an AI recruiter assistant. Match the job description with best candidates."},
        {"role": "user", "content": f"Job Description: {job_description}\nRelevant Resumes: {context}"},
    ]

    retries_used = 0
    # Bounded retry loop: unbounded recursion would wait forever on a
    # persistent rate limit.
    while True:
        try:
            response = openai.chat.completions.create(model=model, messages=messages)
        except openai.RateLimitError:
            # openai>=1.0 exposes the exception at package level; the old
            # openai.error module no longer exists.
            if retries_used >= max_retries:
                raise
            retries_used += 1
            print(f"Rate limit exceeded. Waiting for {retry_delay} seconds...")
            time.sleep(retry_delay)
        else:
            # The v1 client returns an object, not a dict, so use attribute access.
            return response.choices[0].message.content
