In [3]:
import pandas as pd
from pymongo import MongoClient

# Load the CSV file
csv_file_path = '/Users/shubhamrohilla/Work/Interactly Data Science Intern Project/Dataset/Candidate_data.csv'  # Replace with your CSV file path
candidates_df = pd.read_csv(csv_file_path)

# Convert the job skills to list
candidates_df['Job Skills'] = candidates_df['Job Skills'].apply(lambda x: x.split(', '))

# Connect to local MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client['candidate_db']
collection = db['candidates']

# Clear the existing collection to avoid duplication
collection.delete_many({})

# Insert data into MongoDB
candidates = candidates_df.to_dict(orient='records')
collection.insert_many(candidates)

# Verify data insertion
print("Data inserted into MongoDB:")
for candidate in collection.find():
    print(candidate)


Data inserted into MongoDB:
{'_id': ObjectId('66ab7edc18e508458c345e29'), 'Name': 'John Doe', 'Contact Details': 'john.doe@example.com', 'Location': 'New York', 'Job Skills': ['Java; Big Data; Hadoop'], 'Experience': '5 years', 'Projects': 'Developed a big data processing system using Hadoop and Spark.', 'Comments': 'Strong problem-solving skills.'}
{'_id': ObjectId('66ab7edc18e508458c345e2a'), 'Name': 'Jane Smith', 'Contact Details': 'jane.smith@example.com', 'Location': 'San Francisco', 'Job Skills': ['JavaScript; React; Node.js'], 'Experience': '3 years', 'Projects': 'Built a real-time chat application using React and Node.js.', 'Comments': 'Excellent in team collaboration.'}
{'_id': ObjectId('66ab7edc18e508458c345e2b'), 'Name': 'Bob Johnson', 'Contact Details': 'bob.johnson@example.com', 'Location': 'New York', 'Job Skills': ['Java; Spring; Microservices'], 'Experience': '6 years', 'Projects': 'Implemented a microservices architecture for an e-commerce platform.', 'Comments': 'Proa

In [4]:
from transformers import AutoModel, AutoTokenizer
import torch
from torch.nn.functional import cosine_similarity

# Path to the directory where the model is saved
model_directory = "/Users/shubhamrohilla/Work/Interactly Data Science Intern Project/LLM Model/fine-tuned-model"
tokenizer_directory = "/Users/shubhamrohilla/Work/Interactly Data Science Intern Project/LLM Model/tokenizer"

# Load your fine-tuned model and tokenizer
model = AutoModel.from_pretrained(model_directory)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_directory)

def get_matching_candidates(job_description, top_k=10):
    # Tokenize job description
    job_inputs = tokenizer(job_description, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        job_outputs = model(**job_inputs)
    job_embedding = job_outputs.last_hidden_state.mean(dim=1)  # Mean pooling

    # Fetch candidates from MongoDB
    candidates = list(collection.find())
    matches = []

    for candidate in candidates:
        candidate_desc = f"{candidate['Job Skills']} {candidate['Experience']} {candidate['Projects']} {candidate['Comments']}"
        candidate_inputs = tokenizer(candidate_desc, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            candidate_outputs = model(**candidate_inputs)
        candidate_embedding = candidate_outputs.last_hidden_state.mean(dim=1)  # Mean pooling

        similarity_score = cosine_similarity(job_embedding, candidate_embedding).item()
        matches.append((candidate, similarity_score))

    # Sort candidates based on similarity score in ascending order
    matches = sorted(matches, key=lambda x: x[1], reverse=False)[:top_k]

    return matches

print("Enter the Job Description to find the matching candidates:- \n")
job_description = input()
print("Job Description entered is:- ", job_description)
print("\nFinding matching candidates...\n")

matches = get_matching_candidates(job_description, top_k=10)

# Remove duplicates
unique_matches = []
seen_candidates = set()
for candidate, score in matches:
    if candidate['Name'] not in seen_candidates:
        unique_matches.append((candidate, score))
        seen_candidates.add(candidate['Name'])

print("************************* Match Found *******************************")
for candidate, score in unique_matches:
    print(f"Candidate: {candidate['Name']}, Match Score: {score}")
    print(f"Contact: {candidate['Contact Details']}, Location: {candidate['Location']}")
    print(f"Skills: {candidate['Job Skills']}, Experience: {candidate['Experience']}")
    print(f"Projects: {candidate['Projects']}, Comments: {candidate['Comments']}")
    print("="*50)

# Debugging: Print all match scores
print("\nAll match scores (for debugging):")
for candidate, score in matches:
    print(f"Candidate: {candidate['Name']}, Match Score: {score}")


Enter the Job Description to find the matching candidates:- 

Job Description entered is:-  Pick up the top 10 profiles for the following job description, We are looking for a skilled UI Developer to join our dynamic team. The ideal candidate will have a strong background in front-end development, with proficiency in HTML, CSS, JavaScript, and modern frameworks like React or Angular. Your primary responsibility will be to create visually appealing and user-friendly web interfaces that enhance user experience and align with our brand guidelines.

Finding matching candidates...

************************* Match Found *******************************
Candidate: Ian Malcolm, Match Score: 0.28794750571250916
Contact: ian.malcolm@yahoo.com, Location: Houston, TX
Skills: ['Kubernetes', 'Ruby', 'AWS', 'React'], Experience: 11 years
Projects: Front-end redesign, Comments: Front-end specialist
Candidate: Michael Scott, Match Score: 0.30339956283569336
Contact: michael.scott@outlook.com, Location: 