In [6]:
from pymongo import MongoClient
from langchain.embeddings import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os

# Set your OpenAI API Key
os.environ["OPENAI_API_KEY"] = "your_key"

def fetch_best_matching_candidate(query: str, mongo_uri="mongodb://localhost:27017/", db_name="documentDB", collection_name="extracted_info", top_k=1):
    """
    Given a query, fetch the best matching candidate from MongoDB using cosine similarity on embeddings.
    
    Args:
        query (str): User query like "candidates with YOLO skills from Chennai"
        mongo_uri (str): MongoDB connection string
        db_name (str): MongoDB database name
        collection_name (str): Collection where embeddings are stored
        top_k (int): Number of top matches to return

    Returns:
        list: Top k matching documents (with similarity score)
    """
    # Connect to MongoDB
    client = MongoClient(mongo_uri)
    db = client[db_name]
    collection = db[collection_name]

    # Load documents with embeddings
    docs = list(collection.find({"city_skill_embedding": {"$exists": True}}))
    if not docs:
        print("No documents with city_skill_embedding found.")
        return []

    # Generate embedding for user query
    embedding_model = OpenAIEmbeddings()
    query_embedding = embedding_model.embed_query(query)

    # Calculate cosine similarity between query and all city+skill embeddings
    similarities = []
    for doc in docs:
        score = cosine_similarity([query_embedding], [doc["city_skill_embedding"]])[0][0]
        similarities.append((score, doc))

    # Sort by similarity score
    similarities.sort(reverse=True, key=lambda x: x[0])

    # Return top-k results
    top_matches = []
    for score, doc in similarities[:top_k]:
        doc_with_score = {
            "name": doc.get("name"),
            "city": doc.get("city"),
            "skills": doc.get("skills"),
            "email": doc.get("email"),
            "phone": doc.get("phone"),
            "score": round(score, 4),
            "filename": doc.get("filename")
        }
        top_matches.append(doc_with_score)

    return top_matches

def fetch_candidate_resume(filename: str, mongo_uri="mongodb://localhost:27017/", db_name="documentDB", collection_name="extracted_info"):
    """
    Fetch the full resume content (raw_text) of a candidate given the filename.

    Args:
        filename (str): PDF filename of the candidate (e.g., "Candidate_Summary_Arjun_Mehta.pdf")
        mongo_uri (str): MongoDB connection string
        db_name (str): MongoDB database name
        collection_name (str): Collection name

    Returns:
        str: The full resume text (raw_text)
    """
    # Connect to MongoDB
    client = MongoClient(mongo_uri)
    db = client[db_name]
    collection = db[collection_name]

    doc = collection.find_one({"filename": filename})

    if doc and "raw_text" in doc:
        return doc["raw_text"]
    elif doc and "content" in doc:
        return doc["content"]
    else:
        return "Resume content not found."


In [5]:
results = fetch_best_matching_candidate("get me the candidate with YOLO and OpenCV experience from Chennai")
for result in results:
    print(result)

{'name': 'Dr. T.K. Senthil Kumar', 'city': 'Chennai', 'skills': 'Artificial Intelligence, Machine Learning, Computer Vision, Deep Learning, Generative AI, MLOps', 'email': 'contact@gradascentglobal.com', 'phone': '9444700278', 'score': np.float64(0.8221), 'filename': 'Candidate_Summary_Dr_T_K_Senthil_Kumar.pdf'}


In [9]:
# Step 1: Get best matching candidate
results = fetch_best_matching_candidate("candidates with YOLO skills from Chennai", top_k=1)
print(result)

# Step 2: Fetch resume text
if results:
    candidate = results[0]
    full_resume = fetch_candidate_resume(candidate["filename"])
    print(f"\nFull Resume of {candidate['name']}:\n")
    print(full_resume[:1000])  # print only first 1000 characters
else:
    print("No matching candidates found.")


{'name': 'Dr. T.K. Senthil Kumar', 'city': 'Chennai', 'skills': 'Artificial Intelligence, Machine Learning, Computer Vision, Deep Learning, Generative AI, MLOps', 'email': 'contact@gradascentglobal.com', 'phone': '9444700278', 'score': np.float64(0.8221), 'filename': 'Candidate_Summary_Dr_T_K_Senthil_Kumar.pdf'}

Full Resume of Dr. T.K. Senthil Kumar:

Dr. T.K. Senthil Kumar is an accomplished AI educator and data science expert based in Chennai,
India, with over 16 years of rich experience spanning both academia and industry. He is currently
working as a Subject Matter Expert at L&T EduTech, where he plays a key role in developing
curriculum and delivering hands-on sessions in Artificial Intelligence, Machine Learning, Computer
Vision, Deep Learning, Generative AI, and MLOps. Prior to this, he served as a Data Scientist at
NYBL, Dubai, specializing in computer vision model development for industrial applications. Dr.
Senthil Kumar has also worked with leading EduTech firms like Great 