In [8]:
from gensim.models import Word2Vec

# Load the Word2Vec model stored as "stackexchange_model"; the relative path
# is resolved against the current working directory.
# NOTE(review): gensim's load() unpickles the file — only load model files
# that come from a trusted source.
model = Word2Vec.load("stackexchange_model")


In [9]:
import numpy as np

def document_vector(doc, model):
    """Embed a document as the mean of its in-vocabulary word vectors.

    The text is lower-cased and split on whitespace; tokens that are not keys
    of ``model.wv.key_to_index`` are discarded before averaging.

    Args:
        doc: Document text. ``None`` and NaN (e.g. an empty cell read by
            pandas) are treated as an empty document.
        model: Object exposing ``wv`` (indexable by a list of words and
            carrying a ``key_to_index`` mapping) and ``vector_size``, such as
            the gensim ``Word2Vec`` model loaded in this notebook.

    Returns:
        1-D NumPy array of length ``model.vector_size``: the element-wise mean
        of the retained word vectors, or all zeros when no token is in the
        vocabulary (or the document is missing).
    """
    # Missing values: None, or a float NaN (NaN is the only float != itself).
    if doc is None or (isinstance(doc, float) and doc != doc):
        return np.zeros(model.vector_size)

    vocab = model.wv.key_to_index
    # NOTE(review): whitespace tokenisation keeps punctuation attached
    # ("team," / "developers."), so such tokens are dropped unless the
    # vocabulary contains them verbatim — confirm this matches how the model
    # was trained.
    words = [word for word in doc.lower().split() if word in vocab]
    if not words:
        return np.zeros(model.vector_size)
    return np.mean(model.wv[words], axis=0)


In [10]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def rank_resumes_against_jd(job_description, cv_texts, model, top_n=5):
    """Rank CVs by cosine similarity to a job description and print the best.

    Every text is embedded with ``document_vector``; each CV is then scored
    by the cosine similarity between its vector and the job-description
    vector. A CV whose vector is all zeros is scored 0.0 without calling
    ``cosine_similarity``.

    Args:
        job_description: Job-description text used as the query.
        cv_texts: Sequence of CV texts; it is indexed by position when the
            snippets are printed.
        model: Word-embedding model forwarded to ``document_vector``.
        top_n: Maximum number of matches to print and return. ``None``
            returns every CV. Must not be negative.

    Returns:
        List of ``(index, similarity)`` tuples sorted by similarity, highest
        first, truncated to ``top_n`` entries. ``index`` is the 0-based
        position of the CV in ``cv_texts``.

    Raises:
        ValueError: If ``top_n`` is negative, or if the job-description
            vector is all zeros.
    """
    # A negative slice bound would silently drop the *lowest* matches instead
    # of limiting the result, so reject it up front.
    if top_n is not None and top_n < 0:
        raise ValueError("top_n must be non-negative.")

    jd_vec = document_vector(job_description, model)

    if np.all(jd_vec == 0):
        raise ValueError("JD vector is empty — possibly no valid words found in the model vocabulary.")

    similarities = []
    for i, cv in enumerate(cv_texts):
        cv_vec = document_vector(cv, model)
        if np.all(cv_vec == 0):
            # Nothing to compare against; score it as "no similarity".
            similarity = 0.0
        else:
            similarity = cosine_similarity([jd_vec], [cv_vec])[0][0]
        similarities.append((i, similarity))

    # Highest similarity first; sorted() is stable, so tied CVs keep their
    # original relative order.
    ranked = sorted(similarities, key=lambda x: x[1], reverse=True)
    top_matches = ranked[:top_n]

    # Report the number of matches actually shown, which can be smaller than
    # top_n when fewer CVs were supplied.
    print(f"\nTop {len(top_matches)} matching CVs for the given JD:\n" + "-" * 50)
    for idx, score in top_matches:
        print(f"CV #{idx + 1} | Similarity: {score:.4f}")  # 1-based for display
        text = cv_texts[idx]
        # Guard the slice so a missing (non-string) entry cannot crash the report.
        snippet = text[:300] if isinstance(text, str) else ""
        print(snippet + '...')  # Print a snippet
        print("-" * 50)

    return top_matches


In [11]:
# Sample job description used as the query when ranking the CVs below.
# The text is already lower-cased.
# NOTE(review): "34 years" and "****" look like scraping/redaction artifacts
# of the original posting — confirm against the source before relying on them.
job_description = "a leading ecommerce agency is looking to hire a web developer to join their team of ecommerce developers. the role will involve working on different types of client and a candidate should have the ability to create and implement unique, engaging, and intuitive work to meet client requirements. requirements: 34 years of experience in web development **** or more years on ecommerce projects in an agency setting development skills in the following technologies: o asp.net and asp o vb and vb.net o sql server / tsql o com/dcom o xml, html o javascript knowledge of microsoft commerce server, any other ecommerce solutions if this sounds like a challenge that you are ready to take up, send in your cv now"



In [15]:
import pandas as pd

# Read the table of extracted resumes from disk.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
cv_df = pd.read_csv(
    "d:/pre_course/Pratice/ResumeMatchingMVP/data/extracted_resumes.csv"
)

# Take the resume text column as a plain Python list, with missing cells
# replaced by the empty string.
cv_texts = cv_df.loc[:, "Extracted_Text"].fillna("").tolist()

# Score every CV against the job description and print/keep the five best.
ranked = rank_resumes_against_jd(
    job_description=job_description,
    cv_texts=cv_texts,
    model=model,
    top_n=5,
)


Top 5 matching CVs for the given JD:
--------------------------------------------------
CV #243 | Similarity: 0.9224
JESSICA CLAIRE
Montgomery Street, San Francisco, CA 94105⚫ (555) 432-1000⚫ resumesample@example.com
Professional Summary
Highly motivated Sales Associate with extensive customer service and sales experience. Outgoing sales professional with track record of driving increased sales, improving buying e...
--------------------------------------------------
CV #244 | Similarity: 0.9224
JESSICA CLAIRE
Montgomery Street, San Francisco, CA 94105 ⚫ (555) 432-1000 ⚫resumesample@example.com
Professional Summary
Highly motivated Sales Associate with extensive customer service and sales experience. Outgoing sales professional with track
record of driving increased sales, improving buying ...
--------------------------------------------------
CV #379 | Similarity: 0.9224
JESSICA CLAIRE
Montgomery Street, San Francisco, CA 94105⚫ (555) 432-1000⚫ resumesample@example.com
Professional S

In [16]:
# Show which source file each top-ranked CV came from.
# `idx` is a 0-based position in `cv_texts`, so pick the row by position
# (.iloc) instead of by index label; the two only coincide while `cv_df`
# keeps its default RangeIndex.
for idx, score in ranked:
    print(f"File: {cv_df['File'].iloc[idx]} | Score: {score:.4f}")


File: 28.docx | Score: 0.9224
File: 28.png | Score: 0.9224
File: 9.docx | Score: 0.9224
File: 9.png | Score: 0.9224
File: 137.docx | Score: 0.9144


In [17]:
# Collect the top matches (source file + similarity score) into a table.
# `idx` is a 0-based position in `cv_texts`, so the file name is looked up by
# position (.iloc) rather than by index label.
results = pd.DataFrame(
    [
        {"File": cv_df["File"].iloc[idx], "Similarity": score}
        for idx, score in ranked
    ],
    # Pin the columns so the CSV still gets a header when `ranked` is empty.
    columns=["File", "Similarity"],
)

# Written relative to the current working directory, without the row index.
results.to_csv("top_matched_resumes.csv", index=False)
