In [2]:
import math

from file_io import extract_pdf_text
from embedding import get_embedding, similarity


In [3]:
# 1) Get resume text from PDF (your real resume path)
# NOTE(review): the doubled ".pdf.pdf" suffix looks like a rename artifact, but it
# presumably matches the file as it exists on disk - if the file is ever renamed,
# update this path at the same time.
resume_path = "sample_data/scanned_resume.pdf.pdf"  # change only if filename differs
resume_text = extract_pdf_text(resume_path)

# 2) Define JD as a string (simple example)
# The literal opens and closes on its own line, so the resulting text starts and
# ends with a newline character; it is used unmodified wherever jd_text is read.
jd_text = """
We are looking for a data scientist with strong skills in Python, machine learning,
data analysis, and experience building classification and regression models.
The candidate should know Pandas, NumPy, statistics, and SQL.
"""

# 3) Helper: convert score into human-readable explanation
def explain_score(score: float) -> str:
    """
    Convert a similarity score into human-readable text.

    Parameters
    ----------
    score : float
        Similarity score, nominally between 0 and 1.

    Returns
    -------
    str
        ``"<percentage>% match – <level>. <note>"`` for a finite score, or
        ``"Score unavailable – similarity could not be computed."`` when the
        score is NaN or infinite.

    Notes
    -----
    * A finite score outside 0..1 is clamped before it is formatted, so the
      percentage always reads between 0.0 and 100.0.
      NOTE(review): whether the upstream similarity function can actually
      return values outside 0..1 (e.g. a negative cosine similarity) is not
      visible here - confirm against its implementation.
    * NaN compares false against every threshold, so without the explicit
      check it would be labelled "Poor match" and look like a real verdict.
    """
    # A non-finite score means the comparison itself failed; say so instead
    # of dressing it up as a match level.
    if not math.isfinite(score):
        return "Score unavailable – similarity could not be computed."

    # 0.0 / 1.0 are passed first so that an input of -0.0 yields +0.0 and the
    # output never shows a negative zero.
    bounded = min(1.0, max(0.0, score))
    percentage = bounded * 100

    # (inclusive lower bound, level, note), checked from the highest band down;
    # the first band whose lower bound is reached wins.
    bands = (
        (0.75, "Excellent match", "This resume is highly aligned with the JD."),
        (0.55, "Good match", "Many skills are aligned, but there may be some gaps."),
        (0.35, "Weak match", "Only partially aligned. May not be the best fit."),
    )

    # Fallback for anything below the lowest band.
    level = "Poor match"
    note = "Resume is mostly unrelated to the JD."
    for lower_bound, band_level, band_note in bands:
        if bounded >= lower_bound:
            level, note = band_level, band_note
            break

    return f"{percentage:.1f}% match – {level}. {note}"

# 4) Embed the JD and the resume, then compare the two embeddings
jd_emb = get_embedding(jd_text)
resume_emb = get_embedding(resume_text)
score = similarity(jd_emb, resume_emb)

# 5) Show a 300-character preview of each text, followed by the score.
#    sep="\n" places each heading and its preview on separate lines.
print("JD text (first 300 chars):", jd_text[:300], sep="\n")
print("\nResume text (first 300 chars):", resume_text[:300], sep="\n")

# Raw number first, then the same score turned into a sentence
print("\nRaw similarity score:", score)
print("Human-readable:", explain_score(score))


JD text (first 300 chars):

We are looking for a data scientist with strong skills in Python, machine learning,
data analysis, and experience building classification and regression models.
The candidate should know Pandas, NumPy, statistics, and SQL.


Resume text (first 300 chars):
LIAM ANDERSON Scanner Operator © support@qwikresumecom \ (123)4567899 9 Los Angeles @ wwwaqwikresume.com Pe] PROFESSIONAL SUMMARY With 2 years of experience as a Scanner Operator, | excel in high- speed document imaging and data verification. My expertise in operating advanced scanning equipment ens

Raw similarity score: 0.09234675765037537
Human-readable: 9.2% match – Poor match. Resume is mostly unrelated to the JD.


In [4]:
# 1) JD text, defined again here so this cell can run without relying on the
#    variable from the earlier cell. It is the same wording as the JD above.
jd_text = """
We are looking for a data scientist with strong skills in Python, machine learning,
data analysis, and experience building classification and regression models.
The candidate should know Pandas, NumPy, statistics, and SQL.
"""

# 2) Three example resumes (text-only demo)
#    Each entry is a dict with two keys: "name" (label shown in the ranking)
#    and "text" (the resume body that gets embedded).
#    The indentation inside the triple-quoted strings is part of each "text"
#    value, so the embedded text includes that leading whitespace.
resume_texts = [
    {
        "name": "Resume A - Strong Data Scientist",
        "text": """
        I am a data scientist with 2 years of experience in Python and machine learning.
        I have built multiple classification and regression models using Pandas, NumPy,
        scikit-learn and I regularly work with SQL for data analysis.
        """
    },
    {
        "name": "Resume B - Somewhat Related (Data Analyst)",
        "text": """
        I work as a data analyst using Excel, Power BI, and some Python.
        I have basic experience in statistics and reporting, and I am learning
        machine learning models. I mostly focus on dashboards and business reports.
        """
    },
    {
        "name": "Resume C - Not Related (Sales)",
        "text": """
        I am a sales executive with experience in client relationships,
        marketing campaigns, lead generation, and negotiating deals.
        I work with CRM tools and manage a small sales team.
        """
    }
]

# 3) Embed the JD once; every resume below is compared against this embedding
jd_emb = get_embedding(jd_text)

# 4) Build (name, similarity) pairs, one per example resume
results = [
    (item["name"], similarity(jd_emb, get_embedding(item["text"])))
    for item in resume_texts
]

# 5) Order a copy from most to least similar; `results` keeps the input order
results_sorted = list(results)
results_sorted.sort(key=lambda pair: pair[1], reverse=True)

# 6) Print the ranking, numbering from 1
print("Ranking of example resumes for this JD:\n")
for rank, (name, score) in enumerate(results_sorted, 1):
    print(f"{rank}. {name} --> similarity = {score:.4f}")


Ranking of example resumes for this JD:

1. Resume A - Strong Data Scientist --> similarity = 0.6863
2. Resume B - Somewhat Related (Data Analyst) --> similarity = 0.6173
3. Resume C - Not Related (Sales) --> similarity = 0.2949
