In [26]:
pip install python-docx sentence-transformers




In [27]:
import docx
import re
from sentence_transformers import SentenceTransformer, util
from openpyxl import Workbook, load_workbook
import os

# Load the Sentence Transformer model once at module level.
# semantic_match_score() reads this global to embed both the CV text and the
# job description, so it must be defined before that function is called.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Editable job description: the reference text every CV is compared against.
# analyze_cv() derives its keyword list from this string and also passes the
# whole string to semantic_match_score(). Edit the bullet points to target a
# different position.
job_description = """
•	Evaluate new product requests and provide technical recommendations
•	Act as an expert in the development of purchased or manufactured products
•	Perform tests, develop protocols, and analyze results
•	Communicate directly with franchisees and the customer service team to address technical inquiries
•	Conduct on-site studies and 3D scans in various locations (residences, hotels, cruise ships)
•	Present new products to internal teams and the franchise network through interactive demonstrations
•	Contribute to the product certification process and ensure compliance with manufacturing standards
"""

# Read a Word document and return its paragraph text
def extract_text_from_cv(cv_path):
    """Return the text of every paragraph in a Word document, one per line.

    Args:
        cv_path: Path of the .docx file; handed straight to ``docx.Document``.

    Returns:
        A single string made of the paragraph texts joined with newlines.
    """
    document = docx.Document(cv_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

# Extract skills from job description
def extract_skills_from_job_description(job_desc):
    """Return the distinct keywords of a job description in order of first appearance.

    A keyword is a run of ASCII letters (as matched by ``\\b[A-Za-z]+\\b``) that
    is longer than two characters and is not one of a small set of stopwords.
    Words are lower-cased before filtering and de-duplication.

    Args:
        job_desc: Free-text job description.

    Returns:
        List of unique lower-case keywords. The order is deterministic (first
        occurrence in ``job_desc``), so repeated runs produce the same list.
    """
    stopwords = {"the", "and", "for", "with", "you", "are", "our", "etc"}
    lowered_words = (word.lower() for word in re.findall(r'\b[A-Za-z]+\b', job_desc))
    # dict.fromkeys() de-duplicates while keeping insertion order. The previous
    # list(set(...)) returned the keywords in an order that changes between
    # interpreter runs because string hashing is randomized.
    return list(dict.fromkeys(
        word for word in lowered_words
        if len(word) > 2 and word not in stopwords
    ))

# Extract skills from CV based on job description
def extract_skills_from_cv(cv_text, job_skills):
    """Return the job skills that occur in the CV as whole words or phrases.

    Matching is case-insensitive and anchored on word boundaries, so a short
    skill such as "act" is not reported just because the CV contains
    "contract".

    Args:
        cv_text: Full text of the CV.
        job_skills: Iterable of skill strings to look for.

    Returns:
        The skills found in the CV, in the same order as ``job_skills``.
    """
    cv_lower = cv_text.lower()  # lower-case once instead of once per skill
    matched = []
    for skill in job_skills:
        # Lookarounds instead of \b so that skills which start or end with a
        # non-word character are still bounded correctly.
        pattern = r"(?<!\w)" + re.escape(skill.lower()) + r"(?!\w)"
        if re.search(pattern, cv_lower):
            matched.append(skill)
    return matched

# Shared helper for the keyword-based sentence extractors
def _sentences_with_keywords(cv_text, keywords):
    """Return the stripped fragments of ``cv_text`` that mention any keyword.

    The text is cut at every period and at every line break. Line breaks count
    as separators because the CV text is a newline-joined list of paragraphs,
    and a heading or bullet point without a trailing period would otherwise be
    fused with the lines that follow it.

    Args:
        cv_text: Full text of the CV.
        keywords: Lower-case substrings to look for (case-insensitive match).

    Returns:
        List of matching fragments, in document order, with surrounding
        whitespace removed.
    """
    fragments = re.split(r"[.\r\n]", cv_text)
    return [
        fragment.strip()
        for fragment in fragments
        if any(keyword in fragment.lower() for keyword in keywords)
    ]

# Extract experience sentences
def extract_experience_from_cv(cv_text):
    """Return the fragments of the CV that mention work-experience keywords."""
    keywords = ["experience", "worked", "internship", "job", "project"]
    return _sentences_with_keywords(cv_text, keywords)

# Extract education sentences
def extract_education_from_cv(cv_text):
    """Return the fragments of the CV that mention education keywords."""
    keywords = ["degree", "university", "college", "bachelor", "master", "school", "education"]
    return _sentences_with_keywords(cv_text, keywords)

# Score how semantically close a CV is to the job description
def semantic_match_score(cv_text, job_desc):
    """Return the cosine similarity of the two texts' embeddings, scaled by 10.

    Both texts are encoded with the module-level ``model``. The similarity is
    multiplied by 10 and rounded to two decimal places.

    Args:
        cv_text: Full text of the CV.
        job_desc: Job description text.

    Returns:
        The scaled, rounded similarity as a float.
    """
    cv_vector, job_vector = (
        model.encode(text, convert_to_tensor=True)
        for text in (cv_text, job_desc)
    )
    similarity = util.pytorch_cos_sim(cv_vector, job_vector).item()
    return round(similarity * 10, 2)

# Run every extractor on one CV and bundle the results
def analyze_cv(cv_path):
    """Analyze a single CV against the module-level ``job_description``.

    Args:
        cv_path: Path of the .docx CV to analyze.

    Returns:
        Dict with the keys "File Name", "Skills", "Experience", "Education"
        and "Match Score". Skills are comma-separated; experience and
        education fragments are separated by " | ".
    """
    cv_text = extract_text_from_cv(cv_path)
    required_skills = extract_skills_from_job_description(job_description)

    report = {"File Name": os.path.basename(cv_path)}
    report["Skills"] = ", ".join(extract_skills_from_cv(cv_text, required_skills))
    report["Experience"] = " | ".join(extract_experience_from_cv(cv_text))
    report["Education"] = " | ".join(extract_education_from_cv(cv_text))
    report["Match Score"] = semantic_match_score(cv_text, job_description)
    return report

# Append one result row to the Excel report, creating the file on first use
def save_to_excel(result, output_excel):
    """Append ``result`` as a new row of the workbook at ``output_excel``.

    If the file does not exist yet, a new workbook is created whose active
    sheet is titled "ATS Results" and starts with a header row. Otherwise the
    existing workbook is loaded and the row is added to its active sheet.

    Args:
        result: Mapping with the keys "File Name", "Skills", "Experience",
            "Education" and "Match Score".
        output_excel: Path of the .xlsx file to create or extend.
    """
    columns = ["File Name", "Skills", "Experience", "Education", "Match Score"]

    if os.path.exists(output_excel):
        workbook = load_workbook(output_excel)
        sheet = workbook.active
    else:
        workbook = Workbook()
        sheet = workbook.active
        sheet.title = "ATS Results"
        sheet.append(columns)

    # The data row uses the same column order as the header row.
    sheet.append([result[column] for column in columns])
    workbook.save(output_excel)

# Main program
def main():
    """Interactively analyze a number of CVs and append each result to an Excel file.

    The user is asked how many CVs to process and then for the path of each
    one. Every valid CV is analyzed with ``analyze_cv`` and written to
    "cv_results.xlsx" via ``save_to_excel`` immediately, so results already
    saved survive a later failure.

    Input handling:
        * A non-numeric count is rejected and asked for again instead of
          raising ValueError.
        * An invalid path is asked for again, so a typo does not silently
          use up that CV's slot. An empty line skips the CV.
        * The path must point to an existing regular file whose name ends in
          ".docx" (case-insensitive).
    """
    output_excel = "cv_results.xlsx"

    while True:
        try:
            num_files = int(input("How many CVs would you like to analyze? "))
            break
        except ValueError:
            print("❌ Please enter a whole number.")

    for i in range(1, num_files + 1):
        print(f"\n[{i}] Please enter the full path to CV #{i} (Word .docx file):")

        # Keep prompting until the path is usable or the user opts out.
        while True:
            cv_path = input("> ").strip()
            if not cv_path:
                break
            # isfile() rejects directories; lower() accepts ".DOCX" as well.
            if os.path.isfile(cv_path) and cv_path.lower().endswith(".docx"):
                break
            print("❌ Invalid file. Please try again (or press Enter to skip).")

        if not cv_path:
            print(f"⏭️ Skipped CV #{i}.")
            continue

        print(f"✅ Analyzing {os.path.basename(cv_path)}...")
        result = analyze_cv(cv_path)
        save_to_excel(result, output_excel)
        print(f"📄 Result for {result['File Name']} saved.\n")

    print(f"✅ All done! Results saved in '{output_excel}'")

# Run the script: executing this cell starts the interactive prompts in main()
main()


How many CVs would you like to analyze? 2

[1] Please enter the full path to CV #1 (Word .docx file):
> /content/Initial CV Calgary (AutoRecovered).docx
✅ Analyzing Initial CV Calgary (AutoRecovered).docx...
📄 Result for Initial CV Calgary (AutoRecovered).docx saved.


[2] Please enter the full path to CV #2 (Word .docx file):
> /content/Initial CV.docx
✅ Analyzing Initial CV.docx...
📄 Result for Initial CV.docx saved.

✅ All done! Results saved in 'cv_results.xlsx'
