
Resume Scoring Logic Documentation

The resume scoring system evaluates candidates based on three main components:

1. JD-CV Match Score (50 points)
   - Compares the skills mentioned in the resume with those required in the Job Description (JD).
   - Uses keyword/token overlap, fuzzy matching, or cosine similarity between JD keywords and extracted resume skills.
   - Example: If the JD has ['python', 'data science', 'nltk'], and the resume includes 2 out of 3 → Score = (2/3) * 50 ≈ 33.3

2. AI/ML/Tech Keyword Match (30 points)
   - Gives extra weight to resumes that include relevant AI/ML/data science terms like:
     'machine learning', 'deep learning', 'nltk', 'transformers', 'pandas', 'numpy', 'sklearn', 'power bi', etc.
   - Each keyword found contributes to the score, with a bonus for rare or advanced terms (e.g., 'transformers', 'LLMs').
   - Score = (# keywords found / total expected) * 30

3. Resume Quality Score (20 points)
   - Based on formatting cues:
     - Presence of Name, Email, and Phone (basic structure)
     - Clean layout (length not too long/short, 1–2 pages ideal)
     - Presence of sections like Education, Projects, Skills, Experience
   - Also checks whether batch years and graduation year can be extracted.
   - If all key formatting criteria are met → full 20 points; otherwise partial credit.

Final Resume Score = JD Match Score + AI Keyword Score + Resume Quality Score
Maximum Score: 100


In [None]:
def extract_experience_years(resume_text):
    """Estimate the total years of experience mentioned in a resume.

    Two kinds of mentions are summed:

    * explicit durations such as "5 years", "3+ years" or "2 yrs";
    * four-digit year ranges such as "2015 - 2018" or "2019 – present",
      where "present", "current" and "now" stand for the current year.

    Reversed ranges (end year earlier than start year) are ignored rather
    than subtracted from the total, so the result is never negative.

    Args:
        resume_text: Raw resume text.

    Returns:
        int: Estimated years of experience, capped at 20.
    """
    duration_patterns = (
        r"(\d+)\+?\s+years",
        r"(\d+)\+?\s+yrs",
    )
    # The digit lookarounds keep the range from matching inside longer
    # numbers (phone numbers, IDs); the trailing \b keeps "now" from
    # matching the start of a longer word.
    range_pattern = (
        r"(?<!\d)(\d{4})\s*[-–—]\s*"
        r"(2\d{3}(?!\d)|(?:present|current|now)\b)"
    )
    open_ended = ("present", "current", "now")

    total_years = 0

    for pattern in duration_patterns:
        for amount in re.findall(pattern, resume_text, re.IGNORECASE):
            try:
                total_years += int(amount)
            except ValueError:
                # int() can reject absurdly long digit runs; skip them.
                continue

    current_year = datetime.datetime.now().year
    for start_text, end_text in re.findall(
        range_pattern, resume_text, re.IGNORECASE
    ):
        start_year = int(start_text)
        if end_text.lower() in open_ended:
            end_year = current_year
        else:
            end_year = int(end_text)
        if end_year < start_year:
            # A reversed range is not a duration; do not subtract it.
            continue
        total_years += end_year - start_year

    # Cap at 20 years so overlapping or repeated mentions cannot inflate
    # the estimate without bound.
    return min(total_years, 20)

def check_ai_experience(resume_text):
    """Report which AI/ML keywords appear in a resume.

    Matching is case-insensitive and anchored on word boundaries, so a
    keyword embedded in a longer word is not counted.

    Args:
        resume_text: Raw resume text.

    Returns:
        dict: ``"has_ai_experience"`` is True when at least one keyword
        was found; ``"ai_skills"`` lists the matched keywords in the
        order they appear in the keyword vocabulary.
    """
    vocabulary = (
        "machine learning", "deep learning", "neural network", "tensorflow",
        "pytorch", "keras", "scikit-learn", "nlp", "natural language processing",
        "computer vision", "cv", "ai", "artificial intelligence", "data science",
        "predictive modeling", "reinforcement learning", "ml ops", "llm",
        "large language model", "transformers", "gpt", "bert", "data mining",
    )

    # Lower-case once; every keyword in the vocabulary is already lowercase.
    lowered = resume_text.lower()
    found = [
        term
        for term in vocabulary
        if re.search(r'\b' + re.escape(term) + r'\b', lowered)
    ]

    return {
        "has_ai_experience": bool(found),
        "ai_skills": found,
    }

def calculate_jd_match_score(resume_text, job_description):
    """Score how well a resume covers the skills a job description asks for.

    The required skills are the union of:

    * comma-separated phrases that follow wording such as
      "experience with ..." or "knowledge of ..." in the job description
      (a simplified heuristic, not real entity extraction);
    * well-known languages and tools that the job description mentions.

    Only skills that actually occur in the job description are counted,
    so a candidate is not penalised for lacking a tool the role never
    asked for.

    Args:
        resume_text: Raw resume text.
        job_description: Raw job description text.

    Returns:
        int: Percentage (0-100) of required skills found in the resume;
        0 when no skill could be identified in the job description.
    """
    resume_lower = resume_text.lower()
    jd_lower = job_description.lower()

    def mentions(text, term):
        # Lookarounds are used instead of word-boundary anchors because a
        # boundary never matches after a trailing symbol such as the "+"
        # in "c++".
        return re.search(r'(?<!\w)' + re.escape(term) + r'(?!\w)', text) is not None

    # Extract key skills/requirements from the JD (simplified approach).
    skill_pattern = r"(?:knowledge|experience|proficient|skill|familiarity).*?(in|with|of)\s+([a-zA-Z\s,]+)"
    required_skills = set()
    for _, skill_list in re.findall(skill_pattern, jd_lower):
        for phrase in skill_list.split(','):
            cleaned = phrase.strip()
            if len(cleaned) > 2:  # Avoid very short terms
                required_skills.add(cleaned)

    # Common languages and tools count only when the JD mentions them.
    # Stored as plain text; escaping happens at match time.
    common_skills = ["python", "java", "javascript", "c++", "sql", "nosql",
                     "aws", "azure", "docker", "kubernetes", "git", "agile"]
    required_skills.update(
        skill for skill in common_skills if mentions(jd_lower, skill)
    )

    matched = sum(1 for skill in required_skills if mentions(resume_lower, skill))

    # max(1, ...) guards against division by zero when nothing was extracted.
    return min(100, int((matched / max(1, len(required_skills))) * 100))

def score_resume(resume_text, job_description=""):
    """Score a resume on formatting, experience, education and skills.

    Component ranges, as implemented here:

    * formatting: 10-20 points for word count (20 for 300-1000 words,
      10 when shorter, 15 when longer) plus 2 points per recognised
      section heading keyword, up to 10 extra;
    * experience: 10-25 points from the estimated years of experience;
    * education: 5 points per education keyword found, up to 20;
    * skills: 2 points per technical skill found, up to 25.

    ``overall`` is the sum of the four components. The JD match score and
    the AI keyword report are returned alongside and do not contribute
    to ``overall``.

    Args:
        resume_text: Raw resume text.
        job_description: Optional job description; when empty the JD
            match score is reported as 0.

    Returns:
        dict: ``"scores"`` (component scores plus ``"overall"``),
        ``"years_experience"``, ``"jd_match_score"`` and
        ``"ai_experience"``.
    """
    lowered = resume_text.lower()

    def count_present(terms):
        # Number of terms that occur in the resume as whole words.
        return sum(
            1
            for term in terms
            if re.search(r'\b' + re.escape(term) + r'\b', lowered)
        )

    # Formatting: length band first, then a bonus for recognisable sections.
    n_words = len(resume_text.split())
    if n_words < 300:
        length_points = 10  # Too short
    elif n_words <= 1000:
        length_points = 20  # Good length
    else:
        length_points = 15  # Might be too verbose
    section_hits = count_present(
        ["experience", "education", "skills", "projects", "achievements"]
    )
    formatting_points = length_points + min(10, section_hits * 2)

    # Experience: the first threshold met (highest first) decides the points.
    years = extract_experience_years(resume_text)
    experience_points = next(
        (points for threshold, points in ((5, 25), (3, 20), (1, 15))
         if years >= threshold),
        10,
    )

    # Education
    education_hits = count_present(
        ["degree", "bachelor", "master", "phd", "university", "college"]
    )
    education_points = min(20, education_hits * 5)

    # Skills
    skill_hits = count_present([
        "python", "java", "javascript", "html", "css", "react", "node",
        "angular", "vue", "sql", "nosql", "mongodb", "aws", "azure",
        "gcp", "docker", "kubernetes", "git", "ci/cd", "agile", "scrum",
    ])
    skill_points = min(25, skill_hits * 2)

    scores = {
        "formatting": formatting_points,
        "experience": experience_points,
        "education": education_points,
        "skills": skill_points,
        "overall": (
            formatting_points + experience_points
            + education_points + skill_points
        ),
    }

    # The JD match is only computed when a job description was supplied.
    if job_description:
        jd_match = calculate_jd_match_score(resume_text, job_description)
    else:
        jd_match = 0

    ai_experience = check_ai_experience(resume_text)

    return {
        "scores": scores,
        "years_experience": years,
        "jd_match_score": jd_match,
        "ai_experience": ai_experience,
    }