# Notebook 6: Resume Scoring System

## Objective
Develop a comprehensive rule-based scoring system to evaluate resume quality and job fit.

## Goals
1. Implement completeness scoring (required sections present)
2. Calculate skills matching score (with/without job description)
3. Assess experience relevance
4. Evaluate formatting and structure quality
5. Combine into overall score (0-100)

## Dependencies
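- `re` - Pattern matching
- `pathlib` - File system paths
- `json`, `collections` - Standard-library helpers imported in the setup cell
- `pandas` - Data manipulation
- `matplotlib`, `seaborn` - Visualization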

## Approach
**Rule-based scoring** - More reliable and interpretable than ML for this use case

## Scoring Components (Total: 100 points)
1. **Completeness (30 pts)** - Essential sections present
2. **Skills Match (25 pts)** - Relevant technical skills
3. **Experience (20 pts)** - Years and relevance
4. **Education (15 pts)** - Degree level and relevance
5. **Formatting (10 pts)** - Structure and readability


---


## 1. Setup and Imports


In [None]:
import re
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import json

# Data locations, resolved relative to the current working directory
DATA_DIR = Path('../data')
SAMPLES_DIR = DATA_DIR.joinpath('samples')
SCORES_DIR = DATA_DIR.joinpath('scores')

# Make sure the output directory exists (missing parents are created too;
# an already existing directory is not an error)
SCORES_DIR.mkdir(parents=True, exist_ok=True)

# Confirm the environment in one go: one status line per item
print(
    "✓ All imports successful",
    f"✓ Samples directory: {SAMPLES_DIR.absolute()}",
    f"✓ Scores directory: {SCORES_DIR.absolute()}",
    sep="\n",
)


---


## 2. Completeness Scoring (30 points)

Award points for having essential resume sections.


In [None]:
def score_completeness(text: str) -> dict:
    """
    Rate how complete a resume is by checking for its key sections.

    Each section is detected with a case-insensitive keyword regex and is
    worth a fixed number of points; the five weights add up to 30.

    Args:
        text: Resume text

    Returns:
        Dictionary with:
            score: points earned (0-30)
            max_score: always 30
            sections_found: section name -> True/False
            completeness_percent: score as a percentage of max_score
    """
    max_points = 30

    # section name -> (keyword regex, points awarded when the regex matches)
    section_rules = {
        'contact': (r'\b(email|phone|@|contact)\b', 5),
        'summary': (r'\b(summary|profile|objective|about)\b', 3),
        'experience': (r'\b(experience|employment|work history)\b', 10),
        'education': (r'\b(education|degree|university|college)\b', 7),
        'skills': (r'\b(skills|competencies|technologies)\b', 5),
    }

    # First decide which sections are present ...
    presence = {
        name: re.search(regex, text, re.IGNORECASE) is not None
        for name, (regex, _) in section_rules.items()
    }

    # ... then add up the points of the sections that were found
    earned = sum(
        points
        for name, (_, points) in section_rules.items()
        if presence[name]
    )

    return {
        'score': earned,
        'max_score': max_points,
        'sections_found': presence,
        'completeness_percent': (earned / max_points) * 100
    }


# Smoke test: a minimal resume that contains every section the scorer looks for
test_resume = """
John Doe
Email: john@example.com
Phone: 123-456-7890

SUMMARY
Experienced software engineer

EXPERIENCE
Senior Developer at Tech Corp (2020-Present)

EDUCATION
BS in Computer Science, MIT

SKILLS
Python, Java, React
"""

result = score_completeness(test_resume)

# Header, then the overall score
print("Completeness Scoring Test", "=" * 60, sep="\n")
print(f"Score: {result['score']}/{result['max_score']} points ({result['completeness_percent']:.0f}%)")

# Per-section checklist: tick for found, cross for missing
print("\nSections found:")
for section, found in result['sections_found'].items():
    if found:
        status = "✓"
    else:
        status = "✗"
    print(f"  {status} {section.capitalize()}")

print("\n✓ Completeness scoring function defined")


---


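## 3. Skills Matching Score (25 points)

Award up to 15 points for the number of recognized skills, plus up to 10 bonus points for overlap with a job description.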


In [None]:
# Recognized skills, grouped by category (names only; no per-skill weights)
SKILL_DATABASE = dict(
    programming=['Python', 'Java', 'JavaScript', 'C++', 'C#', 'TypeScript', 'Ruby', 'Go', 'Rust', 'PHP'],
    web=['React', 'Angular', 'Vue.js', 'Node.js', 'Django', 'Flask', 'HTML', 'CSS', 'Express'],
    database=['SQL', 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', 'Oracle', 'Elasticsearch'],
    cloud=['AWS', 'Azure', 'GCP', 'Docker', 'Kubernetes', 'Jenkins', 'Terraform'],
    ml_ai=['Machine Learning', 'Deep Learning', 'TensorFlow', 'PyTorch', 'NLP', 'Computer Vision'],
    tools=['Git', 'GitHub', 'Jira', 'Agile', 'Scrum', 'CI/CD', 'Linux'],
)

# Single flat list of every skill, in category order
ALL_SKILLS = [skill for group in SKILL_DATABASE.values() for skill in group]

print(f"Skill Database Loaded: {len(ALL_SKILLS)} skills across {len(SKILL_DATABASE)} categories")


In [None]:
def extract_skills_from_text(text: str, skills: list = None) -> list:
    """
    Extract skills mentioned in text.

    Matching is case-insensitive and only accepts a skill name that is not
    directly preceded or followed by a letter or digit. Plain substring
    matching would otherwise report 'Java' for "JavaScript", 'SQL' for
    "PostgreSQL", 'Go' for "Google" or 'Git' for "GitHub".

    Note: skills that are also ordinary words (e.g. 'Go', 'Express') still
    match those words when they stand alone, because case is ignored.

    Args:
        text: Text to scan. Empty or None yields an empty list.
        skills: Optional list of skill names to look for. Defaults to the
            module-level ALL_SKILLS.

    Returns:
        List of the skill names found, in the order they appear in the
        skill list (not in the order they appear in the text).
    """
    if not text:
        return []

    candidates = ALL_SKILLS if skills is None else skills
    found_skills = []

    for skill in candidates:
        # Explicit alphanumeric look-arounds instead of \b: \b does not work
        # next to names that end in punctuation such as 'C++' or 'C#'.
        pattern = r'(?<![A-Za-z0-9])' + re.escape(skill) + r'(?![A-Za-z0-9])'
        if re.search(pattern, text, re.IGNORECASE):
            found_skills.append(skill)

    return found_skills


def score_skills(resume_text: str, job_description: str = None) -> dict:
    """
    Score skills based on quantity and job match.

    The score has two parts:
      * Base score (0-15) from the number of recognized skills in the
        resume: 10+ skills -> 15, 7-9 -> 12, 5-6 -> 10, 3-4 -> 7,
        fewer than 3 -> 2 points per skill.
      * Match bonus (0-10), only when a job description is given: the
        share of the job description's skills that also appear in the
        resume, scaled to 10 and rounded down.

    Without a job description the bonus is 0, so the highest reachable
    score is 15 out of 25.

    Args:
        resume_text: Resume text
        job_description: Optional job description for matching

    Returns:
        Dictionary with:
            score: total points, capped at 25
            max_score: always 25
            skills_found: skills detected in the resume
            skills_count: number of skills detected in the resume
            matched_skills: resume skills that the job description also
                mentions, listed in resume order
            match_score: the match bonus included in score
    """
    resume_skills = extract_skills_from_text(resume_text)

    # Base score for having skills
    num_skills = len(resume_skills)

    # Scoring tiers
    if num_skills >= 10:
        base_score = 15
    elif num_skills >= 7:
        base_score = 12
    elif num_skills >= 5:
        base_score = 10
    elif num_skills >= 3:
        base_score = 7
    else:
        base_score = num_skills * 2  # 2 points per skill

    # Bonus for job description match
    match_score = 0
    matched_skills = []

    if job_description:
        job_skills = extract_skills_from_text(job_description)
        job_skill_set = set(job_skills)

        # Walk the resume skills in order rather than intersecting two sets:
        # set iteration order for strings can differ between runs, which made
        # the reported list order unstable. dict.fromkeys drops duplicates
        # while keeping first-seen order.
        matched_skills = [
            skill for skill in dict.fromkeys(resume_skills)
            if skill in job_skill_set
        ]

        # Award points for matches (integer arithmetic: whole points, rounded down)
        if job_skills:
            match_score = min(10, (10 * len(matched_skills)) // len(job_skills))

    total_score = min(25, base_score + match_score)

    return {
        'score': total_score,
        'max_score': 25,
        'skills_found': resume_skills,
        'skills_count': num_skills,
        'matched_skills': matched_skills,
        'match_score': match_score
    }


# Demo: a resume line listing ten skills, scored against a job posting that names four
test_resume = "Skills: Python, Java, React, AWS, Docker, PostgreSQL, Git, Agile, TensorFlow, Kubernetes"
test_job = "Looking for candidate with Python, AWS, Docker, Kubernetes experience"

result = score_skills(test_resume, job_description=test_job)

print("Skills Scoring Test", "=" * 60, sep="\n")

# What was detected in the resume
print(
    f"Resume skills found: {result['skills_count']}",
    f"  {result['skills_found']}",
    sep="\n",
)

# Which of those the job posting also asks for
print(
    f"\nJob skills matched: {len(result['matched_skills'])}",
    f"  {result['matched_skills']}",
    sep="\n",
)

# Total, split into base score (total minus bonus) and match bonus
print(
    f"\nScore: {result['score']}/{result['max_score']} points",
    f"  Base score: {result['score'] - result['match_score']}",
    f"  Match bonus: {result['match_score']}",
    sep="\n",
)

print("\n✓ Skills scoring function defined")


---
