In [2]:

# 📂 Resume Screening AI - Project Notebook

## Phase 1: Dummy Resume Setup

resumes = {
    "Arun_Kumar.txt": """Name: Arun Kumar
Email: arun.ds@gmail.com
Phone: +91-9876543210
Summary:
Aspiring Data Scientist with strong foundation in statistics, machine learning, and Python programming.
Skills:
- Python, R, SQL
- Pandas, NumPy, Scikit-learn
- Data Visualization: Matplotlib, Seaborn
- Machine Learning, Deep Learning
- Tableau, Power BI
Experience:
Intern - Data Science, ABC Corp (6 months)
Built predictive models for customer churn.
Education:
MSc Data Science - Anna University, 2023""",

    "Priya_Mehta.txt": """Name: Priya Mehta
Email: priya.dev@outlook.com
Phone: +91-9988776655
Summary:
Software Engineer with 2+ years of experience in full-stack web development and backend systems.
Skills:
- Java, C++, Python
- HTML, CSS, JavaScript, React
- MySQL, MongoDB
- REST APIs, Git, Docker
Experience:
Software Developer - TechX Pvt Ltd (2021–2023)
Built scalable backend APIs and web dashboards.
Education:
BTech Computer Science - SRM Institute, 2021""",

    "Neha_Sharma.txt": """Name: Neha Sharma
Email: neha.marketing@gmail.com
Phone: +91-9123456780
Summary:
Marketing Analyst with a passion for campaign analytics, performance tracking, and digital tools.
Skills:
- Excel, Google Analytics, Power BI
- A/B Testing, Market Research
- Content Strategy, SEO
- CRM tools: HubSpot, Salesforce
Experience:
Marketing Analyst - DigiMark Agency (2020–2023)
Increased lead generation through targeted campaigns.
Education:
MBA Marketing - Christ University, 2020"""
}

## Phase 2: Extract Skills Function

skill_keywords = [
    'python', 'r', 'sql', 'pandas', 'numpy', 'scikit-learn', 'matplotlib', 'seaborn',
    'machine learning', 'deep learning', 'tableau', 'power bi',
    'java', 'c++', 'html', 'css', 'javascript', 'react',
    'mysql', 'mongodb', 'rest apis', 'git', 'docker',
    'excel', 'google analytics', 'a/b testing', 'market research',
    'content strategy', 'seo', 'hubspot', 'salesforce'
]

def extract_skills(resume_text, skill_set):
    found_skills = []
    resume_text_lower = resume_text.lower()
    for skill in skill_set:
        if skill in resume_text_lower:
            found_skills.append(skill)
    return found_skills

## Phase 3: Analyze Each Resume

for name, content in resumes.items():
    skills = extract_skills(content, skill_keywords)
    print(f"🔍 {name} - Extracted Skills:\n{skills}\n")

## Phase 4: Job Description Parsing and Matching

job_description = """We are looking for a Data Scientist with experience in Python, machine learning, and data visualization.
The ideal candidate should be proficient in Pandas, Scikit-learn, and have familiarity with Power BI or Tableau.
Knowledge of SQL and data cleaning techniques is a plus."""

job_required_skills = extract_skills(job_description, skill_keywords)
print("🎯 Skills from Job Description:\n", job_required_skills)

## Phase 5: Score Matching

def match_score(resume_skills, job_skills):
    resume_set = set(resume_skills)
    job_set = set(job_skills)
    common = resume_set.intersection(job_set)
    union = resume_set.union(job_set)
    if not union:
        return 0
    return round(len(common) / len(union), 2)

# Score each resume
for name, content in resumes.items():
    skills = extract_skills(content, skill_keywords)
    score = match_score(skills, job_required_skills)
    print(f"📄 {name} - Match Score: {score * 100:.0f}%")


🔍 Arun_Kumar.txt - Extracted Skills:
['python', 'r', 'sql', 'pandas', 'numpy', 'scikit-learn', 'matplotlib', 'seaborn', 'machine learning', 'deep learning', 'tableau', 'power bi']

🔍 Priya_Mehta.txt - Extracted Skills:
['python', 'r', 'sql', 'java', 'c++', 'html', 'css', 'javascript', 'react', 'mysql', 'mongodb', 'rest apis', 'git', 'docker']

🔍 Neha_Sharma.txt - Extracted Skills:
['r', 'power bi', 'git', 'excel', 'google analytics', 'a/b testing', 'market research', 'content strategy', 'seo', 'hubspot', 'salesforce']

🎯 Skills from Job Description:
 ['python', 'r', 'sql', 'pandas', 'scikit-learn', 'machine learning', 'tableau', 'power bi']
📄 Arun_Kumar.txt - Match Score: 67%
📄 Priya_Mehta.txt - Match Score: 16%
📄 Neha_Sharma.txt - Match Score: 12%
