In [5]:
#!pip install spacy

In [1]:
import os
import sys
import sqlite3
import re
import PyPDF2
from flask import Flask, request, render_template, jsonify
from werkzeug.utils import secure_filename
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime
import dateparser
from flask import redirect, url_for
from flask import flash
from flask import Response
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from flask import send_file
import io
from reportlab.lib.utils import simpleSplit


app = Flask(__name__)
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

# Defining Hardcoding skills to extract from the Candidates Profile
skill_keywords = [
        "Python", "Java", "C++", "TensorFlow", "PyTorch", "GraphQL",
        "Scikit-learn", "Machine Learning", "Deep Learning", "Data Science", 
        "Big Data", "Cloud Computing", "Azure", "MySQL", "MATLAB", "Hadoop",
        "Big-Data", "Data Analytics", "Data Analyst", "Predictive Modeling",
        "Keras", "Statistical Modeling", "Statistical Analysis", 
        # Additional skills
        "Django", "Flask", "JavaScript", "TypeScript", "React", "Node.js", 
        "Angular", "Vue.js", "Ruby", "C#", "Swift", "Scala", "Kotlin", 
        "Rust", "Golang", "Insomnia", "Postman", "Shell Scripting", "Bash",
        "Spark", "Kafka", "MongoDB", "PostgreSQL", "Redis", "NoSQL",  
        "Docker", "Kubernetes", "CI/CD", "GitHub", "GitLab", "Jenkins", 
        "Terraform", "Ansible", "Puppet", "Selenium", "Network Security",
        "DevOps", "Agile", "Scrum", "Test Automation", "Unit Testing",  
        "Pandas", "NumPy", "Matplotlib", "Seaborn", "OpenCV", "Computer Vision", 
        "Reinforcement Learning", "Data Engineering", "Data Warehousing", 
        "Tableau", "Power BI", "Business Intelligence", "Data Pipeline",
        "Graph Databases", "Elasticsearch", "Quantum Computing", "JIRA",
        "Blockchain", "Cryptocurrency", "Smart Contracts", "Microservice", 
        "API Development", "OAuth", "Web Services", "RESTful APIs", 
        "Web Scraping", "Web Development", "Mobile Development", "Android", 
        "Flutter", "Xamarin", "Networking", "Cybersecurity", "Large Language Model",
        "Penetration Testing", "Intrusion Detection", "Cloud Security", "DevSecOps",

        # Adding more skills
        "Alteryx", "Data Mining", "DevSecOps", "Data Visualization", "Data Visualisation",
        "Microsoft Office", "Powerpoint", ".NET", "Dotnet", "MXNet", "Apache"
        "Feature Engineering", "Data Exploration", "Prescriptive Analytics",
        "Predictive Analytics", "Predictive Models Analysis", "Forecast", "Data Mining",
        "Quantitative analysis", "Assembly", "Perl", "Assembly", "Qlik Sense", "Qlik Sense",
        "Snowflake", "Neural Network", "GANs", "LangChain", "MLflow", "Hugging Face",
        "AutoML", "XGBoost", "LightGBM", "CatBoost", "Grafana",

        # Cyber Security Skills
        "Burp Suite", "Kali Linux", "Nmap", "Wireshark", "Packet Tracer", "Splunk",
        "Metasploit", "Prometheus",

        # Testing / QA
        "TestNG", "JUnit", "Cypress",

        # Mobile Development
        "React Native", "SwiftUI", "Ionic",  
    ]

# Defining Single Digit Skills
singledigit_skills = ["AI", "R", "C", "AWS", "LLM", "GO", "NLP", "ETL",
                      "GCP", "HTML", "CSS", "PHP", "SQL", "ELK", "JWT",
                      "SPSS", "SOAP", "JAX", "IAM",
                      "Go", "Aws", "Visio", "Excel", "Vuex"                     # Differently written single word skills
                     ]

# Global Variable
job_descriptionGlobal = ""
resume_YearsExperience = 0

def extract_text(filepath):
    text = ""
    if filepath.lower().endswith(".pdf"):
        try:
            with open(filepath, "rb") as pdf_file:
                reader = PyPDF2.PdfReader(pdf_file)
                for page in reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        except Exception as e:
            print(f"Error reading PDF file: {e}", file=sys.stderr)
    elif filepath.lower().endswith(".docx"):
        try:
            import mammoth
            with open(filepath, "rb") as docx_file:
                result = mammoth.extract_raw_text(docx_file)
                text = result.value if result else ""
        except ImportError:
            print("Mammoth module not found. Cannot parse DOCX files.", file=sys.stderr)
        except Exception as e:
            print(f"Error reading DOCX file: {e}", file=sys.stderr)
    return text.strip()

def extract_email(text):
    match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}", text)
    return match.group(0) if match else "Email not found"

# Works for Ashish also and others but with a line break with ashish
def extract_phone(text):
    # Update regex to handle possible line breaks and other delimiters
    match = re.search(r"(?:\+\d{1,3}\s?)?(?:\(?\d{2,4}\)?[\s.-]*)(\d{2,4})[\s.-]*(\d{2,4})[\s.-]*(\d{2,4})", text)
    return match.group(0) if match else "Phone not found"

def normalize_skill(skill):
    return skill.replace("-", "").replace(" ", "").lower()     #Normalize a skill by removing hyphens, spaces, and converting to lowercase.

# Match skills even if not capitalized, ignore hyphens, spaces, etc
def extract_skills(text):
    text_lower = text.lower()
    
    # Create a mapping of normalized skills to original skill names
    normalized_skill_map = {normalize_skill(skill): skill for skill in skill_keywords}
    
    # Find matches in text
    extracted_skills = [
        normalized_skill_map[norm_skill]
        for norm_skill in normalized_skill_map
        if norm_skill in normalize_skill(text_lower)
    ]

    return extracted_skills if extracted_skills else ["No Skills Found"]

# Removing duplicate skills from the list
def deduplicate_skills(skills):
    seen = {}
    for skill in skills:
        key = skill.lower()
        # Prefer capitalized skill if seen before in lowercase
        if key not in seen or skill[0].isupper():
            seen[key] = skill
    return list(seen.values())

# new function handles experience parsing without "-" new REGEX (Extract Years from Resume)
def categorize_experience(text):
    global resume_YearsExperience  # Ensure we use the latest stored resume_YearsExperience
    
    match = re.search(r"(?:(?:over|more than|at least|up to)\s*)?(\d{1,2})\s*(?:\+\s*)?years?", text.lower())
    if match:
        years = int(match.group(1))
    else:
        # Updated regex: supports "Aug 2022 - Aug 2023" and "Jun 2024 Aug 2024"
        date_match = re.findall(
            r"(\w{3,9})\s+(\d{4})\s*(?:[-–]\s*|(?=\w{3,9}\s+\d{4}))(\w{3,9})\s+(\d{4})", text
        )
        years = 0
        if date_match:
            for start_month, start_year, end_month, end_year in date_match:
                try:
                    start_date = dateparser.parse(f"{start_month} {start_year}")
                    end_date = dateparser.parse(f"{end_month} {end_year}")
                    if start_date and end_date:
                        duration = (end_date.year - start_date.year) + (end_date.month - start_date.month) / 12
                        years += max(0, duration)
                except Exception:
                    continue

    resume_YearsExperience = years            # Globally storing number of experience years
    print('YEARS before globally storing', years)
    
    if years >= 7:
        return "Senior"
    elif years >= 3 and years <=7:
        return "Mid-Level"
    elif years > 0:
        return "Junior"
    return "Not Mentioned"

# Extracting experience years from the job_description
def get_required_experience(job_description):
    """Extracts required experience level from job description text."""
    if not job_description:
        return "Not Mentioned"

    job_description = job_description.lower()

    if "intern" in job_description or "pursuing" in job_description:
        return "Intern"
    elif any(term in job_description for term in ["entry-level", "0-2 years", "junior"]):
        return "Junior"
    elif any(term in job_description for term in ["3-7 years", "mid-level"]):
        return "Mid-Level"
    elif any(term in job_description for term in ["7+ years", "senior"]):
        return "Senior"

    return "Not Mentioned"


def adjust_match_score(match_score, experience_level, job_description, resume_skills):
    required_experience = get_required_experience(job_description)
    job_skills = extract_skills_from_job(job_description)
    single_skills = match_single_digit_skills(job_description, singledigit_skills)
    job_skills = job_skills + single_skills                             # Combine both skills into single list

    experience_mapping = {
        "Intern": ["Intern"],
        "Junior": ["Entry-Level", "0-2 years", "Junior"],
        "Mid-Level": ["Mid-Level", "3-7 years"],
        "Senior": ["Senior", "7+ years"]
    }

    job_description_lower = job_description.lower()
    
    # Experience Level Boost (20%)
    
    # job_description (if Internship job matched with a junior, senior, mid-level employee boosted)
    if any(term in job_description_lower for term in ["intern", "internship", "interns", "entry-level"]):
        if experience_level in ["Intern", "Junior", "Mid-Level", "Senior"]:
            print(f"Intern Boost Applied for {experience_level}")
            print("Experience_level =>", experience_level)
            print("MatchScoreBefore boost =>", match_score)
            match_score = min(100, match_score * 1.20)  # Boost score by 20%
            print("MatchScoreAfter boost =>", match_score)
    # Matched with Junior position
    elif any(term in job_description_lower for term in ["entry-level", "entry level", "0-2 year"]):
        if experience_level in ["Junior", "Senior", "Mid-Level"]:
            print(f"Junior Match Applied for {experience_level}")
            print("Experience_level =>", experience_level)
            print("MatchScoreBefore boost =>", match_score)
            match_score = min(100, match_score * 1.20)  # Boost score by 20%
    # Matched with Mid-Level position
    elif any(term in job_description_lower for term in ["mid-level", "mid level", "3-7 year"]):
        if experience_level in ["Senior", "Mid-Level"]:
            print(f"MidLevel Match Applied for {experience_level}")
            print("Experience_level =>", experience_level)
            print("MatchScoreBefore boost =>", match_score)
            match_score = min(100, match_score * 1.20)  # Boost score by 20%
    # Matched with Senior position
    elif any(term in job_description_lower for term in ["senior", "7+ year"]):
        if experience_level in ["Senior", "senior"]:
            print(f"Senior Match Applied for {experience_level}")
            print("Experience_level =>", experience_level)
            print("MatchScoreBefore boost =>", match_score)
            match_score = min(100, match_score * 1.20)  # Boost score by 20%
            
    
    # Skills Match Boost (Up to 20%)
    skill_overlap = len(set(resume_skills) & set(job_skills))
    if skill_overlap > 0:
        skill_boost = min(100, match_score * (1 + (0.02 * skill_overlap)))  # 2% boost per matching skill
        print('Matching Score Before -> Skills Match Boosting', match_score)
        print(f"[DEBUG] Skill Match Boost: {skill_overlap} matching skills | New Score: {skill_boost}")
        match_score = skill_boost
    
    return min(100, match_score), skill_overlap


def save_to_db(name, email, phone, skills, score, experience_level, experience_years=0.0, num_skills_matched=0):    
# def save_to_db(name, email, phone, skills, score, experience_level):
    try:
        conn = sqlite3.connect("resumes.db", check_same_thread=False)
        cursor = conn.cursor()
       
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS resumes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT, email TEXT, phone TEXT, skills TEXT, score REAL, experience_level TEXT, report TEXT,
                experience_years REAL, skills_matched INTEGER                     
            )
        """)        

        # ✅ Generate the formatted report text
        report = f"""
        Resume ATS Analysis Report

        Filename: {name}
        Email: {email}
        Phone: {phone}
        Experience Level: {experience_level}
        Skills: {", ".join(skills)}
        Match Score: {score}%

        Summary:
        - This resume has been analyzed against a provided job description.
        - The match score is calculated based on experience and skills.
        - Consider improving skills based on job description requirements.

        Thank you for using the Resume ATS Scanner.
        """
        
        cursor.execute("""
            INSERT INTO resumes (name, email, phone, skills, score, experience_level, report, experience_years, skills_matched)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            name, email, phone, ",".join(skills), score, experience_level, report,
            experience_years, num_skills_matched
        ))
        
        conn.commit()
    except Exception as e:
        print(f"Error saving to database: {e}", file=sys.stderr)
    finally:
        conn.close()

@app.route("/download/<filename>")
def download_report(filename):
    """Generates and downloads the ATS match report as a multi-page PDF with wrapped text."""
    global job_descriptionGlobal  # Ensure we use the latest stored job description

    conn = sqlite3.connect("resumes.db", check_same_thread=False)
    cursor = conn.cursor()
    cursor.execute("SELECT email, phone, skills, score, experience_level FROM resumes WHERE name = ?", (filename,))
    row = cursor.fetchone()
    conn.close()

    if not row:
        return "Report not found!", 404

    email, phone, skills, score, experience_level = row
    skills_list = skills.split(",")

    # Extract job-required skills
    job_skills = extract_skills_from_job(job_descriptionGlobal)
    print(f"[DEBUG] Extracted Skills from Job Description: {job_skills}")  # ✅ Debugging Output

    single_skills = match_single_digit_skills(job_descriptionGlobal, singledigit_skills)
    print("Single Digit Skills !!! ", single_skills)
    job_skills = job_skills + single_skills                             # Combine both skills into single list

    # Compare skills
    matched_skills, missing_skills = match_resume_skills(skills_list, job_skills)

    # Create an in-memory PDF buffer
    pdf_buffer = io.BytesIO()
    pdf = canvas.Canvas(pdf_buffer, pagesize=letter)
    pdf.setFont("Helvetica", 12)

    # Page settings
    y_position = 750  # Start writing from this Y position
    line_spacing = 20
    margin_left = 50
    page_height = 750  # Approximate max Y height before adding a new page
    summary_text = [
        "✅ Resume analyzed against job description.",
        "✅ Score based on skills and experience match.",
        "✅ Consider improving skills based on job requirements."
    ]

    def add_new_page():
        """Handles adding a new page if the content exceeds the page height."""
        nonlocal y_position
        pdf.showPage()
        pdf.setFont("Helvetica", 12)
        y_position = page_height  # Reset Y position for the new page

    # **1️⃣ Title Section**
    pdf.setFont("Helvetica-Bold", 18)
    pdf.drawString(180, y_position, "📄 Resume ATS Analysis Report")
    y_position -= 40

    # **2️⃣ Resume Details**
    details = [
        ("🔹 Filename:", filename),
        ("📧 Email:", email),
        ("📞 Phone:", phone),
        ("🏆 Experience Level:", experience_level),
        ("📊 Match Score:", f"{round(score)}%")          # Round to integer score
        # ("📊 Match Score:", f"{score:.2f}%")
    ]

    pdf.setFont("Helvetica", 12)
    for label, value in details:
        pdf.setFont("Helvetica-Bold", 12)
        pdf.drawString(margin_left, y_position, label)
        pdf.setFont("Helvetica", 12)
        pdf.drawString(margin_left + 120, y_position, value)
        y_position -= line_spacing
        if y_position <= 100:
            add_new_page()

    # **3️⃣ Skills Section**
    pdf.setFont("Helvetica-Bold", 12)
    pdf.drawString(margin_left, y_position, "🛠 Skills:")
    y_position -= line_spacing

    pdf.setFont("Helvetica", 12)
    for skill in skills_list:
        if skill in matched_skills:
            pdf.setFont("Helvetica-Bold", 12)  # ✅ Highlight matched skills in bold
            pdf.drawString(margin_left + 20, y_position, f"✔ {skill} (MATCH)")
        else:
            pdf.setFont("Helvetica", 12)
            pdf.drawString(margin_left + 20, y_position, f"✔ {skill}")
        y_position -= line_spacing
        if y_position <= 100:
            add_new_page()

    # **4️⃣ Missing Skills Section**
    if missing_skills:
        pdf.setFont("Helvetica-Bold", 12)
        pdf.drawString(margin_left, y_position, "📌 Missing Required Skills:")
        y_position -= line_spacing

        pdf.setFont("Helvetica", 12)
        for skill in missing_skills:
            pdf.drawString(margin_left + 20, y_position, f"❌ {skill}")
            y_position -= line_spacing
            if y_position <= 100:
                add_new_page()

    # **5️⃣ Summary Section (Auto-Wrap)**
    pdf.setFont("Helvetica-Bold", 12)
    pdf.drawString(margin_left, y_position, "📌 Summary:")
    y_position -= line_spacing

    pdf.setFont("Helvetica", 12)
    for line in summary_text:
        wrapped_lines = simpleSplit(line, "Helvetica", 12, 500)  # Wrap text
        for sub_line in wrapped_lines:
            pdf.drawString(margin_left + 20, y_position, sub_line)
            y_position -= line_spacing
            if y_position <= 100:
                add_new_page()

    # **6️⃣ Footer**
    pdf.setFont("Helvetica-Oblique", 10)
    pdf.drawString(margin_left, y_position - 30, "✨ Thank you for using the Resume ATS Scanner! ✨")

    # Save the PDF to buffer
    pdf.showPage()
    pdf.save()
    pdf_buffer.seek(0)

    # Send the PDF file
    return send_file(pdf_buffer, as_attachment=True, download_name=f"{filename}_ATS_Report.pdf", mimetype="application/pdf")


def match_resume_to_job(resume_text, job_description):
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([resume_text, job_description])
    return round(cosine_similarity(vectors)[0, 1] * 100, 2)  # Convert to percentage

 # Extract Skills regardless of being case sensitive, hyphens, spaces
def extract_skills_from_job(job_description):
    """Extracts required skills from job description while ignoring hyphens and spaces."""
    job_description_lower = job_description.lower()
    
    normalized_skill_map = {normalize_skill(skill): skill for skill in skill_keywords}
    
    job_skills = [
        normalized_skill_map[norm_skill]
        for norm_skill in normalized_skill_map
        if norm_skill in normalize_skill(job_description_lower)
    ]

    return job_skills if job_skills else ["No Skills Found"]

# Finding skills being irrespective of being case sensitive, hyphens, spaces
def match_resume_skills(resume_skills, job_skills):
    """Compares resume skills with job description skills (ignoring hyphens & spaces)."""
    
    # Normalize both resume and job skills
    resume_skills_map = {normalize_skill(skill): skill for skill in resume_skills}
    job_skills_map = {normalize_skill(skill): skill for skill in job_skills}

    # Find matches
    matched_skills_lower = set(resume_skills_map.keys()) & set(job_skills_map.keys())

    # Retrieve original skill names
    matched_skills = [resume_skills_map[skill] for skill in matched_skills_lower]
    missing_skills = [job_skills_map[skill] for skill in set(job_skills_map.keys()) - set(resume_skills_map.keys())]

    return matched_skills, missing_skills


@app.route("/", methods=["GET", "POST"])
def upload_resume():
    
    global job_descriptionGlobal  # 🔹 Use `global` to modify it inside this function
    global resume_YearsExperience  # Ensure we use the latest stored resume_YearsExperience
    
    if request.method == "POST":
        file = request.files.get("resume")
        job_description = request.form.get("job_description", "")
        #print('job_description in upload_resume', job_description)
        #print('job_descriptionGlobal in upload_resume', job_descriptionGlobal)

        if not file or file.filename.strip() == "":
            return jsonify({"error": "No file uploaded"}), 400
        if not job_description:
            return jsonify({"error": "No job description provided"}), 400

        # 🔹 Store job description in the global variable
        job_descriptionGlobal = job_description  
        #print(f"[DEBUG] Stored Job Description: {job_descriptionGlobal}")  # ✅ Debugging Output
        
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)

        try:
            file.save(filepath)
        except Exception as e:
            print(f"Error saving file: {e}", file=sys.stderr)
            return jsonify({"error": "Failed to save file"}), 500

        resume_text = extract_text(filepath)
        if not resume_text:
            return jsonify({"error": "Failed to extract text from resume"}), 500

        email = extract_email(resume_text)
        phone = extract_phone(resume_text)
        phone = phone.replace('\r', '').replace('\n', '')
        print("After Removing line breaks from Phone number", phone)
        skills = extract_skills(resume_text)
        single_skills = match_single_digit_skills(resume_text, singledigit_skills)
        skills = skills + single_skills                             # Combine both skills into single list of skills
        skills = deduplicate_skills(skills)                         # Removing duplicate skills
            
        experience_level = categorize_experience(resume_text)
        match_score = match_resume_to_job(resume_text, job_description)
        print("Match Score based on Resume and Job Description!!!", match_score)
        match_score, skill_overlap = adjust_match_score(match_score, experience_level, job_description, skills)
        print("BEFORE SAVING TO DB >>>>>>>>>>>",email,phone, match_score, experience_level, skill_overlap)
        print("BEFORE SAVING TO DB NUMERICAL>>",resume_YearsExperience, skill_overlap)
        #save_to_db(filename, email, phone, skills, round(match_score), experience_level)
        save_to_db(filename, email, phone, skills, round(match_score), experience_level,
        experience_years=resume_YearsExperience,
        num_skills_matched=skill_overlap
        )

        # ✅ Instead of returning JSON, redirect to show_resume()
        return redirect(url_for("show_resume", filename=filename))

    return render_template("upload.html")

# Extract single digit skills like "R", "AI", "C"
def match_single_digit_skills(text, singledigit_skills):
    # Construct a regex pattern that matches any of the keywords exactly as whole words
    pattern = r'\b(' + '|'.join(map(re.escape, singledigit_skills)) + r')\b'

    # Search for the keywords in the text (case-insensitive)
    matches = re.findall(pattern, text, re.IGNORECASE)

    # Remove duplicates by converting the list to a set
    unique_matches = set(matches)

    # Return the unique matches sorted
    return sorted(unique_matches)

@app.route("/dashboard")
def dashboard():
    return render_template("dashboard.html")

@app.route("/resume/<filename>")
def show_resume(filename):
    """Displays the extracted resume details in a structured format."""
    filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
    resume_text = extract_text(filepath)
    
    if not resume_text:
        return "<h2>Error: Failed to extract resume text.</h2>"

    # Extract key information
    email = extract_email(resume_text)
    phone = extract_phone(resume_text)
    skills = extract_skills(resume_text)
    single_skills = match_single_digit_skills(resume_text, singledigit_skills)
    skills = skills + single_skills                             # Combine both skills into single list
    skills = deduplicate_skills(skills)                         # Removing duplicate skills
    experience_level = categorize_experience(resume_text)

    return render_template("resume_display.html",
                           filename=filename,
                           email=email,
                           phone=phone,
                           skills=skills,
                           experience_level=experience_level,
                           resume_text=resume_text)



if __name__ == "__main__":
    try:
        app.run(host="127.0.0.1", port=5000, debug=False, use_reloader=False)
    except Exception as e:
        print(f"Error starting Flask app: {e}", file=sys.stderr)
        sys.exit(1)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [11/May/2025 17:32:12] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [11/May/2025 17:32:14] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [11/May/2025 17:32:45] "GET /dashboard HTTP/1.1" 200 -


After Removing line breaks from Phone number (510)-392-7033


and fails to parse leap day. The default behavior will change in Python 3.15
to either always raise an exception or to use a different default year (TBD).
To avoid trouble, add a specific year to the input & format.
See https://github.com/python/cpython/issues/70647.
  start_date = dateparser.parse(f"{start_month} {start_year}")
and fails to parse leap day. The default behavior will change in Python 3.15
to either always raise an exception or to use a different default year (TBD).
To avoid trouble, add a specific year to the input & format.
See https://github.com/python/cpython/issues/70647.
  end_date = dateparser.parse(f"{end_month} {end_year}")
POST / HTTP/1.1" 302 -2025 17:34:44] "


YEARS before globally storing 2.0
Match Score based on Resume and Job Description!!! 59.36
Intern Boost Applied for Junior
Experience_level => Junior
MatchScoreBefore boost => 59.36
MatchScoreAfter boost => 71.232
Matching Score Before -> Skills Match Boosting 71.232
[DEBUG] Skill Match Boost: 4 matching skills | New Score: 76.93056
BEFORE SAVING TO DB >>>>>>>>>>> sualehalam@gmail.com (510)-392-7033 76.93056 Junior 4
BEFORE SAVING TO DB NUMERICAL>> 2.0 4


127.0.0.1 - - [11/May/2025 17:34:45] "GET /resume/111RESUME_Muhammad_Sualeh_Alam.pdf HTTP/1.1" 200 -


YEARS before globally storing 2.0


127.0.0.1 - - [11/May/2025 17:35:09] "GET /download/111RESUME_Muhammad_Sualeh_Alam.pdf HTTP/1.1" 200 -


[DEBUG] Extracted Skills from Job Description: ['Python', 'TensorFlow', 'PyTorch', 'Scikit-learn', 'Machine Learning', 'Data Science', 'Big-Data', 'Azure', 'Hadoop', 'Data Analytics', 'Statistical Modeling', 'Spark', 'Predictive Analytics']
Single Digit Skills !!!  ['AI', 'AWS', 'GCP', 'SQL']


### Exporting Original Data to CSV file

In [3]:
import pandas as pd
df = pd.read_sql("SELECT * FROM resumes", sqlite3.connect("resumes.db"))
df.to_csv("resume_data.csv", index=False)

In [13]:
def deduplicate_skills(skills):
    seen = {}
    for skill in skills:
        key = skill.lower()
        # Prefer capitalized skill if seen before in lowercase
        if key not in seen or skill[0].isupper():
            seen[key] = skill
    return list(seen.values())


skills = ["Python", "SQL", "excel", "Excel", "R", "python"]
cleaned_skills = deduplicate_skills(skills)
print(cleaned_skills)


['Python', 'SQL', 'Excel', 'R']


In [None]:
job_description = """
"Machine Learning Associate (Entry-Level) – Adobe
The Opportunity
Adobe is looking for a Machine Learning Associate (Entry-Level) who will work on AI and machine learning applications to help Adobe better understand, optimize, and improve customer experiences. This role is ideal for candidates with foundational knowledge in machine learning, predictive analytics, and data science techniques who are eager to gain hands-on experience working with real-world big-data problems.

You will receive mentorship and guidance from experienced data scientists and machine learning engineers to build a strong foundation in AI-driven analytics and model development.

What You’ll Do
Assist in developing and implementing machine learning models for real-time customer insights, media optimization, and product recommendations.
Work with structured and unstructured data to generate insights using Python, SQL, and machine learning frameworks.
Collaborate with cross-functional teams, including data scientists, product managers, and software engineers, to improve model efficiency and interpretability.
Apply fundamental data science techniques such as statistical modeling, feature engineering, and algorithm tuning.
Participate in team learning sessions, code reviews, and collaborative brainstorming to enhance skills and knowledge in AI.
What You Need to Succeed
Bachelor’s or Master’s degree in Computer Science, Data Science, or a related field.
Strong foundational knowledge of machine learning, statistics, and data analytics.
Hands-on experience with Python, SQL, and machine learning frameworks (scikit-learn, TensorFlow, or PyTorch).
Understanding of basic statistical modeling and predictive analytics.
Ability to analyze data, apply algorithms, and interpret model results.
Excellent communication skills and willingness to learn from experienced professionals.
Ability to work collaboratively in a hybrid environment and contribute to team projects.
Bonus Skills (Nice to Have, but Not Required)
Familiarity with cloud platforms such as AWS, GCP, or Azure.
Some exposure to big data technologies (Hadoop, Spark, Databricks)."

"""
experience_mapping = {
        "Intern": ["Intern"],
        "Junior": ["Entry-Level", "0-2 years", "Junior"],
        "Mid-Level": ["Mid-Level", "3-7 years"],
        "Senior": ["Senior", "7+ years"]
    }


for mapped_exp in experience_mapping["Mid-Level"]:
    print(mapped_exp)
    if mapped_exp.lower() in job_description.lower():
        print("found")