In [14]:
import os
import re
import smtplib
from email.mime.text import MIMEText
import pdfplumber
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# List of common skills to match
COMMON_SKILLS = [
    "Python", "Flask", "Django", "FastAPI", "Pandas", "NumPy", "SciPy", "Matplotlib", "Seaborn", 
    "TensorFlow", "Keras", "PyTorch", "Scikit-learn", "NLTK", "SpaCy", "SQLAlchemy", "Celery",
    "Java", "Spring", "Spring Boot", "Hibernate", "Maven", "Gradle", "JUnit", "JSP", "JSF", 
    "JavaFX", "Swing", "JDBC", "Apache Camel", "Kotlin",
    "JavaScript", "Node.js", "React", "Angular", "Vue.js", "Next.js", "Express.js", "TypeScript", 
    "Redux", "jQuery", "Electron", "Gatsby", "ES6", "Svelte",
    "C++", "Boost", "Qt", "OpenCV", "STL", "CUDA", "OpenMP", "CMake", "GTest", 
    "C#", ".NET", "ASP.NET", "Entity Framework", "Blazor", "Xamarin", "Unity", "WPF", "WinForms", 
    "LINQ", "Razor Pages", "MVC", "NUnit", "Azure Functions", "SignalR",
    "SQL", "MySQL", "PostgreSQL", "SQLite", "MongoDB", "Redis", "Oracle", "Cassandra", "Elasticsearch", 
    "Microsoft SQL Server", "PL/SQL", "T-SQL",
    "R", "ggplot2", "Shiny", "dplyr", "tidyverse", "caret", "data.table", "rpart", "lubridate", "forecast", 
    "R Markdown", "R Shiny",
    "HTML", "CSS", "Sass", "LESS", "Tailwind CSS", "Bootstrap", "Bulma", "Materialize", "WebAssembly", 
    "Webpack", "Gulp", "JAMstack", "Grunt", "Pug", "Handlebars",
    "Docker", "Kubernetes", "AWS", "Azure", "Google Cloud Platform", "Terraform", "Ansible", "Jenkins", 
    "CI/CD", "Helm", "OpenShift", "Serverless", "CloudFormation", "Chef", "Puppet", "Vagrant",
    "Hadoop", "Spark", "Kafka", "Airflow", "Hive", "Pig", "HBase", "Presto", "Dask", "PySpark", "Google BigQuery",
    "Machine Learning", "Deep Learning", "NLP", "OpenAI GPT", "BERT", "AutoML", "Reinforcement Learning", 
    "Computer Vision", "Generative Adversarial Networks (GANs)", "XGBoost", "LightGBM", "CatBoost",
    "Swift", "Objective-C", "Kotlin", "React Native", "Flutter", "Dart", "Xcode", "Android Studio", 
    "iOS Development", "Android Development",
    "Git", "GitHub", "GitLab", "Bitbucket", "Subversion", "Travis CI", "CircleCI", "Jenkins", "GitFlow", 
    "Perforce",
    "Selenium", "Cypress", "Appium", "Postman", "JMeter", "JUnit", "Mockito", "Cucumber", "PyTest", 
    "Robot Framework", "TestNG", "Jest", "Mocha", "Chai",
    "Agile", "Scrum", "Kanban", "JIRA", "Confluence", "Trello", "Slack", "Basecamp", "Microsoft Project", 
    "Asana", "Monday.com", "ClickUp",
    "GraphQL", "REST API", "SOAP", "gRPC", "Microservices", "Event-Driven Architecture", "Apache Kafka", 
    "RabbitMQ", "Message Queues", "OpenAPI", "Swagger", "OAuth", "JWT", "Firebase", "Heroku",
    "Bash", "PowerShell", "Perl", "Ruby", "Shell Scripting", "Groovy", "Lua",
    "Istio", "Envoy", "Linkerd", "Consul", "Prometheus", "Grafana", "Jaeger", "Fluentd", "Elastic Stack (ELK)", 
    "Logstash", "ECS", "EKS", "Fargate",
]

# Mapping of skills to suggestions for improvement and suggested roles
SKILL_IMPROVEMENT_MAPPING = {
    "Python": {
        "improvements": ["Data Structures and Algorithms", "Web Development with Flask or Django", "Machine Learning Basics"],
        "roles": ["Data Scientist", "Backend Developer", "Machine Learning Engineer"]
    },
    "Django": {
        "improvements": ["Advanced Django ORM", "Scaling Django Apps", "Security Best Practices"],
        "roles": ["Backend Developer", "Full Stack Developer"]
    },
    "Java": {
        "improvements": ["Spring Framework", "Java EE", "Microservices"],
        "roles": ["Java Developer", "Backend Developer", "Software Engineer"]
    },
    "React": {
        "improvements": ["State Management with Redux", "Server-Side Rendering", "TypeScript"],
        "roles": ["Frontend Developer", "Full Stack Developer"]
    },
    "SQL": {
        "improvements": ["Database Design", "Stored Procedures", "Data Warehousing"],
        "roles": ["Database Administrator", "Data Analyst"]
    },
    # Add more skills with suggestions and roles as needed
}

default_names = [
    "Aarav", "Ayaan", "Vivaan", "Aditya", "Vihaan", "Arjun", "Sai", "Ananya", "Saanvi", "Ishaan",
    "Neha", "Priya", "Sneha", "Riya", "Siddharth", "Karan", "Rahul", "Pooja", "Simran", "Tanvi",
    "Rohit", "Kritika", "Avni", "Anika", "Nisha", "Ritika", "Maya", "Dhruv", "Dev", "Nikhil",
    "Kavya", "Siddhi", "Harsh", "Rohan", "Anjali", "Manan", "Suman", "Kunal", "Sanya", "Meera",
    
    # Common Foreign Names
    "James", "John", "Robert", "Michael", "William", "David", "Richard", "Charles", "Joseph", "Thomas",
    "Christopher", "Daniel", "Matthew", "Anthony", "Mark", "Donald", "Steven", "Paul", "Andrew", "Joshua",
    "Laura", "Linda", "Susan", "Jessica", "Sarah", "Karen", "Nancy", "Lisa", "Betty", "Margaret",
    "Helen", "Sandra", "Ashley", "Dorothy", "Kimberly", "Emily", "Michelle", "Carol", "Amanda", "Melissa",
    
    # Additional names for diversity
    "Isabella", "Alexander", "Sofia", "Oliver", "Liam", "Emma", "Mason", "Ethan", "Ava", "Sophia",
    "Noah", "Liam", "Charlotte", "Elijah", "Harper", "Lucas", "Mia", "Benjamin", "Amelia", "Logan"
]



import re
import os
import fitz


class NameExtractor:
    def __init__(self, common_skills):
        self.patterns = [
            r'^[A-Z][a-z]+(?:\s[A-Z][a-z]+)+$',  # First and last name
            r'^[A-Z][a-z]+(?:\s[A-Z]\.){1}\s[A-Z][a-z]+$',  # First name, middle initial, last name
            r'^[A-Z][a-z]+(?:\s[A-Z][a-z]+){2}$'  # Full name with middle name
        ]
        self.common_skills = common_skills
    def extract_name(self, resume_text, file_path):
        file_name = os.path.basename(file_path).replace('.pdf', '')
        words_in_text = resume_text.split()
        # Try to match the file name or parts of it with the extracted text
        for word in words_in_text:
            if word.lower() in file_name.lower():
                return word  # Return the matching word as the candidate name
        # Split resume text into lines
        lines = resume_text.splitlines()
        for line in lines[:3]:  # Check the first three lines for a valid name
            if self.is_valid_name(line):
                if not self.contains_common_skill(line):
                    return line.strip()  # Return as candidate name
        # As a last resort, return "Unknown"
        return "Unknown"
    def is_valid_name(self, line):
        return any(re.match(pattern, line) for pattern in self.patterns)
    def contains_common_skill(self, line):
        return any(re.search(r'\b' + re.escape(skill) + r'\b', line, re.IGNORECASE) for skill in self.common_skills)
def extract_text_with_pymupdf(file_path):
    text = ""
    try:
        pdf_document = fitz.open(file_path)
        for page in pdf_document:
            text += page.get_text()
        pdf_document.close()
    except Exception as e:
        print("Error reading PDF:", e)
    return text.strip()
def extract_name_from_text(text):
    name_patterns = [
        re.compile(r'^[A-Z][a-zA-Z\s\-\.]+$', re.MULTILINE),  # Names typically start with a capital letter
        re.compile(r'Name:\s*([A-Za-z\s\-\.]+)', re.IGNORECASE),  # Format "Name: John Doe"
        re.compile(r'([A-Za-z\s\-\.]+)\s+CV', re.IGNORECASE),  # Names followed by "CV" or "Resume"
        re.compile(r'([A-Za-z\s\-\.]+)\s+Resume', re.IGNORECASE),  # Names followed by "Resume"
        re.compile(r'([A-Za-z\s\-\.]+)\s+Profile', re.IGNORECASE),  # Names followed by "Profile"
        re.compile(r'([A-Za-z\s\-\.]+)\s+Summary', re.IGNORECASE),  # Names followed by "Summary"
    ]
    
    for pattern in name_patterns:
        matches = pattern.findall(text)
        for match in matches:
            if match and match.strip():
                return match.strip()
    
    return "Name not found"
def extract_names_from_folder(folder_path):
    results = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.pdf'):
            file_path = os.path.join(folder_path, filename)
            print(f"Processing: {file_path}")
            pdf_text = extract_text_with_pymupdf(file_path)
            name = extract_name_from_text(pdf_text)
            results.append({'filename': filename, 'name': name})
    return results

# Extract skills from job description dynamically
def extract_skills_from_job_description(job_description):
    job_description_lower = job_description.lower()
    matched_skills = []

    for skill in COMMON_SKILLS:
        if skill.lower() in job_description_lower:
            matched_skills.append(skill)

    return matched_skills

# Extract text from PDF using pdfplumber
def extract_text_with_pdfplumber(resume_path):
    try:
        with pdfplumber.open(resume_path) as pdf:
            text = ""
            for page in pdf.pages:
                text += page.extract_text() if page.extract_text() else ""
        return text
    except Exception as e:
        print(f"Failed to extract text from {resume_path}: {e}")
        return None

# Extract actual skills using predefined list of common skills
def extract_skills_with_nlp(resume_text):
    extracted_skills = []
    for skill in COMMON_SKILLS:
        if skill.lower() in resume_text.lower():
            extracted_skills.append(skill)
    return extracted_skills

# Match extracted skills with job skills
def match_skills(extracted_skills, job_skills):
    matched_skills = list(set(extracted_skills) & set(job_skills))
    match_percentage = (len(matched_skills) / len(job_skills)) * 100 if job_skills else 0
    return matched_skills, match_percentage

# Extract email address from resume text
def extract_email(resume_text):
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    emails = re.findall(email_pattern, resume_text)
    return emails[0] if emails else None

# Generate personalized suggestions based on candidate's skills
def generate_suggestions(extracted_skills):
    improvement_suggestions = set()
    suggested_roles = set()

    for skill in extracted_skills:
        if skill in SKILL_IMPROVEMENT_MAPPING:
            improvement_suggestions.update(SKILL_IMPROVEMENT_MAPPING[skill]["improvements"])
            suggested_roles.update(SKILL_IMPROVEMENT_MAPPING[skill]["roles"])

    return improvement_suggestions, suggested_roles

# Send email function with personalized suggestions for rejected candidates
def send_email(recipient_email, candidate_name, match_percentage, extracted_skills, improvement_suggestions, suggested_roles):
    sender_email = "jesperdeni002@gmail.com"
    sender_password = "tzij srvn lidi waqb"

    if match_percentage > 50:
        subject = "Congratulations on Advancing to the Next Round!"
        body = f"""
        Dear {candidate_name},

        We are pleased to inform you that you have successfully completed the first round of interviews for the position. Congratulations!

        We would like to invite you to the next round of interviews.

        Please confirm your availability.

        Best regards,
        [Your Name]
        [Your Job Title]
        [Company Name]
        [Your Phone Number]
        """
        print(f"Selected: {candidate_name} with match percentage {match_percentage:.2f}%")
    else:
        subject = "Thank You for Your Application"
        body = f"""
        Dear {candidate_name},

        Thank you for applying for the position. While we appreciate your interest, we regret to inform you that we will not be progressing with your application at this time.

        However, we noticed your skills in the following areas: {', '.join(extracted_skills)}.

        Here are some suggestions for improvement:
        - {', '.join(improvement_suggestions)}

        Suggested roles based on your skills:
        - {', '.join(suggested_roles)}

        We encourage you to apply again in the future.

        Best regards,
        [Your Name]
        [Your Job Title]
        [Company Name]
        [Your Phone Number]
        """

    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = recipient_email

    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, recipient_email, msg.as_string())

# Main function to process resumes and send emails
def process_resumes(job_description, resume_folder_path):
    job_skills = extract_skills_from_job_description(job_description)
    print(f"Job skills extracted: {job_skills}")

    for resume_path in glob.glob(os.path.join(resume_folder_path, "*.pdf")):
        resume_text = extract_text_with_pdfplumber(resume_path)
        if resume_text:
            email = extract_email(resume_text)

            # Pass COMMON_SKILLS and default_names when initializing NameExtractor
            name_extractor = NameExtractor(COMMON_SKILLS, default_names)
            candidate_name = name_extractor.extract_name(resume_text, resume_path)  # Pass resume_path

            extracted_skills = extract_skills_with_nlp(resume_text)
            matched_skills, match_percentage = match_skills(extracted_skills, job_skills)

            print(f"Candidate Name: {candidate_name}")

            # Print matched details for clarity
            print(f"Processed {candidate_name}:")
            print(f"  - Extracted Skills: {extracted_skills}")
            print(f"  - Matched Skills: {matched_skills}")
            print(f"  - Match Percentage: {match_percentage:.2f}%")

            if email:
                improvement_suggestions, suggested_roles = generate_suggestions(extracted_skills)
                send_email(email, candidate_name, match_percentage, extracted_skills, improvement_suggestions, suggested_roles)
            else:
                print(f"  - No email found for {candidate_name}")




# Example usage
job_description = input()
resume_folder_path = "D:\\Project\\New folder (2)\\resumes"
process_resumes(job_description, resume_folder_path) 



Job skills extracted: ['Python', 'SQL', 'R', 'Machine Learning']
Candidate Name: a
Processed a:
  - Extracted Skills: ['Java', 'JavaScript', 'Node.js', 'React', 'R', 'HTML', 'CSS', 'Webpack', 'Git']
  - Matched Skills: ['R']
  - Match Percentage: 25.00%
Candidate Name: a
Processed a:
  - Extracted Skills: ['Java', 'JavaScript', 'Node.js', 'React', 'R', 'HTML', 'CSS', 'Webpack', 'Git']
  - Matched Skills: ['R']
  - Match Percentage: 25.00%
Candidate Name: Alex
Processed Alex:
  - Extracted Skills: ['Python', 'C++', 'Boost', 'STL', 'CMake', 'R', 'Git']
  - Matched Skills: ['R', 'Python']
  - Match Percentage: 50.00%
Failed to extract text from D:\Project\New folder (2)\resumes\Alice Johnson.pdf: No /Root object! - Is this really a PDF?
Candidate Name: A
Processed A:
  - Extracted Skills: ['Python', 'Java', 'JavaScript', 'C++', 'Unity', 'SQL', 'MySQL', 'R', 'HTML', 'CSS', 'LESS', 'Machine Learning']
  - Matched Skills: ['SQL', 'R', 'Python', 'Machine Learning']
  - Match Percentage: 100.0