In [79]:
import os
import pandas as pd
import re
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import imaplib
import email
from email.header import decode_header
import PyPDF2
import docx
import datetime
import logging

In [80]:
def connect_to_email(email_address, password, imap_server="imap.gmail.com"):
    # Connect to the mail server
    mail = imaplib.IMAP4_SSL(imap_server)
    mail.login(email_address, password)
    mail.select("inbox")  # Select inbox or another mailbox
    
    return mail

def fetch_emails_with_attachments(mail, search_criteria="UNSEEN SUBJECT 'Resume'"):
    # Search for specific emails (e.g., unread emails with "Resume" in subject)
    status, data = mail.search(None, search_criteria)
    
    email_ids = data[0].split()
    attachments_info = []
    
    for email_id in email_ids:
        status, email_data = mail.fetch(email_id, "(RFC822)")
        raw_email = email_data[0][1]
        msg = email.message_from_bytes(raw_email)
        
        # Extract sender details
        sender_email = msg["from"]
        subject = decode_header(msg["subject"])[0][0]
        if isinstance(subject, bytes):
            subject = subject.decode()
        
        # Look for attachments
        for part in msg.walk():
            if part.get_content_maintype() == "multipart":
                continue
            if part.get("Content-Disposition") is None:
                continue
                
            filename = part.get_filename()
            if filename:
                if filename.endswith(('.pdf', '.docx')):
                    # Save the attachment
                    file_path = os.path.join("resumes", filename)
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    
                    with open(file_path, "wb") as f:
                        f.write(part.get_payload(decode=True))
                    
                    attachments_info.append({
                        "sender_email": sender_email,
                        "subject": subject,
                        "filename": filename,
                        "file_path": file_path
                    })
    
    return attachments_info
    

In [81]:
from dotenv import load_dotenv
load_dotenv()

your_email = os.getenv("EMAIL_USER")
your_app_password = os.getenv("EMAIL_PASS")


mail = connect_to_email(your_email, your_app_password)
attachments = fetch_emails_with_attachments(mail)

print("Attachments found:")
for att in attachments:
    print(att)

Attachments found:
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'Intro page sparsh.pdf', 'file_path': 'resumes\\Intro page sparsh.pdf'}
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'Sparsh resume-1.pdf', 'file_path': 'resumes\\Sparsh resume-1.pdf'}
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'resume (2).pdf', 'file_path': 'resumes\\resume (2).pdf'}
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'resume avi.pdf', 'file_path': 'resumes\\resume avi.pdf'}
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'Tanmay Arora Resume.pdf', 'file_path': 'resumes\\Tanmay Arora Resume.pdf'}
{'sender_email': 'Vaibhav Bhardwaj <bhardwajvaibhav943@gmail.com>', 'subject': 'Resume', 'filename': 'meenal resume (1).pdf', 'file_path': 'resu

In [82]:
import os
import PyPDF2
import docx

def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text

def extract_text_from_docx(docx_path):
    doc = docx.Document(docx_path)
    text = ""
    for paragraph in doc.paragraphs:
        text += paragraph.text + "\n"
    return text

def parse_resume(file_path):
    if file_path.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    elif file_path.endswith('.docx'):
        return extract_text_from_docx(file_path)
    else:
        raise ValueError(f"Unsupported file format: {file_path}")

resumes_folder = os.path.join(os.getcwd(), "resumes")
all_resumes = {}

for filename in os.listdir(resumes_folder):
    file_path = os.path.join(resumes_folder, filename)
    if filename.endswith(('.pdf', '.docx')):
        try:
            text = parse_resume(file_path)
            all_resumes[filename] = text  # Store full resume text
        except Exception as e:
            print(f"Error parsing {filename}: {e}")


In [87]:
def extract_and_mask_personal_info(resume_text, filename=""):
    import re

    lines = resume_text.strip().split("\n")
    top_lines = [line.strip() for line in lines[:10] if line.strip() and not re.search(r"(email|phone|contact|linkedin|github|address|india)", line.lower())]

    name = None
    # Look for proper name format in top lines
    for line in top_lines:
        words = line.split()
        if 2 <= len(words) <= 3:
            if all(w[0].isupper() and w[1:].islower() for w in words):
                name = " ".join(words)
                break
            if all(w.isupper() for w in words):  # all caps
                name = " ".join([w.capitalize() for w in words])
                break

    # Fallback: use email username as name
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    email_match = re.search(email_pattern, resume_text)
    email = email_match.group(0) if email_match else "unknown@example.com"

    # Remove common prefixes like 'Email-', 'Mail:', etc.
    if email.lower().startswith("email-"):
        email = email.replace("Email-", "").replace("email-", "")

    # If name not found earlier, infer from email
    if not name and email != "unknown@example.com":
        username = email.split("@")[0]
        name_parts = re.findall(r"[A-Za-z]+", username)
        name = " ".join([p.capitalize() for p in name_parts[:2]]) if name_parts else "Unknown"

    name = name if name else "Unknown"

    # Masked Name
    masked_name = name[0] + "*" * (len(name) - 2) + name[-1] if len(name) > 2 else "***"

    # Masked Email
    email_parts = email.split('@')
    if len(email_parts) == 2:
        username, domain = email_parts
        masked_email = username[0] + "*" * (len(username) - 2) + username[-1] + "@" + domain
    else:
        masked_email = "***@example.com"

    return {
        "full_name": name,
        "email": email,
        "masked_name": masked_name,
        "masked_email": masked_email
      }


def extract_experience_years(resume_text):
    # Look for patterns like "5 years", "3+ years", etc.
    year_patterns = [
        r"(\d+)[\+]?\s+years",
        r"(\d+)[\+]?\s+yrs",
        r"(\d{4})\s*[-–—]\s*(2\d{3}|present|current|now)",
    ]
    
    total_years = 0
    for pattern in year_patterns:
        matches = re.findall(pattern, resume_text, re.IGNORECASE)
        for match in matches:
            if isinstance(match, tuple):  # For date ranges
                start_year = int(match[0])
                if match[1].lower() in ["present", "current", "now"]:
                    end_year = datetime.datetime.now().year
                else:
                    try:
                        end_year = int(match[1])
                    except ValueError:
                        continue
                total_years += end_year - start_year
            else:
                try:
                    years = int(match)
                    total_years += years
                except ValueError:
                    continue
    
    # If we found multiple instances, take an average (simple approach)
    if len(year_patterns) > 0:
        total_years = min(total_years, 20)  # Cap at 20 years to avoid outliers
    
    return total_years

def check_ai_experience(resume_text):
    ai_keywords = [
        "machine learning", "deep learning", "neural network", "tensorflow",
        "pytorch", "keras", "scikit-learn", "nlp", "natural language processing",
        "computer vision", "cv", "ai", "artificial intelligence", "data science",
        "predictive modeling", "reinforcement learning", "ml ops", "llm", 
        "large language model", "transformers", "gpt", "bert", "data mining","numpy","pandas","Exploratory data analysis"
        , "python" ,"Hugging Face", "NLP" , 'tensor flow' , 'power bi' ,'seaborn','nltk','model'
    ]
    
    ai_exp = []
    for keyword in ai_keywords:
        if re.search(r'\b' + re.escape(keyword) + r'\b', resume_text.lower()):
            ai_exp.append(keyword)
    
    return {
        "has_ai_experience": len(ai_exp) > 0,
        "ai_skills": ai_exp
    }

def calculate_jd_match_score(resume_text, job_description):
    # Convert both to lowercase
    resume_lower = resume_text.lower()
    jd_lower = job_description.lower()
    
    # Extract key skills/requirements from JD (simplified approach)
    # In a real system, you would use NER or more sophisticated extraction
    skill_pattern = r"(?:knowledge|experience|proficient|skill|familiarity).*?(in|with|of)\s+([a-zA-Z\s,]+)"
    jd_skills_matches = re.findall(skill_pattern, jd_lower)
    
    jd_skills = []
    for match in jd_skills_matches:
        skills = match[1].split(',')
        for skill in skills:
            cleaned_skill = skill.strip()
            if len(cleaned_skill) > 2:  # Avoid very short terms
                jd_skills.append(cleaned_skill)
    
    # Add common programming languages and tools
    common_skills = ["python", "java", "javascript", "c++", "sql", "nosql", 
                 "aws", "azure", "docker", "kubernetes", "git", "agile"]
    
    all_skills = set(jd_skills + common_skills)
    
    # Count matches
    matches = 0
    for skill in all_skills:
        if re.search(r'\b' + re.escape(skill) + r'\b', resume_lower):
            matches += 1
    
    # Calculate match percentage
    match_score = min(100, int((matches / max(1, len(all_skills))) * 100))
    
    return match_score

def score_resume(resume_text, job_description=""):
    # Initialize scores dictionary
    scores = {
        "formatting": 0,
        "experience": 0,
        "education": 0,
        "skills": 0,
        "overall": 0
    }
    
    # Score formatting (length, sections, etc.)
    word_count = len(resume_text.split())
    if 300 <= word_count <= 1000:
        scores["formatting"] = 20  # Good length
    elif word_count < 300:
        scores["formatting"] = 10  # Too short
    else:
        scores["formatting"] = 15  # Might be too verbose
    
    # Check for clear sections
    sections = ["experience", "education", "skills", "projects", "achievements"]
    section_count = sum(1 for section in sections if re.search(r'\b' + re.escape(section) + r'\b', resume_text.lower()))
    scores["formatting"] += min(10, section_count * 2)
    
    # Experience score
    years = extract_experience_years(resume_text)
    if years >= 5:
        scores["experience"] = 25
    elif years >= 3:
        scores["experience"] = 20
    elif years >= 1:
        scores["experience"] = 15
    else:
        scores["experience"] = 10
    
    # Education score
    edu_keywords = ["degree", "bachelor", "master", "phd", "university", "college"]
    edu_count = sum(1 for keyword in edu_keywords if re.search(r'\b' + re.escape(keyword) + r'\b', resume_text.lower()))
    scores["education"] = min(20, edu_count * 5)
    
    # Skills score
    tech_skills = ["python", "java", "javascript", "html", "css", "react", "node", 
                  "angular", "vue", "sql", "nosql", "mongodb", "aws", "azure", 
                  "gcp", "docker", "kubernetes", "git", "ci/cd", "agile", "scrum"]
    
    skill_count = sum(1 for skill in tech_skills if re.search(r'\b' + re.escape(skill) + r'\b', resume_text.lower()))
    scores["skills"] = min(25, skill_count * 2)
    
    # Calculate overall score
    scores["overall"] = scores["formatting"] + scores["experience"] + scores["education"] + scores["skills"]
    
    # Calculate JD match score if provided
    jd_match = calculate_jd_match_score(resume_text, job_description) if job_description else 0
    
    # Get AI experience
    ai_experience = check_ai_experience(resume_text)
    
    return {
        "scores": scores,
        "years_experience": years,
        "jd_match_score": jd_match,
        "ai_experience": ai_experience
    }


In [84]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import re
import os
import datetime

# Function to send email
def send_email(recipient_email, subject, body):
    # Retrieve email credentials from environment variables
    from_email = os.getenv("EMAIL_USER")
    password = os.getenv("EMAIL_PASS")
    
    # Check if credentials are loaded correctly
    if not from_email or not password:
        raise ValueError("Email credentials not found. Make sure to set the EMAIL_USER and EMAIL_PASS environment variables.")
    
    # Set up the MIME
    message = MIMEMultipart()
    message["From"] = from_email
    message["To"] = recipient_email
    message["Subject"] = subject
    
    # Add the body to the email
    message.attach(MIMEText(body, "plain"))
    
    # Send the email
    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
            server.login(from_email, password)
            server.sendmail(from_email, recipient_email, message.as_string())
        print(f"Email sent to {recipient_email}")
    except Exception as e:
        print(f"Failed to send email to {recipient_email}. Error: {e}")
        
# Function to compose the email body
def compose_email_body(masked_info, jd_score, final_score, ai_experience):
    # Start building the email content
    body = f"Dear {masked_info['full_name']},\n\n"
    
    body += f"Thank you for submitting your resume. Here's a detailed breakdown of your CV:\n\n"
    
    # CV Score
    body += f"🔹 **Overall Resume Score**: {final_score['scores']['overall']}/100\n\n"
    
    # Breakdown of the score components
    body += f"**Breakdown of Score Components**:\n"
    body += f"📋 **Formatting**: {final_score['scores']['formatting']}/20\n"
    body += f"💼 **Experience**: {final_score['scores']['experience']}/25\n"
    body += f"🎓 **Education**: {final_score['scores']['education']}/20\n"
    body += f"💻 **Skills**: {final_score['scores']['skills']}/25\n\n"
    
    # AI Experience (if applicable)
    if ai_experience["has_ai_experience"]:
        body += f"🤖 **AI Experience**: You have experience in the following areas: {', '.join(ai_experience['ai_skills'])}\n\n"
    else:
        body += f"🤖 **AI Experience**: No notable AI experience found.\n\n"
    
    # Notable feedback (strengths/weaknesses)
    if final_score['scores']['overall'] < 50:
        body += "⚠️ **Feedback**: Your CV needs improvements in terms of formatting, experience, and skills. Consider enhancing your technical skills and ensuring your resume has a clear structure.\n\n"
    elif final_score['scores']['overall'] < 70:
        body += "⚠️ **Feedback**: Your CV is decent, but there’s room for improvement in formatting and showcasing experience. Consider highlighting key accomplishments more clearly.\n\n"
    else:
        body += "🎉 **Feedback**: Excellent CV! You’ve demonstrated strong skills and experience, especially in AI and programming. Keep up the great work!\n\n"
    
    # Optional: Next steps or encouragement
    body += "🌟 **Next Steps**: We encourage you to keep improving your skills, particularly in the areas of machine learning and AI. Best of luck in your job search!\n\n"
    
    body += "Best regards,\nYour Resume Review Team"
    
    return body

# Modify the existing process_resumes_in_folder function to send emails
def process_resumes_in_folder(resumes_folder, job_description_keywords=""):
    for filename in os.listdir(resumes_folder):
        if filename.endswith(('.pdf', '.docx')):  # Check if file is a PDF or DOCX
            file_path = os.path.join(resumes_folder, filename)
            print(f"\n📄 Processing: {filename}")
            try:
                # Parse the resume text from the file
                resume_text = parse_resume(file_path)
                
                # Extract and mask personal information (name, email)
                masked_info = extract_and_mask_personal_info(resume_text)
                
                # Calculate JD match score
                jd_score = calculate_jd_match_score(resume_text, job_description_keywords)
                
                # Extract batch years (from experience)
                batch = extract_batch_years(resume_text)
                
                # Check for AI experience
                ai_exp = check_ai_experience(resume_text)
                
                # Calculate overall resume score
                final_score = score_resume(resume_text, job_description_keywords)
                
                # Compose email body
                email_body = compose_email_body(masked_info, jd_score, final_score, ai_exp)
                
                # Send the email to the masked email address
                send_email(masked_info['email'], f"Your CV Review: Score and Feedback for {masked_info['full_name']}", email_body)
                
                # Display the extracted and computed information
                print(f"👤 Full Name: {masked_info['full_name']}")
                print(f"📧 Email: {masked_info['email']}")
                print(f"🔒 Masked Name: {masked_info['masked_name']}")
                print(f"🔒 Masked Email: {masked_info['masked_email']}")
                print(f"🎯 JD-CV Match Score: {jd_score}%")
                print(f"🎓 Batch Years: {batch}")
                print(f"🤖 AI Experience: {', '.join(ai_exp['ai_skills'])}")
                print(f"📊 Resume Score: {final_score['scores']['overall']}/100")
            except Exception as e:
                print(f"❌ Error processing {filename}: {e}")

# Example usage
resumes_folder = r"C:\Users\ASUS\Desktop\cv scorer\working\resumes"
job_description_keywords = '''python, machine learning, deep learning, data science, data analysis, pandas, numpy, 
scikit-learn, tensorflow, keras, pytorch, model deployment, docker, flask, streamlit, neural networks, convolutional
neural networks, recurrent neural networks, natural language processing, nlp, transformers, llm, bert, gpt, chatgpt,
huggingface, prompt engineering, langchain, openai, computer vision, opencv, sql, mongodb, powerbi, matplotlib, seaborn, feature engineering, model tuning, cross-validation, statistics, regression, 
classification, clustering, unsupervised learning, supervised learning, time series analysis, ai, artificial intelligence, big data, spark, spacy'''

process_resumes_in_folder(resumes_folder, job_description_keywords)



📄 Processing: Intro page sparsh.pdf
Email sent to unknown@example.com
👤 Full Name: Unknown
📧 Email: unknown@example.com
🔒 Masked Name: U*****n
🔒 Masked Email: u*****n@example.com
🎯 JD-CV Match Score: 0%
🎓 Batch Years: 2024 - 2027
🤖 AI Experience: 
📊 Resume Score: 25/100

📄 Processing: meenal resume (1).pdf
Email sent to khandelwal.meenal12@gmail.com
👤 Full Name: Meenal Khandelwal
📧 Email: khandelwal.meenal12@gmail.com
🔒 Masked Name: M***************l
🔒 Masked Email: k*****************2@gmail.com
🎯 JD-CV Match Score: 25%
🎓 Batch Years: 2017 - 2025
🤖 AI Experience: nlp, ai, data science, numpy, pandas, python, seaborn
📊 Resume Score: 70/100

📄 Processing: resume (2).pdf
Email sent to abhigyanranjanofficial@gmail.com
👤 Full Name: Abhigyan Ranjan
📧 Email: abhigyanranjanofficial@gmail.com
🔒 Masked Name: A*************n
🔒 Masked Email: a********************l@gmail.com
🎯 JD-CV Match Score: 33%
🎓 Batch Years: 2020 - 2024
🤖 AI Experience: machine learning, deep learning, ai, artificial intelli