In [1]:
import os
import streamlit as st
import pdfplumber
import json
import spacy
import google.generativeai as genai
from docx import Document
from fpdf import FPDF
from dotenv import load_dotenv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load environment variables from a local .env file (values in .env take
# precedence over variables already set in the process environment).
load_dotenv(override=True)

# SECURITY: the API key must never be committed to source control. It is read
# from the GEMINI_API_KEY environment variable (typically supplied via .env).
# Any key that was previously hard-coded in this file should be treated as
# compromised and revoked.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    st.error("⚠ Gemini API key not found. Please set it in your .env file.")
    st.stop()

# Configure Gemini AI
genai.configure(api_key=GEMINI_API_KEY)

# Load spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, joined by newlines.

    Pages for which pdfplumber yields no text are skipped. On any failure an
    error is shown in the Streamlit UI and an empty string is returned.
    """
    try:
        with pdfplumber.open(file) as document:
            page_texts = [page.extract_text() for page in document.pages]
        # Keep only pages that actually produced text.
        return "\n".join(chunk for chunk in page_texts if chunk).strip()
    except Exception as e:
        st.error(f"❌ Error extracting text from PDF: {e}")
        return ""

# Extract text from DOCX
def extract_text_from_docx(file):
    """Return the text of every body paragraph of a DOCX, one per line.

    On any failure an error is shown in the Streamlit UI and an empty string
    is returned.
    """
    try:
        paragraph_texts = (paragraph.text for paragraph in Document(file).paragraphs)
        combined = "\n".join(paragraph_texts)
        return combined.strip()
    except Exception as e:
        st.error(f"❌ Error extracting text from DOCX: {e}")
        return ""

# Parse resume file
def parse_resume(file):
    """Extract the text of an uploaded resume based on its file extension.

    Args:
        file: A file-like object exposing a ``name`` attribute (for example a
            Streamlit upload).

    Returns:
        A ``(text, file_type)`` tuple where ``file_type`` is ``"pdf"`` or
        ``"docx"``. ``(None, None)`` is returned when the extension is not
        supported or the object has no usable name.
    """
    # Compare case-insensitively so "Resume.PDF" / "CV.Docx" are accepted.
    filename = (getattr(file, "name", "") or "").lower()

    if filename.endswith(".pdf"):
        return extract_text_from_pdf(file), "pdf"
    if filename.endswith(".docx"):
        return extract_text_from_docx(file), "docx"
    return None, None

# Extract sections from resume
def extract_sections(resume_text, section_titles=None):
    """Group the lines of a resume under the heading lines that precede them.

    A line is treated as a heading when it contains any of the section titles
    (case-insensitive substring match). The stripped heading line itself is
    used as the dictionary key. Lines that appear before the first heading are
    collected under ``"Other"``.

    Args:
        resume_text: Full resume text with ``"\\n"`` line separators.
        section_titles: Optional iterable of titles to recognise. Defaults to
            the built-in list of common resume sections.

    Returns:
        A dict mapping heading line -> list of stripped content lines.
    """
    if section_titles is None:
        section_titles = (
            "Technical Skills",
            "Soft Skills",
            "Certifications",
            "Experience",
            "Education",
            "Projects",
        )
    # Lower-case once instead of on every line; drop empty titles, which
    # would otherwise match every line.
    lowered_titles = [title.lower() for title in section_titles if title]

    sections = {}
    current_section = "Other"

    for raw_line in resume_text.split("\n"):
        line = raw_line.strip()
        lowered_line = line.lower()
        if any(title in lowered_line for title in lowered_titles):
            current_section = line
            # setdefault (not assignment) so that a heading which appears
            # twice keeps the content already collected under it.
            sections.setdefault(current_section, [])
        else:
            sections.setdefault(current_section, []).append(line)

    return sections

def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from text."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line, including an optional language tag.
        _, _, cleaned = cleaned.partition("\n")
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


# Extract skills using Gemini AI
def extract_skills_gemini(job_desc, model_name="gemini-pro"):
    """Ask Gemini to extract categorised skills from a job description.

    Args:
        job_desc: The job description text.
        model_name: Name of the Gemini model to use.

    Returns:
        A dict mapping section name -> list of skills, as parsed from the
        model's JSON reply. An empty dict is returned (and an error is shown
        in the Streamlit UI) when the request fails or the reply is not a
        JSON object.
    """
    model = genai.GenerativeModel(model_name)
    prompt = f"""
    Extract key skills, tools, technologies, and relevant qualifications from this job description.
    Classify them into categories like 'Technical Skills', 'Soft Skills', 'Certifications', 'Experience', 'Projects'.
    Return the output as a JSON object where keys are section names and values are lists of skills.

    Job Description:
    {job_desc}
    """

    try:
        response = model.generate_content(prompt)
        # The reply may be wrapped in a Markdown code fence, which json.loads
        # rejects; strip it before parsing.
        extracted_data = json.loads(_strip_code_fence(response.text))
    except json.JSONDecodeError:
        st.error("⚠ Error: Failed to parse Gemini AI response.")
        return {}
    except Exception as e:
        st.error(f"⚠ Gemini AI Error: {e}")
        return {}

    # Callers iterate with .items(); valid JSON that is not an object
    # (e.g. a bare list) is treated as a parse failure.
    if not isinstance(extracted_data, dict):
        st.error("⚠ Error: Failed to parse Gemini AI response.")
        return {}
    return extracted_data

# Ensure skills are not already present in the resume
def filter_new_skills(extracted_data, resume_sections):
    """Return only the extracted skills that the resume does not mention yet.

    Args:
        extracted_data: Dict mapping section name -> list of skill strings.
        resume_sections: Dict mapping resume heading line -> list of lines.

    Returns:
        A dict with the same section names, containing only skills that do not
        occur in the matching resume section(s). Sections with nothing new are
        omitted.
    """
    import re

    filtered_skills = {}

    for section, skills in extracted_data.items():
        # Tolerate a single string or an unexpected value type.
        if isinstance(skills, str):
            skills = [skills]
        elif not isinstance(skills, (list, tuple)):
            continue

        # Resume headings are raw lines such as "TECHNICAL SKILLS:", so match
        # them by case-insensitive containment rather than exact key.
        section_name = str(section).lower()
        section_text = " ".join(
            line
            for heading, lines in resume_sections.items()
            if section_name in str(heading).lower()
            for line in lines
        ).lower()

        new_skills = []
        for skill in skills:
            if not isinstance(skill, str) or not skill.strip():
                continue
            # Whole-phrase match: handles multi-word skills and adjacent
            # punctuation ("python,") without matching inside longer words
            # ("java" vs "javascript").
            pattern = r"(?<!\w)" + re.escape(skill.strip().lower()) + r"(?!\w)"
            if not re.search(pattern, section_text):
                new_skills.append(skill)

        if new_skills:
            filtered_skills[section] = new_skills

    return filtered_skills

# Get user approval for adding new skills
def get_user_approval(filtered_skills):
    """Render one checkbox per candidate skill and return the ticked ones.

    The checked state of each checkbox is mirrored in ``st.session_state``
    under the key ``"<section>_<skill>"``. Sections without any ticked skill
    are left out of the result.
    """
    selections = {}

    for section_name, candidates in filtered_skills.items():
        chosen = []
        for candidate in candidates:
            state_key = f"{section_name}_{candidate}"
            if state_key not in st.session_state:
                st.session_state[state_key] = False

            is_checked = st.checkbox(
                f"Add to {section_name}: {candidate}?",
                value=st.session_state[state_key],
            )
            st.session_state[state_key] = is_checked

            if is_checked:
                chosen.append(candidate)

        if chosen:
            selections[section_name] = chosen

    return selections

# Update resume without changing the format
def update_resume(sections, approved_skills):
    """Append each section's approved skills as one comma-separated line.

    ``sections`` is modified in place and also returned. A section that does
    not exist yet is created with the joined skills as its only line.
    """
    for heading, additions in approved_skills.items():
        joined_skills = ", ".join(additions)
        sections.setdefault(heading, []).append(joined_skills)
    return sections

# Convert updated sections to DOCX
def save_as_docx(sections, original_docx):
    """Write the updated resume to ``Updated_Resume.docx`` and return the path.

    The original document is reopened and only lines that are not already
    present in it are added, so existing content is neither duplicated nor
    reformatted. New lines for a known section are inserted as a paragraph at
    the end of that section (before the next recognised heading); sections
    with no heading in the document are appended at the end.

    Args:
        sections: Dict mapping heading line -> list of content lines.
        original_docx: Path or file-like object of the uploaded DOCX.

    Returns:
        The output file path (relative to the current working directory).
    """
    # The upload may already have been read once; rewind defensively.
    if hasattr(original_docx, "seek"):
        original_docx.seek(0)
    doc = Document(original_docx)
    paragraphs = list(doc.paragraphs)

    # Every line already in the document, normalised the same way the resume
    # text was split into section lines (split on newlines, stripped).
    existing_lines = {
        part.strip()
        for para in paragraphs
        for part in para.text.split("\n")
    }

    missing_sections = []
    for section, content in sections.items():
        new_lines = [
            line for line in content
            if line.strip() and line.strip() not in existing_lines
        ]
        if not new_lines:
            continue
        addition = "\n".join(new_lines)

        heading_index = next(
            (i for i, para in enumerate(paragraphs) if para.text.strip() == section),
            None,
        )
        if heading_index is None:
            # Appended after the loop so they follow all in-place insertions.
            missing_sections.append((section, addition))
            continue

        # The section ends where the next recognised heading begins.
        next_heading = next(
            (
                para for para in paragraphs[heading_index + 1:]
                if para.text.strip() in sections and para.text.strip() != section
            ),
            None,
        )
        if next_heading is not None:
            next_heading.insert_paragraph_before(addition)
        else:
            doc.add_paragraph(addition)

    for section, addition in missing_sections:
        heading = doc.add_paragraph(section)
        try:
            heading.style = "Heading 1"
        except KeyError:
            # The document defines no "Heading 1" style; fall back to bold.
            for run in heading.runs:
                run.bold = True
        doc.add_paragraph(addition)

    output_path = "Updated_Resume.docx"
    doc.save(output_path)
    return output_path

def _to_latin1(text):
    """Return text restricted to Latin-1, the encoding of FPDF's core fonts."""
    # Map common typographic characters to close ASCII equivalents first so
    # they are not all degraded to "?".
    typographic = str.maketrans({
        "\u2022": "-",    # bullet
        "\u2013": "-",    # en dash
        "\u2014": "-",    # em dash
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2026": "...",  # ellipsis
    })
    return text.translate(typographic).encode("latin-1", "replace").decode("latin-1")


# Convert updated sections to PDF
def save_as_pdf(sections):
    """Render the sections to ``Updated_Resume.pdf`` and return the path.

    Each section is written as a bold heading followed by its lines. Text is
    reduced to Latin-1 first, because the built-in "Arial" font cannot encode
    characters outside that range (a UTF-8 round trip does not remove them).

    Args:
        sections: Dict mapping heading -> list of content lines.

    Returns:
        The output file path (relative to the current working directory).
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for section, content in sections.items():
        pdf.set_font("Arial", style='B', size=14)
        pdf.cell(200, 10, txt=_to_latin1(section), ln=True, align='L')
        pdf.set_font("Arial", size=12)
        pdf.ln(5)

        for line in content:
            pdf.multi_cell(0, 10, txt=_to_latin1(line))
        pdf.ln(5)

    output_path = "Updated_Resume.pdf"
    pdf.output(output_path, "F")
    return output_path

# Improve ATS score calculation
def calculate_ats_score(text1, text2):
    """Return the TF-IDF cosine similarity of two texts as a percentage.

    Args:
        text1: First text (e.g. the job description).
        text2: Second text (e.g. the resume).

    Returns:
        A float between 0 and 100. ``0.0`` is returned when no vocabulary can
        be built, e.g. both texts are empty or contain only stop words.
    """
    try:
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform([text1, text2])
    except ValueError:
        # Raised by scikit-learn for an empty vocabulary; nothing to compare.
        return 0.0

    # cosine_similarity accepts sparse rows directly, so no dense conversion.
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
    return float(similarity) * 100

# Streamlit UI
# Flow: upload resume + paste job description -> extract skills with Gemini ->
# let the user tick the skills to add -> show ATS scores before/after ->
# offer the updated resume for download.
st.title("🔍 AI-Powered Resume Enhancer with ATS Scoring (Gemini AI)")

uploaded_file = st.file_uploader("📂 Upload your resume", type=["pdf", "docx"])
job_description = st.text_area("📝 Paste the job description here")

if uploaded_file and job_description:
    resume_text, file_type = parse_resume(uploaded_file)

    if not resume_text:
        st.error("❌ Unable to extract text from the resume. Please check the file format.")
    else:
        sections = extract_sections(resume_text)
        extracted_data = extract_skills_gemini(job_description)

        if extracted_data:
            filtered_skills = filter_new_skills(extracted_data, sections)
            st.subheader("✅ Select new skills to add:")
            approved_skills = get_user_approval(filtered_skills)

            updated_sections = update_resume(sections, approved_skills)
            ats_score_old = calculate_ats_score(job_description, resume_text)

            # Rebuild a plain-text resume ("<heading>:" followed by its lines)
            # so the updated version can be scored the same way.
            rendered_sections = []
            for sec, content in updated_sections.items():
                section_body = "\n".join(content)
                rendered_sections.append(f"{sec}:\n" + section_body)
            updated_resume_text = "\n".join(rendered_sections)
            ats_score_new = calculate_ats_score(job_description, updated_resume_text)

            st.write(f"### 📊 ATS Score (Old Resume): {ats_score_old:.2f}%")
            st.write(f"### 📊 ATS Score (New Resume): {ats_score_new:.2f}%")

            # Keep the upload's format: DOCX is edited in place, anything else
            # is re-rendered as a PDF.
            if file_type == "docx":
                output_file = save_as_docx(updated_sections, uploaded_file)
            else:
                output_file = save_as_pdf(updated_sections)

            with open(output_file, "rb") as f:
                st.download_button("📥 Download Updated Resume", f, file_name=output_file)


2025-03-09 12:26:08.744 
  command:

    streamlit run D:\AICTIE_Microsoft_Project\Environment\project_1\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-09 12:26:11.279 Session state does not function when running a script without `streamlit run`
