In [7]:
%%capture
# First install all required packages
!pip install PyPDF2 spacy scikit-learn numpy ipywidgets
!python -m spacy download en_core_web_sm

In [8]:
import re
import spacy
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from PyPDF2 import PdfReader
from io import BytesIO
from google.colab import files
from IPython.display import display
import ipywidgets as widgets

# Load spaCy's small English pipeline once at module level; optimize_resume
# runs it on candidate keywords to read each token's part-of-speech tag.
nlp = spacy.load("en_core_web_sm")

def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (the caller in this file
            passes a ``BytesIO`` holding the uploaded bytes).

    Returns:
        The extracted text of all pages joined with a newline. Pages that
        yield no text contribute an empty string.
    """
    reader = PdfReader(pdf_file)
    # Join with "\n" so the last word of one page is not fused with the first
    # word of the next, and so a heading that opens a page still starts on its
    # own line for extract_section. `or ""` is a defensive guard in case a page
    # yields a falsy result instead of a string.
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def preprocess_text(text):
    """Normalise text for keyword matching.

    Every character that is neither a word character nor whitespace becomes a
    space, runs of whitespace collapse to a single space, the ends are
    trimmed and the result is lower-cased.
    """
    punctuation = re.compile(r'[^\w\s]')
    whitespace_run = re.compile(r'\s+')

    without_punctuation = punctuation.sub(' ', text)
    single_spaced = whitespace_run.sub(' ', without_punctuation)
    return single_spaced.strip().lower()

def extract_key_phrases(text, n=10):
    """Return the ``n`` highest-weighted terms of ``text``.

    Terms are unigrams and bigrams with English stop words removed, weighted
    by a ``TfidfVectorizer`` fitted on ``text`` alone. The result is a NumPy
    array of term strings ordered from highest to lowest weight.
    """
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
    weights = tfidf.fit_transform([text]).toarray()
    terms = np.array(tfidf.get_feature_names_out())

    # argsort is ascending, so reverse it to rank the heaviest terms first.
    ranked_indices = np.argsort(weights).flatten()[::-1]
    return terms[ranked_indices[:n]]

def analyze_resume_job_fit(resume_text, job_desc_text):
    """Score how well a resume covers the key phrases of a job description.

    Args:
        resume_text: Raw resume text.
        job_desc_text: Raw job-description text.

    Returns:
        A dict with:
            ``match_score``: cosine similarity between the job and resume
                TF-IDF vectors, restricted to the job's key phrases.
            ``job_keywords``: the key phrases from ``extract_key_phrases``.
            ``missing_keywords``: list of key phrases with zero weight in the
                resume vector.
    """
    resume_clean = preprocess_text(resume_text)
    job_desc_clean = preprocess_text(job_desc_text)
    job_keywords = extract_key_phrases(job_desc_clean)

    # The key phrases were produced as 1-2 grams with English stop words
    # removed, so the scoring vectorizer must tokenise the same way;
    # otherwise bigram keywords can never be counted and always score zero.
    vectorizer = TfidfVectorizer(
        vocabulary=job_keywords,
        ngram_range=(1, 2),
        stop_words='english',
    )
    job_vector = vectorizer.fit_transform([job_desc_clean])
    resume_vector = vectorizer.transform([resume_clean])

    match_score = cosine_similarity(job_vector, resume_vector)[0][0]

    # Column i of the vectors corresponds to job_keywords[i] because the
    # vocabulary is passed as an ordered sequence. Reading presence from the
    # vector avoids substring false positives ("java" inside "javascript")
    # and false negatives for bigrams that span a removed stop word.
    resume_weights = resume_vector.toarray()[0]
    missing_keywords = [
        kw for kw, weight in zip(job_keywords, resume_weights) if weight == 0
    ]

    return {
        'match_score': match_score,
        'job_keywords': job_keywords,
        'missing_keywords': missing_keywords
    }

def optimize_resume(resume_text, job_desc_text):
    """Build a plain-text resume with missing job keywords added to the skills.

    Args:
        resume_text: Raw resume text.
        job_desc_text: Raw job-description text.

    Returns:
        A ``(optimized_resume, analysis)`` tuple. ``optimized_resume`` is the
        rebuilt resume text with SKILLS, EXPERIENCE and EDUCATION sections.
        ``analysis`` is the dict from ``analyze_resume_job_fit`` plus an
        ``added_keywords`` entry listing the keywords actually appended to the
        skills section.
    """
    analysis = analyze_resume_job_fit(resume_text, job_desc_text)

    sections = {
        name: extract_section(resume_text, name)
        for name in ('skills', 'experience', 'education')
    }

    # Only keywords containing at least one noun token are appended; the rest
    # of the missing keywords are reported but not inserted.
    added_keywords = [
        kw for kw in analysis['missing_keywords']
        if any(token.pos_ == 'NOUN' for token in nlp(kw))
    ]
    # Record what was really added so callers need not assume that every
    # missing keyword was inserted.
    analysis['added_keywords'] = added_keywords

    skill_parts = [sections['skills']] if sections['skills'] else []
    skill_parts.extend(str(kw) for kw in added_keywords)
    sections['skills'] = ", ".join(skill_parts)

    optimized_resume = "OPTIMIZED RESUME\n\n"
    optimized_resume += f"SKILLS\n{sections['skills']}\n\n"
    optimized_resume += f"EXPERIENCE\n{sections['experience']}\n\n"
    optimized_resume += f"EDUCATION\n{sections['education']}\n"

    return optimized_resume, analysis

def extract_section(text, section_name):
    """Return the body of the first resume section whose heading line contains ``section_name``.

    The heading is matched case-insensitively anywhere in the text. The body
    starts on the line after the heading and runs up to the next line that
    consists solely of ASCII letters, or to the end of the text.

    Args:
        text: Full resume text.
        section_name: Heading to look for, treated as literal text.

    Returns:
        The stripped section body, or ``""`` when the heading is not found.
    """
    # Escape the name so regex metacharacters in it (e.g. "c++") are matched
    # literally instead of corrupting the pattern or raising re.error.
    heading = re.escape(section_name.upper())
    # NOTE(review): IGNORECASE also applies to [A-Z]+, so ANY single-word line
    # (not just an ALL-CAPS heading) ends the section. Kept as-is so resumes
    # with title-case headings behave as before; confirm whether intended.
    pattern = re.compile(
        fr"{heading}.*?\n(.*?)(?:\n[A-Z]+\n|\Z)",
        re.DOTALL | re.IGNORECASE,
    )
    match = pattern.search(text)
    return match.group(1).strip() if match else ""

def run_optimizer():
    """Display the upload / job-description interface and wire up the button.

    Shows a PDF uploader, a job-description text area and a button. Clicking
    the button extracts the resume text, runs ``optimize_resume``, prints the
    analysis and the rebuilt resume, writes it to ``optimized_resume.txt`` and
    triggers a Colab download of that file.
    """
    # Create widgets
    uploader = widgets.FileUpload(
        accept='.pdf',
        multiple=False,
        description='Choose PDF Resume'
    )

    job_desc_input = widgets.Textarea(
        placeholder='Paste job description here...',
        description='Job Description:',
        layout={'width': '80%', 'height': '200px'}
    )

    run_button = widgets.Button(
        description="Optimize Resume",
        button_style='success',
        layout={'width': '200px'}
    )

    output = widgets.Output()

    # Display interface
    display(widgets.VBox([
        widgets.Label("1. Upload your PDF resume:"),
        uploader,
        widgets.Label("2. Paste the job description:"),
        job_desc_input,
        widgets.Label("3. Generate optimized resume:"),
        run_button,
        output
    ]))

    def first_upload_content(upload_value):
        """Return the raw bytes of the first uploaded file.

        ipywidgets 7 exposes ``FileUpload.value`` as a dict keyed by filename;
        ipywidgets 8 exposes a tuple of per-file dicts. Both keep the file
        data under the ``'content'`` key.
        """
        if isinstance(upload_value, dict):
            entry = next(iter(upload_value.values()))
        else:
            entry = upload_value[0]
        # bytes() also normalises the memoryview that ipywidgets 8 provides.
        return bytes(entry['content'])

    def on_button_click(b):
        with output:
            output.clear_output()

            if not uploader.value:
                print("❌ Please upload a PDF resume file")
                return

            if not job_desc_input.value.strip():
                print("❌ Please enter a job description")
                return

            try:
                # Process PDF
                resume_text = extract_text_from_pdf(
                    BytesIO(first_upload_content(uploader.value))
                )

                # Optimize resume
                optimized_resume, analysis = optimize_resume(
                    resume_text,
                    job_desc_input.value
                )

                # Prefer the list of keywords that were really inserted when
                # the analysis provides it; otherwise fall back to the missing
                # keywords.
                added = analysis.get('added_keywords', analysis['missing_keywords'])

                # Show results
                print("\n✅ Optimization Complete")
                print(f"\nMatch Score: {analysis['match_score']:.2f}/1.00")
                print("\n🔑 Important Keywords:")
                print(", ".join(analysis['job_keywords']))
                print("\n✨ Added Keywords:")
                print(", ".join(added) or "None")

                print("\n📄 Optimized Resume:")
                print(optimized_resume)

                # Save and offer download. An explicit encoding keeps
                # non-ASCII resume text from failing on non-UTF-8 defaults.
                with open("optimized_resume.txt", "w", encoding="utf-8") as f:
                    f.write(optimized_resume)
                print("\n⬇️ Download your optimized resume:")
                files.download("optimized_resume.txt")

            except Exception as e:
                # Top-level UI boundary: report the failure instead of letting
                # the traceback escape the widget callback.
                print(f"❌ Error: {str(e)}")
                print("Please ensure you uploaded a valid PDF file")

    run_button.on_click(on_button_click)

# Build and display the optimizer interface; the work itself happens when the
# "Optimize Resume" button is clicked.
run_optimizer()

VBox(children=(Label(value='1. Upload your PDF resume:'), FileUpload(value={}, accept='.pdf', description='Cho…