In [1]:
# Install required libraries
!pip install PyPDF2 nltk scikit-learn fpdf
# Install required libraries
!pip install PyPDF2 nltk scikit-learn ipywidgets

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py): started
  Building wheel for fpdf (setup.py): finished with status 'done'
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40713 sha256=1efd4cd1914da5c5b2191c7e916644e66c823b9fd748a18cc60bfc8fa8ff1e07
  Stored in directory: c:\users\aryan\appdata\local\pip\cache\wheels\65\4f\66\bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [23]:
import PyPDF2
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import io
import os
from fpdf import FPDF
from IPython.display import display, HTML
import ipywidgets as widgets

In [24]:
# Download the NLTK data used by preprocess_text: the Punkt tokenizer models
# and the English stop-word list. Recent NLTK releases load the tokenizer
# from the 'punkt_tab' package while older ones look for 'punkt', so fetch
# both to keep nltk.word_tokenize working whichever version pip installed.
for nltk_package in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_package)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aryan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\aryan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [25]:
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Value handed straight to ``PyPDF2.PdfReader``; the upload
            handler in this notebook passes an ``io.BytesIO`` of the file.

    Returns:
        str: The page texts joined with newlines, so the last word of one
        page is not fused with the first word of the next. Pages that yield
        no text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Defensive `or ""`: a page without a text layer must not break the join.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    return "\n".join(page_texts)

def preprocess_text(text):
    """Normalise text for keyword comparison.

    Non-word characters are replaced by spaces, the text is lower-cased and
    tokenized with ``nltk.word_tokenize``, and English stop words are dropped.

    Args:
        text: The raw text to clean.

    Returns:
        str: The remaining tokens joined by single spaces.
    """
    # Collapse every run of non-alphanumeric characters into one space.
    cleaned = re.sub(r'\W+', ' ', text)
    tokens = nltk.word_tokenize(cleaned.lower())
    # A set gives constant-time membership tests; the corpus returns a list,
    # which would be scanned linearly once per token.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    return ' '.join(token for token in tokens if token not in stop_words)

def calculate_relevancy_score(job_description, resume_text):
    """Score how similar a resume is to a job description.

    Both texts are vectorized together with TF-IDF and compared with cosine
    similarity.

    Args:
        job_description: Job description text.
        resume_text: Resume text.

    Returns:
        float: Cosine similarity scaled to a percentage (0-100). Returns 0.0
        when either text is blank or no usable token is found.
    """
    # Nothing to compare: a blank document can only ever score zero.
    if not job_description.strip() or not resume_text.strip():
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([job_description, resume_text])
    except ValueError:
        # Raised when the combined vocabulary is empty, e.g. both texts only
        # contain tokens the vectorizer discards.
        return 0.0

    # Row 0 is the job description, row 1 the resume.
    cosine_sim = cosine_similarity(vectors[0:1], vectors[1:2])
    return float(cosine_sim[0][0] * 100)

def suggest_improvements(job_description, resume_text):
    """List the job-description keywords that are absent from the resume.

    Both arguments are split on whitespace; callers in this notebook pass
    text that has already been through ``preprocess_text``.

    Args:
        job_description: Text whose tokens are the wanted keywords.
        resume_text: Text whose tokens count as already present.

    Returns:
        str: A sentence listing each missing keyword once, in the order it
        first appears in ``job_description``.
    """
    resume_tokens = set(resume_text.split())
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # message is stable across runs (iterating a set of strings is not).
    missing_keywords = [
        token
        for token in dict.fromkeys(job_description.split())
        if token not in resume_tokens
    ]
    return f"Consider adding these keywords to your resume: {', '.join(missing_keywords)}"

def create_ideal_resume(original_text, suggestions, output_file):
    """Write a PDF holding the original resume text and the suggestions.

    The PDF is saved under the name of ``output_file`` prefixed with ``New_``
    (in the same directory, if ``output_file`` includes one), and a
    confirmation message is displayed in the notebook.

    Args:
        original_text: Resume text, written as the first section.
        suggestions: Suggestion text, written as the second section.
        output_file: File name the output name is derived from.
    """
    # Local import: only needed to escape the file name in the HTML message.
    from html import escape

    class PDF(FPDF):
        """Page layout for the report: centred title, page-number footer."""

        def header(self):
            self.set_font('Arial', 'B', 12)
            self.cell(0, 10, 'Ideal Resume', 0, 1, 'C')
            self.ln(10)

        def footer(self):
            # Position the footer 15 units above the bottom edge.
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

        def add_section(self, title, content):
            """Add a bold section title followed by its body text."""
            self.set_font('Arial', 'B', 14)
            self.cell(0, 10, title, 0, 1, 'L')
            self.ln(5)
            self.set_font('Arial', '', 12)
            # Characters outside Latin-1 are replaced with '?' before being
            # handed to the PDF writer.
            printable = (content or '').encode('latin1', 'replace').decode('latin1')
            self.multi_cell(0, 10, printable)
            self.ln(10)

    # NOTE: an earlier version built an "ideal resume" string here and looped
    # until its similarity to the original text reached 60%. That string was
    # never written to the PDF, each pass only appended the boilerplate
    # suggestion prefix to `suggestions`, and with empty resume text the score
    # stayed at 0 so the loop never ended. It was removed for those reasons.

    # Prefix only the file name so a directory component stays intact.
    directory, base_name = os.path.split(output_file)
    new_resume_filename = os.path.join(directory, f"New_{base_name}")

    pdf = PDF()
    pdf.add_page()
    pdf.add_section('Original Resume Content:', original_text)
    pdf.add_section('Suggestions to Improve Relevancy:', suggestions)
    pdf.output(new_resume_filename)

    # The file name comes from the upload, so escape it before rendering.
    display(HTML(f'<p>Ideal Resume created and saved as {escape(new_resume_filename)}</p>'))

# Input controls: a text area for the job description and a single-file
# PDF picker for the resume.
job_desc_widget = widgets.Textarea(
    layout=widgets.Layout(height='150px', width='500px'),
    description='Job Description:',
)
resume_upload_widget = widgets.FileUpload(
    multiple=False,
    accept='.pdf',
    description='Upload Resume',
)

# Render both controls, job description first.
display(job_desc_widget, resume_upload_widget)

def on_upload_change(change):
    """Score the uploaded resume against the job description and show feedback.

    Reads the current values of ``job_desc_widget`` and
    ``resume_upload_widget``, displays the relevancy score and keyword
    suggestions, and generates the PDF report when the score is below 50%.

    Args:
        change: Change notification supplied by the widget; not used.
    """
    uploaded_value = resume_upload_widget.value
    if not uploaded_value:
        return

    if isinstance(uploaded_value, dict):
        # ipywidgets 7: {file name: {'metadata': {...}, 'content': bytes}}
        uploaded_file = next(iter(uploaded_value.values()))
        file_name = uploaded_file['metadata']['name']
    else:
        # ipywidgets 8: tuple of dicts with 'name' and a memoryview 'content'
        uploaded_file = uploaded_value[0]
        file_name = uploaded_file['name']

    job_description = job_desc_widget.value
    if not job_description.strip():
        display(HTML('<p>Please enter a job description, then upload the resume again.</p>'))
        return

    # bytes() normalises both the bytes (v7) and memoryview (v8) payloads.
    resume_text = extract_text_from_pdf(io.BytesIO(bytes(uploaded_file['content'])))

    # Preprocess texts
    processed_job_description = preprocess_text(job_description)
    processed_resume_text = preprocess_text(resume_text)
    if not processed_resume_text:
        # e.g. a scanned PDF without a text layer: there is nothing to score.
        display(HTML('<p>No text could be extracted from the uploaded PDF.</p>'))
        return

    # Calculate relevancy score
    relevancy_score = calculate_relevancy_score(processed_job_description, processed_resume_text)
    suggestions = suggest_improvements(processed_job_description, processed_resume_text)

    # Display the relevancy score and suggestions
    display(HTML(f'<h3>Relevancy Score: {relevancy_score:.2f}%</h3>'))
    if relevancy_score < 50:
        display(HTML(f'<p><b>Suggestions to Improve:</b> {suggestions}</p>'))
        create_ideal_resume(resume_text, suggestions, file_name)
    else:
        display(HTML('<p>Your resume is quite relevant to the job description!</p>'))
        display(HTML(f'<p><b>Suggestions to Improve Further:</b> {suggestions}</p>'))

# Register on_upload_change as an observer of the upload widget's `value`
# trait, so the handler is invoked each time that value changes.
resume_upload_widget.observe(on_upload_change, names='value')

Textarea(value='', description='Job Description:', layout=Layout(height='150px', width='500px'))

FileUpload(value={}, accept='.pdf', description='Upload Resume')