In [None]:
# ---------------------------------------------------------
# AI Resume Ranker using TF-IDF + Cosine Similarity
# PDF Upload Version (User-defined resumes)
# ---------------------------------------------------------

!pip install PyPDF2
!pip install pdfminer.six

import PyPDF2 # Still needed for error handling of non-pdfminer readable PDFs if we revert
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from pdfminer.high_level import extract_text

# ---------------------------------------------------------
# 1. FUNCTION TO EXTRACT TEXT FROM PDF (Using advanced function)
# ---------------------------------------------------------

def extract_text_from_pdf_advanced(pdf_path):
    """Extract the text of a PDF using pdfminer.six.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to pdfminer's
            ``extract_text``.

    Returns:
        The extracted text, or an empty string if extraction raised any
        exception. The error is printed rather than re-raised.
    """
    try:
        extracted = extract_text(pdf_path)
    except Exception as e:
        # Best-effort: report the failure and hand back an empty string so
        # the caller can skip this file. No PyPDF2 fallback is attempted.
        print(f"Error extracting text from {pdf_path} using pdfminer.six: {e}")
        extracted = ""
    return extracted

# NOTE: 'extract_text_from_pdf_advanced' is defined directly above.
# It is used below in place of the original 'extract_text_from_pdf'.

# ---------------------------------------------------------
# 2. USER UPLOAD PDF RESUMES
# ---------------------------------------------------------

# Keep asking for the resume count until a non-negative integer is entered,
# so a typo does not abort the whole run with an uncaught ValueError.
while True:
    try:
        num = int(input("Enter number of PDF resumes to upload: "))
    except ValueError:
        print("Invalid input. Please enter a whole number.")
        continue
    if num >= 0:
        break
    print("Invalid input. Please enter a non-negative number.")

# Parallel lists: resume_files[k] is the path whose text is resume_texts[k].
# Only resumes that produced usable text are recorded.
resume_texts = []
resume_files = []

for position in range(1, num + 1):
    # Leading/trailing whitespace is almost certainly a copy/paste artifact
    # and would make the path lookup fail, so drop it.
    pdf_path = input(f"\nEnter path of Resume PDF {position}: ").strip()

    print("Extracting text...")
    text = extract_text_from_pdf_advanced(pdf_path)

    # The extractor returns "" on failure; whitespace-only text is equally
    # useless for ranking, so both cases are skipped with a warning.
    if not text.strip():
        print(f"Warning: No meaningful text extracted from '{pdf_path}'. It will be skipped.")
        continue

    resume_files.append(pdf_path)
    resume_texts.append(text)

print("\nPDF text extraction complete!")

# ---------------------------------------------------------
# 3. USER INPUT JOB DESCRIPTION
# ---------------------------------------------------------

# Only prompt for a job description when there is at least one resume to
# compare it against.
job_description = ""
if resume_texts:
    print("\nEnter Job Description:")
    job_description = input("\n")

if not resume_texts:
    # Handle case where no valid resumes were extracted
    print("No valid PDF resumes were extracted. Cannot proceed with ranking.")
elif not job_description.strip():
    # A blank description turns into an all-zero TF-IDF vector, so every
    # resume would score 0.0 and the printed order would carry no information.
    print("Job description is empty. Cannot proceed with ranking.")
else:
    # ---------------------------------------------------------
    # 4. TF-IDF + COSINE SIMILARITY
    # ---------------------------------------------------------

    # Resumes and the job description are vectorized together so they share
    # one vocabulary and one set of IDF weights; the description goes last.
    documents = resume_texts + [job_description]

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Last row is the job description, every row before it is a resume.
    jd_vector = tfidf_matrix[-1]
    resume_vectors = tfidf_matrix[:-1]

    # One similarity score per resume, in the same order as resume_files.
    scores = cosine_similarity(resume_vectors, jd_vector).flatten()

    df = pd.DataFrame({
        "Resume File": resume_files,
        "Similarity Score": scores,
    })

    # Best match first.
    df = df.sort_values(by="Similarity Score", ascending=False)

    # ---------------------------------------------------------
    # 5. FINAL RANKING
    # ---------------------------------------------------------

    print("\n==============================")
    print("        FINAL RANKING")
    print("==============================")

    # Rank is the 1-based position in the sorted frame, not the frame index
    # (which still reflects upload order).
    for rank, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"\nRank {rank}")
        print("File:", row["Resume File"])
        print("Score:", round(row["Similarity Score"], 4))

Enter number of PDF resumes to upload: 3

Enter path of Resume PDF 1: /content/Data_Analyst_Resume_1.pdf
Extracting text...

Enter path of Resume PDF 2: /content/Data_Analyst_Resume_2.pdf
Extracting text...

Enter path of Resume PDF 3: /content/Resume_Data_Analyst.pdf
Extracting text...

PDF text extraction complete!

Enter Job Description:

 Junior Data Analyst  Skills: SQL, R, Excel, Google Data Studio, Data Visualization, ETL  Experience: 1.5 years assisting in data mining, reporting, and visualization tasks.  Education: B.Com with specialization in Business Analytics  Projects:- Financial KPI dashboard development- Automated Excel-based reporting too

        FINAL RANKING

Rank 1
File: /content/Data_Analyst_Resume_2.pdf
Score: 0.9401

Rank 2
File: /content/Resume_Data_Analyst.pdf
Score: 0.3425

Rank 3
File: /content/Data_Analyst_Resume_1.pdf
Score: 0.2326
