In [1]:
# Install required libraries
!pip install streamlit
!pip install PyPDF2
!pip install pandas
!pip install scikit-learn



In [10]:
# Import libraries
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract text
def get_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and combine it into one string.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source; the app passes
            the objects returned by ``st.file_uploader``.

    Returns:
        str: The text of all pages that yielded text, joined with newlines and
        stripped of leading/trailing whitespace. Empty string when no page
        yields any text.
    """
    pdf_reader = PdfReader(pdf_file)
    # extract_text() may give None/"" for a page; normalise to "" so such
    # pages can be dropped below.
    page_texts = (page.extract_text() or "" for page in pdf_reader.pages)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next (which would create a bogus token for TF-IDF).
    return "\n".join(text for text in page_texts if text).strip()

# Function to rank resumes
def evaluate_resumes(job_description, resume_texts):
    """Score resumes by TF-IDF cosine similarity to a job description.

    The job description and all resumes are vectorised together with a single
    ``TfidfVectorizer``; each resume vector is then compared to the job
    description vector.

    Args:
        job_description (str): Text of the job description.
        resume_texts (Sequence[str]): Text of each resume.

    Returns:
        numpy.ndarray: 1-D array with one similarity score per resume, in the
        same order as ``resume_texts``. Empty array when ``resume_texts`` is
        empty.

    Raises:
        ValueError: Propagated from ``TfidfVectorizer.fit_transform`` when the
            combined texts contain no usable tokens (empty vocabulary).
    """
    # Local import: numpy is not imported at file level, and it is only needed
    # for the empty-input result.
    import numpy as np

    resume_texts = list(resume_texts)
    if not resume_texts:
        # Nothing to compare; cosine_similarity rejects a zero-row matrix.
        return np.array([], dtype=float)

    # Row 0 is the job description, rows 1.. are the resumes.
    all_texts = [job_description] + resume_texts

    # Keep the TF-IDF matrix sparse: cosine_similarity accepts sparse input,
    # so densifying with .toarray() only costs memory.
    tfidf_matrix = TfidfVectorizer().fit_transform(all_texts)

    # Slice with 0:1 (not 0) to keep the job description as a 1-row matrix.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    return similarity_scores

# Streamlit app
# Maximum number of characters of resume text shown as a preview.
SNIPPET_LENGTH = 300

st.title("AI-Powered Resume Screening Tool")

# Input for job description
st.header("Enter Job Description")
job_desc = st.text_area("Paste the job description here:")

# File uploader for resumes
st.header("Upload Resumes")
uploaded_files = st.file_uploader("Upload PDF resumes", type=["pdf"], accept_multiple_files=True)

# Process resumes only once both a job description and at least one file exist
if uploaded_files and job_desc:
    st.header("Resume Ranking Results")

    # Extract text from uploaded resumes (one entry per file, same order)
    resume_texts = [get_text_from_pdf(file) for file in uploaded_files]

    # resume_texts always has one entry per upload, so test whether any entry
    # actually contains text; a plain truthiness check on the list could never
    # reach the warning below.
    if any(resume_texts):
        # Rank resumes based on similarity to job description
        similarity_scores = evaluate_resumes(job_desc, resume_texts)

        # Keep name, score and text in the same row so they stay aligned after
        # sorting, instead of indexing back into resume_texts by row label.
        results_df = pd.DataFrame({
            "Resume File": [file.name for file in uploaded_files],
            "Similarity Score": similarity_scores,
            "Resume Text": resume_texts,
        }).sort_values(by="Similarity Score", ascending=False)

        # Display results, best match first
        for name, score, text in zip(
            results_df["Resume File"],
            results_df["Similarity Score"],
            results_df["Resume Text"],
        ):
            st.subheader(f"Resume: {name}")
            st.write(f"Similarity Score: {score:.4f}")
            st.write("Resume Snippet:")
            snippet = text[:SNIPPET_LENGTH]
            # Only mark the snippet as truncated when text was actually cut.
            if len(text) > SNIPPET_LENGTH:
                snippet += "..."
            st.write(snippet)
    else:
        st.warning("No valid text found in the uploaded resumes.")



In [None]:
# Launch the Streamlit server in the background (trailing &) so the cell returns.
# NOTE(review): this assumes the app code has been saved to app.py; no cell
# visible in this notebook writes that file (e.g. via %%writefile app.py) — confirm.
!streamlit run app.py &


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.

  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://172.28.0.12:8501
  External URL: http://35.230.64.21:8501

