In [None]:
!pip install mysql-connector-python pandas nltk scikit-learn spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import nltk

# Download the NLTK data packages named below; each call is a no-op when the
# package is already up to date. The value of the last call is displayed as
# the cell result.
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("omw-1.4")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [None]:
!pip install kagglehub
import kagglehub

path = kagglehub.dataset_download("snehaanbhawal/resume-dataset")

print("Dataset downloaded at:", path)


Dataset downloaded at: /kaggle/input/resume-dataset


In [None]:
import os

# Use the directory reported by kagglehub.dataset_download (`path`) instead of
# hard-coding the Kaggle mount point, so the cell also works when the dataset
# is cached in a different location.
dataset_path = path
files = os.listdir(dataset_path)

print("Dataset Files:", files)


Dataset Files: ['Resume', 'data']


In [None]:
import os

# Use the directory reported by kagglehub.dataset_download (`path`) instead of
# hard-coding the Kaggle mount point, so the cell also works when the dataset
# is cached in a different location.
dataset_path = path

# The dataset root contains two sub-folders; list what each one holds.
resume_path = os.path.join(dataset_path, "Resume")
data_path = os.path.join(dataset_path, "data")

print("Files in 'Resume':", os.listdir(resume_path))
print("Files in 'data':", os.listdir(data_path))


Files in 'Resume': ['Resume.csv']
Files in 'data': ['data']


In [None]:
import pandas as pd
import os

# Use the directory reported by kagglehub.dataset_download (`path`) instead of
# hard-coding the Kaggle mount point, so the cell also works when the dataset
# is cached in a different location.
dataset_path = path
file_path = os.path.join(dataset_path, "Resume", "Resume.csv")

# Load the resume table and preview the first rows.
df = pd.read_csv(file_path)

print(df.head())


         ID                                         Resume_str  \
0  16852973           HR ADMINISTRATOR/MARKETING ASSOCIATE\...   
1  22323967           HR SPECIALIST, US HR OPERATIONS      ...   
2  33176873           HR DIRECTOR       Summary      Over 2...   
3  27018550           HR SPECIALIST       Summary    Dedica...   
4  17812897           HR MANAGER         Skill Highlights  ...   

                                         Resume_html Category  
0  <div class="fontsize fontface vmargins hmargin...       HR  
1  <div class="fontsize fontface vmargins hmargin...       HR  
2  <div class="fontsize fontface vmargins hmargin...       HR  
3  <div class="fontsize fontface vmargins hmargin...       HR  
4  <div class="fontsize fontface vmargins hmargin...       HR  


In [None]:
# Print the column labels of the loaded resume DataFrame.
print(df.columns)


Index(['ID', 'Resume_str', 'Resume_html', 'Category'], dtype='object')


In [None]:
import string

# Small hand-picked list of very common English words that carry no signal
# for matching a resume against a job description.
manual_stopwords = {
    "the", "is", "in", "and", "to", "a", "of", "for", "on", "with", "at", "by", "from",
    "about", "as", "it", "this", "that", "which", "be", "are", "was", "were", "has", "have"
}

def preprocess_text(text):
    """Lower-case a text and drop stopwords and surrounding punctuation.

    The text is split on whitespace. Leading and trailing punctuation is
    stripped from every token *before* the stopword check, so that tokens
    such as "the," or "(with" are recognised as stopwords and "manager,"
    becomes "manager". Punctuation inside a token ("administrator/marketing")
    is left untouched.

    Args:
        text: The raw text. Any non-string value (for example ``None`` or the
            ``NaN`` pandas uses for missing cells) is treated as empty text.

    Returns:
        The remaining tokens joined by single spaces; ``""`` when nothing
        remains.
    """
    # Missing CSV cells arrive as float NaN / None and have no .lower().
    if not isinstance(text, str):
        return ""
    stripped_tokens = (token.strip(string.punctuation) for token in text.lower().split())
    # An empty token means the original token consisted only of punctuation.
    return " ".join(
        token for token in stripped_tokens
        if token and token not in manual_stopwords
    )

# Store a cleaned copy of every resume text in a new column, then preview it.
df["Cleaned_Resume"] = df["Resume_str"].map(preprocess_text)

print(df.loc[:, ["Cleaned_Resume"]].head())


                                      Cleaned_Resume
0  hr administrator/marketing associate hr admini...
1  hr specialist, us hr operations summary versat...
2  hr director summary over 20 years experience r...
3  hr specialist summary dedicated, driven, dynam...
4  hr manager skill highlights hr skills hr depar...


TF-IDF model for resume similarity checking

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Job description that every resume is compared against.
job_desc = "Looking for an HR Manager with experience in employee relations, payroll, and recruitment."

# Corpus: all cleaned resumes, with the job description appended as the last document.
texts = [*df["Cleaned_Resume"], job_desc]

# Fit TF-IDF on the whole corpus so resumes and job description share one vocabulary.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(texts)

# The last row is the job description; score it against every resume row.
similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).ravel()

df["Similarity"] = similarities

# Best matches first.
df_sorted = df.sort_values("Similarity", ascending=False)

print(df_sorted.loc[:, ["ID", "Similarity"]].head(10))


          ID  Similarity
55  30862904    0.269295
4   17812897    0.267079
65  17412079    0.250197
61  20925036    0.245675
80  25724495    0.242378
68  15041689    0.238716
72  26289308    0.237734
88  13376919    0.236732
9   32896934    0.235929
18  73077810    0.235820


Creating the UI for resume filtering


In [None]:
!pip install streamlit python-docx pdfplumber




In [None]:
!pip install streamlit
!pip install cloudflared






Main Python application file (app.py)

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import pdfplumber
import docx
import time
import plotly.express as px
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Page configuration has to be issued before any other Streamlit command,
# including the spinner that a cached function may render on its first call.
st.set_page_config(page_title="AI Resume Filtering", layout="wide")


@st.cache_resource
def load_bert_model():
    """Create the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes this script on every user interaction; without the
    cache the model would be constructed again each time.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


bert_model = load_bert_model()

def extract_text_from_pdf(uploaded_file):
    """Return the text of a PDF, with pages separated by newlines.

    Args:
        uploaded_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The text of all pages that yielded any text, joined with ``"\n"``.
        Pages for which extraction returns nothing (``None`` or ``""``) are
        skipped, so the result is ``""`` when no page has extractable text.
    """
    with pdfplumber.open(uploaded_file) as pdf:
        # Extract each page exactly once; the previous version ran the
        # extraction twice per page (once to filter, once to collect).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

def extract_text_from_docx(uploaded_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        uploaded_file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The text of every paragraph in ``Document.paragraphs`` joined with
        ``"\n"`` (empty paragraphs contribute empty lines).
    """
    paragraphs = docx.Document(uploaded_file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def generate_circular_chart(score, resume_name):
    """Build a donut chart that visualises a match score.

    Args:
        score: Match score as a fraction. Values outside ``[0, 1]`` are
            clamped into that range before plotting.
        resume_name: Name shown in the chart title.

    Returns:
        The figure created by ``px.pie`` with two slices ("Match Score" and
        "Remaining") that always add up to 100.
    """
    # A cosine similarity can be negative; an unclamped value would turn one
    # of the two slice sizes negative.
    fraction = max(0.0, min(1.0, float(score)))
    matched = fraction * 100
    fig = px.pie(values=[matched, 100 - matched], names=["Match Score", "Remaining"],
                 color_discrete_sequence=["red", "lightgrey"], hole=0.7)
    # Hide the slice labels; the percentages stay available on hover.
    fig.update_traces(textinfo='none', hoverinfo='label+percent')
    fig.update_layout(title_text=f"{resume_name} Match Score", showlegend=False)
    return fig

# ---- Page header and inputs -------------------------------------------------
st.markdown("<h1 style='text-align: center; color: #ff4b4b;'>AI-Powered Resume Filtering System</h1>", unsafe_allow_html=True)
st.write("Upload resumes, compare them with job descriptions, and get AI-powered insights!")

uploaded_files = st.file_uploader(" Upload Resume(s) (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

job_desc = st.text_area("Enter Job Description", "Looking for an HR Manager with experience in employee relations, payroll, and recruitment.")

company_skills = st.text_area("Enter Required Skills", "Python, Machine Learning, Data Analysis")

# ---- Analysis ---------------------------------------------------------------
if st.button(" Analyze Resumes"):
    if uploaded_files:
        # The job description and the skill list are the same for every
        # resume, so embed them once instead of once per uploaded file.
        job_embedding = bert_model.encode(job_desc, convert_to_tensor=True).cpu().numpy()
        skill_embedding = bert_model.encode(company_skills, convert_to_tensor=True).cpu().numpy()

        results = []
        for uploaded_file in uploaded_files:
            # Compare the extension case-insensitively so "CV.PDF" is handled too.
            file_type = uploaded_file.name.split(".")[-1].lower()
            if file_type == "pdf":
                resume_text = extract_text_from_pdf(uploaded_file)
            elif file_type == "docx":
                resume_text = extract_text_from_docx(uploaded_file)
            else:
                st.error("Unsupported file format!")
                continue

            # Scoring an empty text (e.g. an image-only PDF) would produce a
            # meaningless similarity, so report the file and move on.
            if not resume_text.strip():
                st.warning(f"No text could be extracted from {uploaded_file.name}; skipping it.")
                continue

            resume_embedding = bert_model.encode(resume_text, convert_to_tensor=True).cpu().numpy()

            # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
            job_similarity = float(cosine_similarity([resume_embedding], [job_embedding])[0][0])
            skill_similarity = float(cosine_similarity([resume_embedding], [skill_embedding])[0][0])
            # Overall score: plain average of the two similarities.
            total_similarity = (job_similarity + skill_similarity) / 2

            match_label = "Excellent Fit" if total_similarity > 0.75 else "Good Fit" if total_similarity > 0.5 else "Needs Improvement"

            results.append({
                "Resume": uploaded_file.name,
                "Job Match Score": round(job_similarity, 2),
                "Skill Match Score": round(skill_similarity, 2),
                "Overall Match Score": round(total_similarity, 2),
                "Match Label": match_label
            })

            st.subheader(f"{uploaded_file.name} Match Score")
            st.plotly_chart(generate_circular_chart(total_similarity, uploaded_file.name))

        if results:
            df_results = pd.DataFrame(results)
            st.subheader("Resume Match Results")
            st.dataframe(df_results.style.highlight_max(axis=0, subset=["Overall Match Score"]))

            csv = df_results.to_csv(index=False).encode('utf-8')
            st.download_button(" Download Results as CSV", csv, "resume_analysis.csv", "text/csv")
        else:
            # Every file was skipped; an empty frame has no score column to
            # highlight, so show a message instead of building the table.
            st.warning("None of the uploaded files could be analyzed.")
    else:
        st.error("Please upload at least one resume file!")



Overwriting app.py


New: download the cloudflared binary

In [None]:
# Download the latest cloudflared release binary for Linux amd64 from GitHub
# and save it as ./cloudflared.
!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared
# Mark the downloaded file as executable.
!chmod +x cloudflared
# Print the version to confirm that the binary runs.
!./cloudflared --version


--2025-05-02 08:52:46--  https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/cloudflare/cloudflared/releases/download/2025.4.2/cloudflared-linux-amd64 [following]
--2025-05-02 08:52:46--  https://github.com/cloudflare/cloudflared/releases/download/2025.4.2/cloudflared-linux-amd64
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/106867604/a6b2a67b-5629-4df3-aa0c-8146365a1d48?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250502%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250502T085206Z&X-Amz-Expires=300&X-Amz-Signature=b3bec9a80151d088f04a3326a83b29dfeebb86b31ada4a41cebe0529c4e8d4a8&X-Amz-S