In [1]:
# IntelliMatch: AI-Powered Resume Screener

In [2]:
#  1. Load and Prepare the Data
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load Resume Data
# The CSV location can be overridden with the RESUME_CSV_PATH environment
# variable so the notebook is runnable on machines other than the one it was
# written on. When the variable is unset, the original path is used unchanged.
DEFAULT_RESUME_CSV = r"C:\Users\v prasanthi\Desktop\APJ\RESUMES\DATA ANALYST\projects\AI_RESUME_SCREENER\AI_Resume_Screening.csv"
resume_csv_path = os.environ.get("RESUME_CSV_PATH", DEFAULT_RESUME_CSV)
resumes_df = pd.read_csv(resume_csv_path)

# Fail fast, with a readable message, if any column used by later cells is
# absent instead of hitting a bare KeyError several cells further down.
REQUIRED_COLUMNS = [
    "Name",
    "Skills",
    "Education",
    "Certifications",
    "Job Role",
    "Recruiter Decision",
    "Experience (Years)",
    "Projects Count",
    "AI Score (0-100)",
]
missing_columns = [col for col in REQUIRED_COLUMNS if col not in resumes_df.columns]
if missing_columns:
    raise KeyError(
        f"Resume CSV '{resume_csv_path}' is missing required columns: {missing_columns}"
    )


In [8]:
# Sample Job Description for Data Scientist
# Free-text posting that the resumes are scored against. It is placed first in
# the list of documents given to the TF-IDF vectorizer, so it becomes row 0 of
# the resulting matrix. The text is used verbatim; editing it changes every
# match score computed below.
job_description = """
We are seeking a highly motivated Data Scientist to join our analytics team. The ideal candidate should have a strong background in statistics, machine learning, and data wrangling.
Responsibilities include:
- Building predictive models and machine learning algorithms.
- Analyzing large amounts of information to discover trends and patterns.
- Performing data mining and feature engineering.
- Collaborating with engineering and product teams.
Requirements:
- Proficient in Python, SQL, and machine learning libraries (scikit-learn, XGBoost, TensorFlow).
- Strong understanding of NLP, data visualization tools, and statistical techniques.
- Experience with cloud platforms like AWS or GCP is a plus.
"""

In [9]:
# Combine fields from resumes into a single text
# Each listed column has its missing values replaced by an empty string, then
# the columns are concatenated left to right with a single space between them.
text_columns = ["Skills", "Education", "Certifications", "Job Role"]

combined_text = resumes_df[text_columns[0]].fillna("")
for column_name in text_columns[1:]:
    combined_text = combined_text + " " + resumes_df[column_name].fillna("")

resumes_df["Combined_Text"] = combined_text

In [10]:
# 2. TF-IDF Matching
# One corpus: the job description in position 0, followed by every resume's
# combined text. Fitting on the whole corpus means the vocabulary and IDF
# weights come from the job description and the resumes together.
corpus = [job_description] + resumes_df["Combined_Text"].tolist()

vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(corpus)


In [11]:
# Calculate cosine similarity with JD
# Row 0 of the TF-IDF matrix is the job description; the remaining rows are
# the resumes, in the same order as resumes_df.
jd_vector = tfidf_matrix[0:1]
resume_vectors = tfidf_matrix[1:]

cosine_sim = cosine_similarity(jd_vector, resume_vectors).flatten()

# Store the similarity scaled by 100.
resumes_df["TFIDF_Match_Score"] = 100 * cosine_sim


In [15]:
import os

# Make sure the 'model' output folder is present (no error if it already is)
model_dir = "model"
os.makedirs(model_dir, exist_ok=True)

# Persist the fitted vectorizer so it can be reloaded later
vectorizer_path = "model/tfidf_vectorizer.pkl"
joblib.dump(vectorizer, vectorizer_path)


['model/tfidf_vectorizer.pkl']

In [16]:
# 3. AI Recruiter Model (Optional Prediction)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Convert labels
# Map the recruiter's decision to a binary target: "Hire" -> 1, "Reject" -> 0.
label_map = {"Hire": 1, "Reject": 0}
resumes_df['Label'] = resumes_df['Recruiter Decision'].map(label_map)

# Series.map yields NaN for any value that is not a key of the mapping
# (missing decisions, different casing, stray whitespace, ...). Surface that
# here with the offending values rather than as an opaque error at fit time.
unmapped_decisions = resumes_df.loc[resumes_df['Label'].isna(), 'Recruiter Decision']
if not unmapped_decisions.empty:
    raise ValueError(
        "Unexpected values in 'Recruiter Decision' (expected one of "
        f"{list(label_map)}): {sorted(unmapped_decisions.astype(str).unique())}"
    )


In [17]:
# Define features
# Numeric inputs for the classifier: three columns taken from the dataset plus
# the TF-IDF match score computed earlier in this notebook.
# NOTE(review): "AI Score (0-100)" may be closely tied to the recruiter
# decision; confirm it is not a proxy for the label before trusting the model.
feature_columns = [
    "Experience (Years)",
    "Projects Count",
    "AI Score (0-100)",
    "TFIDF_Match_Score",
]
features = resumes_df[feature_columns]

# Binary target produced by the label-conversion step
labels = resumes_df["Label"]

In [18]:
# Train-test split
# Hold out 20% of the rows for evaluation; the seed fixes which rows those are.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Train model
# A random forest is stochastic (bootstrap sampling and feature sub-sampling),
# so the seed is fixed here as well. Otherwise each run of the notebook would
# fit a different model and could report different metrics.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

RandomForestClassifier()

In [19]:
# Evaluation
# Predict on the held-out rows and print per-class precision / recall / F1.
# NOTE(review): the saved output of this cell shows 1.00 for every metric.
# A perfect held-out score is worth a leakage check on the input features.
y_pred = clf.predict(X_test)
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        46
           1       1.00      1.00      1.00       154

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [20]:
# Save model
# Persist the trained classifier under the 'model' folder, which must already exist.
model_path = "model/ai_recruiter_model.pkl"
joblib.dump(clf, model_path)

['model/ai_recruiter_model.pkl']

In [21]:
# Output top matching resumes
# Rank every resume by its TF-IDF match score (highest first) and keep the
# first ten rows of that ranking.
ranked_resumes = resumes_df.sort_values(by="TFIDF_Match_Score", ascending=False)
top_resumes = ranked_resumes.head(10)

# Columns shown in the printed summary
summary_columns = [
    "Name",
    "Skills",
    "TFIDF_Match_Score",
    "AI Score (0-100)",
    "Projects Count",
    "Recruiter Decision",
]
print(top_resumes[summary_columns])


                 Name                                        Skills  \
169       Ruth Prince                      Python, Machine Learning   
253        Juan Velez                         SQL, Machine Learning   
849    Lori Lopez DDS                 Machine Learning, Python, SQL   
108     Michael Jones                 Python, Machine Learning, SQL   
292   Katelyn Hopkins                 Machine Learning, Python, SQL   
502         Eric Pena  SQL, Deep Learning, Python, Machine Learning   
142      Thomas Davis  SQL, Machine Learning, Deep Learning, Python   
288  Jonathan Andrews  Python, Deep Learning, SQL, Machine Learning   
417        James Bell  Deep Learning, Machine Learning, Python, SQL   
917   Michael Jimenez  Deep Learning, Machine Learning, Python, SQL   

     TFIDF_Match_Score  AI Score (0-100)  Projects Count Recruiter Decision  
169          20.828692                80               0               Hire  
253          20.823912                40               2      