In [1]:
# Step 1: Import libraries
import pandas as pd
import re
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Step 2: Toy dataset, written as (resume, job description, label) rows.
# NOTE(review): every label below is 1, so a classifier trained on this data
# only ever sees a single class.
samples = [
    ("Experienced Python developer with ML knowledge",
     "Looking for a Python developer with machine learning skills", 1),
    ("Frontend developer skilled in React and CSS",
     "Need a React developer for frontend projects", 1),
    ("Project manager with 5 years experience",
     "Hiring project manager with management experience", 1),
    ("Java developer with Spring boot experience",
     "Java Spring boot developer required", 1),
    ("Data analyst familiar with SQL and Excel",
     "Data analyst needed with SQL skills", 1),
]

# Transpose the rows into the column-oriented mapping the DataFrame is built from.
resumes, job_descs, labels = zip(*samples)
data = {
    "resume": list(resumes),
    "job_desc": list(job_descs),
    "match": list(labels),  # 1 = match, 0 = no match
}

df = pd.DataFrame(data)

In [3]:
# Step 3: Preprocessing function
def preprocess_text(text: str) -> str:
    """Normalize free text before vectorization.

    Lower-cases the input, turns every run of characters outside ``a-z0-9``
    (punctuation, symbols, whitespace) into a single space, and trims the ends.

    Punctuation is replaced by a space rather than deleted so that words joined
    only by punctuation stay separate tokens: "Python/SQL" becomes "python sql",
    not "pythonsql".

    Args:
        text: Raw resume or job-description text.

    Returns:
        The cleaned, single-spaced, lower-case string.
    """
    text = text.lower()
    # Whitespace is itself outside a-z0-9, so one pass both strips punctuation
    # and collapses repeated spaces.
    text = re.sub(r'[^a-z0-9]+', ' ', text).strip()
    return text

# Run both free-text columns through the same cleaning routine.
for text_column in ('resume', 'job_desc'):
    df[text_column] = df[text_column].apply(preprocess_text)

In [4]:
# Step 4: Join each resume with its job description into one document,
# separated by a single space, so the pair is vectorized together.
df['combined_text'] = df['resume'].str.cat(df['job_desc'], sep=" ")

In [5]:
# Step 5: Hold out 20% of the rows for testing; the fixed seed keeps the
# split reproducible across runs.
features = df['combined_text']
labels = df['match']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Step 6: TF-IDF Vectorization
# The vocabulary and IDF weights are fitted on the training split only; the
# test split is transformed with that same fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 7: Train classifier
clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

# Step 8: Evaluate on the held-out split before the model is saved.
# zero_division=0 keeps the report quiet when a class has no predicted samples.
y_pred = clf.predict(X_test_vec)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(classification_report(y_test, y_pred, zero_division=0))

In [7]:
# Step 9: Persist the trained classifier and its fitted vectorizer, one pickle
# file each, so they can be reloaded together later.
artifacts = {
    "resume_model.pkl": clf,
    "resume_vectorizer.pkl": vectorizer,
}
for filename, artifact in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("Model and vectorizer saved successfully!")

Model and vectorizer saved successfully!


In [9]:
# Step 10: Test new resume + JD
new_resume = "Python and machine learning developer with 3 years experience"
new_jd = "Hiring Python developer with ML skills"

# Apply the same cleaning and concatenation used for the training rows.
combined = preprocess_text(new_resume) + " " + preprocess_text(new_jd)
vec = vectorizer.transform([combined])

# predict_proba columns are ordered like clf.classes_, so column 0 is NOT
# guaranteed to be the "match" label (with labels {0, 1} it is "no match").
# Look up the column for label 1 explicitly; if the model never saw label 1
# there is no such column and the match probability is reported as 0.
class_labels = list(clf.classes_)
probabilities = clf.predict_proba(vec)[0]
if 1 in class_labels:
    match_score = probabilities[class_labels.index(1)] * 100
else:
    match_score = 0.0
print(f"Match Score: {match_score:.2f}%")

Match Score: 100.00%
