In [11]:
!pip install faker

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.1.0


In [15]:

import pandas as pd
import random
from faker import Faker

fake = Faker()

def generate_case(case_type: str):
    """Generate a single resume entry based on case type"""
    if case_type == "best":
        return {
            "shortlisting_probability": round(random.uniform(0.90, 1.0), 2),
            "matched_skills_count": random.randint(14, 20),
            "total_job_skills": random.randint(18, 22),
            "skill_match_percent": round(random.uniform(0.70, 0.85), 2),
            "cgpa": round(random.uniform(8.5, 10.0), 2),
            "recommendation": 1,
            "fit_label": 1,
            "fit_score": round(random.uniform(8.5, 10.0), 2),
            "resume_quality_score": round(random.uniform(8.5, 10.0), 2),
            "market_value_score": random.randint(85, 100),
        }
    elif case_type == "base":
        return {
            "shortlisting_probability": round(random.uniform(0.55, 0.75), 2),
            "matched_skills_count": random.randint(8, 12),
            "total_job_skills": random.randint(14, 18),
            "skill_match_percent": round(random.uniform(0.55, 0.65), 2),
            "cgpa": round(random.uniform(6.5, 7.5), 2),
            "recommendation": random.choice([0, 1]),
            "fit_label": 1,
            "fit_score": round(random.uniform(5.5, 7.5), 2),
            "resume_quality_score": round(random.uniform(5.5, 7.5), 2),
            "market_value_score": random.randint(55, 75),
        }
    elif case_type == "worst":
        return {
            "shortlisting_probability": round(random.uniform(0.10, 0.40), 2),
            "matched_skills_count": random.randint(1, 5),
            "total_job_skills": random.randint(10, 15),
            "skill_match_percent": round(random.uniform(0.10, 0.40), 2),
            "cgpa": round(random.uniform(4.0, 6.0), 2),
            "recommendation": 0,
            "fit_label": 0,
            "fit_score": round(random.uniform(1.0, 4.5), 2),
            "resume_quality_score": round(random.uniform(1.0, 4.5), 2),
            "market_value_score": random.randint(20, 50),
        }

# Generate dataset
num_samples = 300
data = []

# Distribute cases evenly: 10 worst, 10 base, 10 best
for _ in range(num_samples // 3):
    data.append(generate_case("worst"))
    data.append(generate_case("base"))
    data.append(generate_case("best"))

# Create DataFrame
df = pd.DataFrame(data)

# Shuffle the dataset for randomness
df = df.sample(frac=1).reset_index(drop=True)

print(df.head())  # Print sample rows



   shortlisting_probability  matched_skills_count  total_job_skills  \
0                      0.96                    15                18   
1                      0.99                    17                20   
2                      0.31                     1                13   
3                      0.31                     3                13   
4                      0.24                     1                10   

   skill_match_percent  cgpa  recommendation  fit_label  fit_score  \
0                 0.73  8.61               1          1       8.93   
1                 0.71  8.62               1          1       9.49   
2                 0.36  4.96               0          0       1.78   
3                 0.15  4.45               0          0       1.72   
4                 0.30  4.45               0          0       4.19   

   resume_quality_score  market_value_score  
0                  9.00                  92  
1                  9.73                  94  
2             

In [17]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
import pandas as pd
import numpy as np

# ✅ Assume `df` with 300 records is already defined
# Shuffle it to ensure randomness
df = df.sample(frac=1).reset_index(drop=True)

# 🎯 Train the model using the full dataset
X_train = df.drop("market_value_score", axis=1)
y_train = df["market_value_score"]

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 🚀 Function to predict & analyze market value
def assess_market_value(resume_features: dict):
    X_input = pd.DataFrame([resume_features])
    predicted_score = int(model.predict(X_input)[0])

    # Analyze feature importance
    perm_importance = permutation_importance(model, X_train, y_train, n_repeats=10, random_state=42)
    importances = perm_importance.importances_mean
    sorted_idx = np.argsort(importances)[::-1]
    top_features = [X_train.columns[i] for i in sorted_idx[:3]]

    # HR Insight
    if predicted_score >= 85:
        label = "🌟 Excellent Candidate"
        note = "Top-tier resume. Strong recommendation for hiring loop."
    elif predicted_score >= 70:
        label = "✅ Good Fit"
        note = "Ready for interview. Consider for next round."
    elif predicted_score >= 50:
        label = "⚠️ Average Fit"
        note = "Potential with some gaps. Evaluate project depth or training needs."
    else:
        label = "❌ Not Recommended"
        note = "Low readiness score. Skip unless role is junior/entry-level."

    # 📊 Final Output
    print("\n📈 Resume Market Value Assessment")
    print(f"🧠 Predicted Market Value Score : {predicted_score} / 100")
    print(f"{label}")
    print(f"📌 Evaluation Note              : {note}")
    print(f"🔍 Top Influencing Factors      : {', '.join(top_features)}")

    return {
        "market_value_score": predicted_score,
        "label": label,
        "note": note,
        "top_factors": top_features
    }

# 🧪 Example resumes to test
sample_resume_1 = {
    "shortlisting_probability": 0.82,
    "matched_skills_count": 10,
    "total_job_skills": 12,
    "skill_match_percent": 0.83,
    "cgpa": 8.1,
    "recommendation": 1,
    "fit_label": 1,
    "fit_score": 8.2,
    "resume_quality_score": 8.0
}

sample_resume_2 = {
    "shortlisting_probability": 0.55,
    "matched_skills_count": 6,
    "total_job_skills": 14,
    "skill_match_percent": 0.43,
    "cgpa": 6.4,
    "recommendation": 0,
    "fit_label": 0,
    "fit_score": 4.8,
    "resume_quality_score": 5.3
}

# ✅ Run assessment
print("🔍 Evaluating Sample Resume 1")
result_1 = assess_market_value(sample_resume_1)

print("\n🔍 Evaluating Sample Resume 2") x 
result_2 = assess_market_value(sample_resume_2)


🔍 Evaluating Sample Resume 1

📈 Resume Market Value Assessment
🧠 Predicted Market Value Score : 84 / 100
✅ Good Fit
📌 Evaluation Note              : Ready for interview. Consider for next round.
🔍 Top Influencing Factors      : fit_score, shortlisting_probability, cgpa

🔍 Evaluating Sample Resume 2

📈 Resume Market Value Assessment
🧠 Predicted Market Value Score : 47 / 100
❌ Not Recommended
📌 Evaluation Note              : Low readiness score. Skip unless role is junior/entry-level.
🔍 Top Influencing Factors      : fit_score, shortlisting_probability, cgpa


In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 📊 Make predictions on the training data
y_pred = model.predict(X_train)

# 📈 Evaluation metrics
r2 = r2_score(y_train, y_pred)
mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)

# 🧾 Print Evaluation Report
print("\n🧪 Model Evaluation Report")
print(f"🔹 R² Score       : {r2:.4f}")
print(f"🔹 MAE            : {mae:.2f}")
print(f"🔹 MSE            : {mse:.2f}")
print(f"🔹 RMSE           : {rmse:.2f}")



🧪 Model Evaluation Report
🔹 R² Score       : 0.9868
🔹 MAE            : 2.24
🔹 MSE            : 7.81
🔹 RMSE           : 2.79


In [23]:
df.to_csv("resume_market_value_dataset.csv", index=False)

In [25]:
import csv
import random
from faker import Faker

fake = Faker()

# Define roles and skills
roles = [
    "Machine Learning Engineer", "Full Stack Developer", "AI Research Assistant",
    "Backend Developer", "Frontend Developer", "Data Scientist", "NLP Engineer",
    "Software Engineer", "MLOps Engineer", "Data Analyst", "Deep Learning Engineer",
    "Computer Vision Engineer", "DevOps Engineer", "AI/ML Intern", "Cloud Engineer",
    "Blockchain Architect", "Data Engineer", "Frontend Architect", "ML Ops Specialist"
]

skills = [
    "Python", "Java", "Rust", "Seaborn", "FastAPI", "Scikit-Learn", "LLMs",
    "Transformers", "React", "Node.js", "TypeScript", "PostgreSQL", "MongoDB",
    "Docker", "TensorFlow", "PyTorch", "OpenCV", "Vue.js", "C++", "Bootstrap",
    "Tailwind CSS", "Git", "Excel", "Power BI", "Flask"
]

# Number of rows
num_rows = 100

# CSV header
columns = [
    "Role", "Skills", "shortlisting_probability", "matched_skills_count", "total_job_skills",
    "skill_match_percent", "cgpa", "recommendation", "fit_label", "fit_score",
    "resume_quality_score", "market_value_score"
]

def generate_row():
    role = random.choice(roles)
    total_job_skills = random.randint(5, 12)
    job_skills = random.sample(skills, total_job_skills)
    
    matched_skills_count = random.randint(1, total_job_skills)
    matched_skills = random.sample(job_skills, matched_skills_count)

    skill_match_percent = round((matched_skills_count / total_job_skills) * 100, 2)
    shortlisting_probability = round(random.uniform(0.4, 0.99), 2)
    cgpa = round(random.uniform(6.0, 10.0), 2)
    recommendation = random.choice(["Yes", "No"])
    fit_score = round(random.uniform(5.0, 10.0), 2)
    fit_label = "Fit" if fit_score >= 7.0 else "No Fit"
    resume_quality_score = round(random.uniform(5.0, 10.0), 2)
    market_value_score = round(random.uniform(50, 100), 2)

    return {
        "Role": role,
        "Skills": ", ".join(job_skills),
        "shortlisting_probability": shortlisting_probability,
        "matched_skills_count": matched_skills_count,
        "total_job_skills": total_job_skills,
        "skill_match_percent": skill_match_percent,
        "cgpa": cgpa,
        "recommendation": recommendation,
        "fit_label": fit_label,
        "fit_score": fit_score,
        "resume_quality_score": resume_quality_score,
        "market_value_score": market_value_score,
    }

# Write to CSV
with open("resume_data.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=columns, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()
    for _ in range(num_rows):
        writer.writerow(generate_row())

print("✅ resume_data.csv generated with 100 fake resume entries.")


✅ resume_data.csv generated with 100 fake resume entries.


In [2]:
import csv
import random
from faker import Faker

fake = Faker()

# Define roles, skills, and new data
roles = [
    "Machine Learning Engineer", "Full Stack Developer", "AI Research Assistant",
    "Backend Developer", "Frontend Developer", "Data Scientist", "NLP Engineer",
    "Software Engineer", "MLOps Engineer", "Data Analyst", "Deep Learning Engineer",
    "Computer Vision Engineer", "DevOps Engineer", "AI/ML Intern", "Cloud Engineer",
    "Blockchain Architect", "Data Engineer", "Frontend Architect", "ML Ops Specialist"
]

skills = [
    "Python", "Java", "Rust", "Seaborn", "FastAPI", "Scikit-Learn", "LLMs",
    "Transformers", "React", "Node.js", "TypeScript", "PostgreSQL", "MongoDB",
    "Docker", "TensorFlow", "PyTorch", "OpenCV", "Vue.js", "C++", "Bootstrap",
    "Tailwind CSS", "Git", "Excel", "Power BI", "Flask"
]

companies = [
    "Infosys", "TCS", "Wipro", "Cognizant", "Accenture", "IBM India", "Capgemini",
    "Tech Mahindra", "HCL Technologies", "Zoho Corp", "Mindtree", "L&T Infotech"
]

indian_branches = [
    "Mumbai", "Bangalore", "Hyderabad", "Pune", "Chennai", "Delhi", "Noida",
    "Gurgaon", "Kolkata", "Ahmedabad", "Nagpur", "Indore"
]

emails = [
    "mhtcet@gmail.com",
    "dkhandagale.aditya.dseit@kgce.edu.in",
    "patkar.omkar.ds@kgce.edu.in",
    "alampawan30@gmail.com",
    "sutar.nikhil.ds@kgce.edu.in"
]

# Number of rows
num_rows = 100

# CSV header
columns = [
    "Company", "Branch", "Experience", "Role", "Skills", "shortlisting_probability",
    "matched_skills_count", "total_job_skills", "skill_match_percent", "cgpa",
    "recommendation", "fit_label", "fit_score", "resume_quality_score", "market_value_score", "Email"
]

# Function to generate a row
def generate_row(index):
    role = random.choice(roles)
    total_job_skills = random.randint(5, 12)
    job_skills = random.sample(skills, total_job_skills)
    matched_skills_count = random.randint(1, total_job_skills)

    return {
        "Company": random.choice(companies),
        "Branch": random.choice(indian_branches),
        "Experience": random.randint(0, 8),
        "Role": role,
        "Skills": ", ".join(job_skills),
        "shortlisting_probability": round(random.uniform(0.4, 0.99), 2),
        "matched_skills_count": matched_skills_count,
        "total_job_skills": total_job_skills,
        "skill_match_percent": round((matched_skills_count / total_job_skills) * 100, 2),
        "cgpa": round(random.uniform(6.0, 10.0), 2),
        "recommendation": random.choice(["Yes", "No"]),
        "fit_label": "Fit" if random.random() > 0.3 else "No Fit",
        "fit_score": round(random.uniform(5.0, 10.0), 2),
        "resume_quality_score": round(random.uniform(5.0, 10.0), 2),
        "market_value_score": round(random.uniform(50, 100), 2),
        "Email": emails[index % len(emails)]
    }

# Write to CSV
with open("resume_data_with_email.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=columns, quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()
    for i in range(num_rows):
        writer.writerow(generate_row(i))

print("✅ resume_data_with_email.csv generated with email addresses.")

✅ resume_data_with_email.csv generated with email addresses.
