In [1]:
!pip install scikit-learn pandas
!pip install -U pywebio



In [2]:
import pandas as pd
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [3]:
# Create a sample list of job skills
skills_pool = ["Python", "Java", "C++", "JavaScript", "SQL", "Data Analysis", "Machine Learning", "Communication", "Problem Solving", "R", "Node.js"]

# Generate a job roles dataset
locations = ["Bangalore", "Chennai", "Coimbatore", "Mumbai", "Delhi", "Hyderabad", "Kolkata", "Pune"]
job_names = ["Software Engineer", "Data Scientist", "Product Manager", "QA Engineer", "UI/UX Designer", "Full-stack Developer"]

# Generate random job roles for each applicant
job_roles_data = []
for _ in range(100):
    skills = random.sample(skills_pool, k=random.randint(2, len(skills_pool)))
    experience = random.randint(0, 15)
    num_job_roles = random.randint(1, 3)  # Generate 1 to 3 job roles per applicant
    job_roles = random.sample(job_names, num_job_roles)

    job_roles_data.append({
        "applicant_no": f"applicant{_+1}",
        "location": random.choice(locations),
        "skills": ", ".join(skills),
        "experience": experience,
        "job_roles": ", ".join(job_roles)
    })

job_roles_df = pd.DataFrame(job_roles_data)

# To generate a job postings dataset
job_postings_data = []
companies = ["Societe Generale", "Amazon", "Google", "JP Morgan", "ZeroDown", "Microsoft", "Accenture", "TCS"]
unique_job_combinations = set()
for _ in range(100):
    company = random.choice(companies)
    job_name = random.choice(job_names)
    job_combination = (company, job_name)
    if job_combination not in unique_job_combinations:
        unique_job_combinations.add(job_combination)
        required_skills = random.sample(skills_pool, k=random.randint(2, len(skills_pool)))
        required_experience = random.randint(0, 15)  # Randomly generate required experience from 0 to 15
        job_postings_data.append({
            "job_id": f"job{_+1}",
            "company": company,
            "job_name": job_name,
            "location": random.choice(locations),
            "required_skills": ", ".join(required_skills),
            "required_experience": required_experience
        })

job_postings_df = pd.DataFrame(job_postings_data)



In [4]:
# Combine data to create the training dataset
train_data = []
for _, applicant_row in job_roles_df.iterrows():
    for _, job_posting_row in job_postings_df.iterrows():
        for job_role in applicant_row['job_roles'].split(", "):
            match_skills = all(skill in job_posting_row['required_skills'] for skill in applicant_row['skills'].split(', '))
            match_location = applicant_row['location'] == job_posting_row['location']
            match_experience = applicant_row['experience'] >= job_posting_row['required_experience']
            match_job_roles = job_role in job_posting_row['job_name']

            match = match_skills and match_location and match_experience and match_job_roles

            train_data.append((applicant_row['skills'] + " " + applicant_row['location'] + " " + str(applicant_row['experience']) + " " + job_role,
                               job_posting_row['required_skills'] + " " + job_posting_row['location'] + " " + str(job_posting_row['required_experience']) + " " + job_posting_row['job_name'],
                               int(match)))

# Convert train_data to a DataFrame
train_df = pd.DataFrame(train_data, columns=['applicant_info', 'job_info', 'match'])

# Feature extraction using TF-IDF vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(train_df['applicant_info'] + ' ' + train_df['job_info'])

# Splitting data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, train_df['match'], test_size=0.2, random_state=42)

# Train a Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)


In [5]:
# Display job roles dataset
print("Job Roles Dataset:")
print(job_roles_df)

# Display job postings dataset
print("\nJob Postings Dataset:")
print(job_postings_df)

Job Roles Dataset:
    applicant_no    location  \
0     applicant1   Hyderabad   
1     applicant2   Bangalore   
2     applicant3  Coimbatore   
3     applicant4   Bangalore   
4     applicant5   Hyderabad   
..           ...         ...   
95   applicant96     Chennai   
96   applicant97  Coimbatore   
97   applicant98     Kolkata   
98   applicant99       Delhi   
99  applicant100        Pune   

                                               skills  experience  \
0   Data Analysis, C++, Machine Learning, Communic...           0   
1                                     JavaScript, SQL          15   
2   Problem Solving, Communication, Data Analysis,...          13   
3   Communication, Data Analysis, Node.js, C++, Ja...          15   
4   Problem Solving, C++, Communication, Machine L...           8   
..                                                ...         ...   
95  JavaScript, Communication, Data Analysis, C++,...           1   
96  R, Node.js, Communication, Python, Probl

In [28]:
from pywebio.input import *
from pywebio.output import put_text, put_success, put_warning

while True:
    user_inputs = input_group("Job Role Predictor", [
        input("Name:", name="name"),
        input("Skills (comma-separated):", name="skills"),
        select("Location:", options=job_roles_df['location'].unique(), name="location"),
        input('Experience (in years):', type=NUMBER, min_value=0, max_value=15, value=2, name="experience"),
        input("Desired job roles:", name="job_roles")
    ])

    applicant_name = user_inputs["name"]
    applicant_skills = user_inputs["skills"]
    applicant_location = user_inputs["location"]
    applicant_experience = user_inputs["experience"]
    applicant_job_roles = user_inputs["job_roles"]

    input_data = f"{applicant_skills} {applicant_location} {applicant_experience}"
    for role in applicant_job_roles:
        input_data += f" {role}"

    input_vector = vectorizer.transform([input_data])
    prediction = model.predict(input_vector)[0]

    if prediction:
        matching_roles = []
        for _, job_posting_row in job_roles_df.iterrows():
            for job_role in applicant_job_roles:
                if job_role in job_posting_row['job_roles']:
                    matching_roles.append((job_role, job_posting_row['company'], job_posting_row['job_name'], job_posting_row['location']))
        put_success("Matching Job Roles:")
        for role, company, job_name, location in matching_roles:
            put_text(f"- {role} at {company} - {job_name} in {location}")
    else:
        put_warning("No matching job roles found.")


SessionNotFoundException: ignored

In [25]:
from ipywidgets import interact, Checkbox, interactive

@interact
def job_role_predictor(name="", skills="", location="", experience=(0, 15), job_roles=[]):
    input_data = f"{skills} {location} {experience}"
    if isinstance(job_roles, str):
        job_roles = [job_roles]
    for role in job_roles:
        input_data += f" {role}"

    input_vector = vectorizer.transform([input_data])
    prediction = model.predict(input_vector)[0]

    if prediction:
        matching_roles = []
        for _, job_posting_row in job_roles_df.iterrows():
            for job_role in job_roles:
                if job_role in job_posting_row['job_roles']:
                    matching_roles.append((job_role, job_posting_row['company'], job_posting_row['job_name'], job_posting_row['location']))
        print("Matching Job Roles:")
        for role, company, job_name, location in matching_roles:
            print(f"- {role} at {company} - {job_name} in {location}")
    else:
        print("No matching job roles found.")


interactive(children=(Text(value='', description='name'), Text(value='', description='skills'), Text(value='',…