In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import numpy as np


In [2]:
# Location of the job-postings dataset, relative to the notebook's working directory.
file_path = 'jobs.csv'

# Read the whole CSV into a DataFrame; later cells read its
# 'Skills', 'Title' and 'Location' columns.
jobs_data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Lower-case the skills text so the feature column has consistent casing.
jobs_data['Skills'] = jobs_data['Skills'].str.lower()

# Strip surrounding whitespace from the target labels. Without this, values
# that differ only by padding (e.g. ' Mohandessin' vs 'Mohandessin') are
# learned as separate classes and the padding leaks into the predictions.
jobs_data[['Title', 'Location']] = jobs_data[['Title', 'Location']].apply(
    lambda col: col.str.strip()
)

# Drop rows missing any column the models need. This runs after the
# normalisation above because the .str accessor leaves missing entries as NaN,
# so they are removed here in one pass.
jobs_data.dropna(subset=['Skills', 'Title', 'Location'], inplace=True)


In [4]:
# TF-IDF vectoriser over the skills text, capped at 5000 vocabulary terms.
tfidf = TfidfVectorizer(max_features=5000)

# Learn the vocabulary from every row's skills text and build the feature
# matrix that both classifiers below are trained on.
X = tfidf.fit_transform(raw_documents=jobs_data['Skills'])

In [5]:
# Target for the first classifier: the job title of each posting.
y_title = jobs_data['Title']

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_title,
    test_size=0.2,
    random_state=42,
)

# Random forest (100 trees, fixed seed) mapping skills features to a title.
model_title = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Left as the cell's last expression so the fitted estimator is displayed.
model_title.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [6]:
# Target for the second classifier: the location of each posting.
y_location = jobs_data['Location']

# Same test_size and seed as the title split. Note that this rebinds
# X_train / X_test / y_train / y_test, replacing the title split's variables.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_location,
    test_size=0.2,
    random_state=42,
)

# Random forest (100 trees, fixed seed) mapping skills features to a location.
model_location = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Left as the cell's last expression so the fitted estimator is displayed.
model_location.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [7]:
def _top_classes(model, features, top_n, min_prob=0.0):
    """Return the labels of the ``top_n`` most probable classes for one sample.

    Classes with zero probability, or with probability below ``min_prob``, are
    dropped, so the result may hold fewer than ``top_n`` labels (or none).
    """
    probs = model.predict_proba(features)[0]
    top_indices = np.argsort(probs)[-top_n:]
    return [
        model.classes_[i]
        for i in top_indices
        if probs[i] > 0 and probs[i] >= min_prob
    ]


def predict_job_title_and_location(Skills, top_n=3, relevance_threshold=0.1, random_state=None):
    """Suggest a job title and a location for a free-text skills description.

    The skills text is lower-cased and vectorised with the fitted ``tfidf``.
    Each classifier's ``top_n`` most probable classes form a candidate pool,
    and one candidate is drawn uniformly at random from each pool.

    Parameters
    ----------
    Skills : str
        Skills description to classify.
    top_n : int, default 3
        Number of most probable classes considered per model; must be >= 1.
    relevance_threshold : float, default 0.1
        Minimum predicted probability for a title to be kept as a candidate.
        It is not applied to locations.
    random_state : int or None, default None
        Seed for the random draw. ``None`` uses NumPy's global random state,
        so results vary between calls unless ``np.random.seed`` was set.

    Returns
    -------
    tuple
        ``(predicted_title, predicted_location)``. The title is the string
        ``'No relevant title found'`` when no title reaches the threshold.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    # A slice of [-0:] would select every class instead of none, so reject it.
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    # Unseeded calls keep using the global RNG so np.random.seed() still works.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    skills_transformed = tfidf.transform([Skills.lower()])

    relevant_titles = _top_classes(
        model_title, skills_transformed, top_n, relevance_threshold
    )
    predicted_title = rng.choice(relevant_titles) if relevant_titles else 'No relevant title found'

    # Zero-probability locations are excluded; the pool is still never empty
    # as long as the model assigns a positive probability to at least one class.
    candidate_locations = _top_classes(model_location, skills_transformed, top_n)
    predicted_location = rng.choice(candidate_locations)

    return predicted_title, predicted_location

In [11]:
# Demo query: a single-keyword skills description.
example_skills = "HR"

# Ask both models for a suggestion using the function's default settings.
predicted_title, predicted_location = predict_job_title_and_location(
    Skills=example_skills
)

# Report the two suggestions, one per line.
print("Predicted Job Title:", predicted_title)
print("Predicted Location:", predicted_location)


Predicted Job Title: Human Resources Manager
Predicted Location:  Mohandessin
