In [14]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 2: Load your cleaned data
# The full table stays available as `df`; the python parser engine is used for the read.
df = pd.read_csv('/content/job_dataset_cleaned.csv', engine='python')

# Steps 3-4: Sample the data to reduce memory usage, then combine text columns
# A fixed seed makes the 10% sample reproducible. `combined_text` is
# "<skills> <job_description>", with missing values treated as empty strings.
df_sampled = (
    df.sample(frac=0.1, random_state=42)
      .assign(
          combined_text=lambda frame: (
              frame['skills'].fillna('') + ' ' + frame['job_description'].fillna('')
          )
      )
)

# Step 5: Encode the target labels
# Labels are encoded first so the split below can be stratified on them.
le = LabelEncoder()
y = le.fit_transform(df_sampled['Role'])

# Step 6: Train-test split (on the raw text, before vectorization)
# Splitting first keeps the held-out documents out of the TF-IDF vocabulary and
# IDF statistics. The seed, test size and stratification are unchanged, so the
# same rows land in each partition as before.
text_train, text_test, y_train, y_test = train_test_split(
    df_sampled['combined_text'],
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Step 7: TF-IDF vectorization
tfidf = TfidfVectorizer(max_features=500)
X_train = tfidf.fit_transform(text_train)  # vocabulary/IDF learned from training text only
X_test = tfidf.transform(text_test)        # held-out text projected onto that vocabulary

# Full-sample matrix, kept under its original name in case a later cell still
# uses it; it is built with the training-only vocabulary.
X_text = tfidf.transform(df_sampled['combined_text'])

# Step 8: Model training
# Construct and fit in one expression; `fit` hands back the fitted estimator,
# so `lr_model` is the trained classifier.
# NOTE(review): recent scikit-learn releases appear to warn when 'liblinear' is
# used on a multiclass target — confirm against the installed version.
lr_model = LogisticRegression(solver='liblinear', max_iter=1000).fit(X_train, y_train)

# Step 9: Evaluation
# Predict on the held-out split and report overall accuracy.
y_pred = lr_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Per-class metrics. The full encoded label set is passed explicitly so that
# `target_names` always lines up with the labels, even when a rare class is
# missing from both `y_test` and `y_pred` (without `labels`, that case raises
# a ValueError). `zero_division=0` reports 0 for such classes without
# emitting undefined-metric warnings.
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(le.classes_))),
        target_names=le.classes_,
        zero_division=0,
    ),
)

# Step 10: Save artifacts
# Persist the three fitted objects needed to score new text: the classifier,
# the TF-IDF vectorizer, and the label encoder. The files are written to the
# current working directory.
joblib.dump(value=lr_model, filename="logistic_model.pkl")
joblib.dump(value=tfidf, filename="tfidf_vectorizer.pkl")
# Left as the cell's final bare expression so the notebook still displays the
# list of written file names.
joblib.dump(value=le, filename="label_encoder.pkl")

Accuracy: 1.0

Classification Report:
                                           precision    recall  f1-score   support

                           API Developer       1.00      1.00      1.00        37
                 Accessibility Developer       1.00      1.00      1.00        34
                       Account Executive       1.00      1.00      1.00        69
                         Account Manager       1.00      1.00      1.00        34
                      Account Strategist       1.00      1.00      1.00        41
                   Accounting Controller       1.00      1.00      1.00        33
                      Accounting Manager       1.00      1.00      1.00        33
           Acute Care Nurse Practitioner       1.00      1.00      1.00        34
                     Addiction Counselor       1.00      1.00      1.00        30
                Administrative Assistant       1.00      1.00      1.00        31
              Administrative Coordinator       1.00      1

['label_encoder.pkl']