In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE  # For handling class imbalance
import joblib 

url = '/content/processed-payroll.csv'
df = pd.read_csv(url, low_memory=False)

X = df[['DEPARTMENT_TITLE', 'JOB_TITLE', 'REGULAR_PAY', 'OVERTIME_PAY']]

df['Promotion'] = (df['TOTAL_PAY'] > df['REGULAR_PAY'] + df['OVERTIME_PAY']).astype(int)
y = df['Promotion']

label_encoder = LabelEncoder()
X['DEPARTMENT_TITLE'] = label_encoder.fit_transform(X['DEPARTMENT_TITLE'])
X['JOB_TITLE'] = label_encoder.fit_transform(X['JOB_TITLE'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

model = RandomForestClassifier(random_state=42, class_weight='balanced', n_estimators=100, max_depth=20)
model.fit(X_train_res, y_train_res)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

print(classification_report(y_test, y_pred))

joblib.dump(model, 'employee_promotion_payraise_model.pkl')
print("Model saved as 'employee_promotion_payraise_model.pkl'")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['DEPARTMENT_TITLE'] = label_encoder.fit_transform(X['DEPARTMENT_TITLE'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['JOB_TITLE'] = label_encoder.fit_transform(X['JOB_TITLE'])


Model Accuracy: 0.85
              precision    recall  f1-score   support

           0       0.52      0.79      0.63     33479
           1       0.95      0.86      0.90    169409

    accuracy                           0.85    202888
   macro avg       0.74      0.83      0.77    202888
weighted avg       0.88      0.85      0.86    202888

Model saved as 'employee_promotion_payraise_model.pkl'
