In [66]:
# Imports
import pandas as pd
import numpy as np
import pickle
import joblib

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier


In [67]:
# Load the trained model and the fitted scaler from the saved-artifacts folder.
# NOTE(review): MODEL_DIR is an absolute, machine-specific path; point it at your
# own project checkout before running this notebook on another machine.
MODEL_DIR = r'C:\Users\Admin\Desktop\INX_Employee_Performance_Project\models\saved'
model_path = rf'{MODEL_DIR}\employee_perf_rf_model.pkl'
scaler_path = rf'{MODEL_DIR}\standard_scaler.pkl'

# Both artifacts are read with joblib, which is also how this notebook reloads
# the model file further down, so every load goes through the same reader.
# SECURITY: joblib/pickle files can execute arbitrary code when loaded; only
# load artifacts that come from a trusted source.
model = joblib.load(model_path)
scaler = joblib.load(scaler_path)

print("✅ Model and Scaler loaded successfully!")


✅ Model and Scaler loaded successfully!


In [79]:
# Names of the feature columns used during training.
# NOTE(review): 'PerformanceRating' appears in this list although the notebook
# later prints a "Predicted Performance Rating" — confirm it is really an input
# feature and not the target. Also note the prediction cell selects columns via
# `expected_cols`, not this list — verify the two agree.
required_features = [
    'Age',
    'Attrition',
    'BusinessTravelFrequency',
    'DistanceFromHome',
    'EducationBackground',
    'EmpDepartment',
    'EmpJobRole',
    'EnvironmentSatisfaction',
    'Gender',
    'JobInvolvement',
    'JobLevel',
    'JobSatisfaction',
    'MaritalStatus',
    'MonthlyIncome',
    'NumCompaniesWorked',
    'OverTime',
    'PercentSalaryHike',
    'PerformanceRating',
    'StockOptionLevel',
    'TotalWorkingYears',
    'TrainingTimesLastYear',
    'WorkLifeBalance',
    'YearsAtCompany',
    'YearsSinceLastPromotion',
]

# Report how many features are listed.
print(f"📌 Model expects: {len(required_features)} features")


📌 Model expects: 24 features


In [81]:
# A single employee record to score. Categorical fields are supplied as integer
# codes (i.e. already encoded). Replace these values with real data or with
# values coming from a UI/app.
# NOTE(review): the record carries a 'PerformanceRating' value even though that
# is what the notebook later predicts — confirm this field belongs in the input.
sample_employee = {
    'Age': 29,
    'Attrition': 0,
    'BusinessTravelFrequency': 1,
    'DistanceFromHome': 6,
    'EducationBackground': 2,
    'EmpDepartment': 1,
    'EmpJobRole': 3,
    'EnvironmentSatisfaction': 3,
    'Gender': 1,
    'JobInvolvement': 3,
    'JobLevel': 2,
    'JobSatisfaction': 4,
    'MaritalStatus': 1,
    'MonthlyIncome': 4200,
    'NumCompaniesWorked': 1,
    'OverTime': 1,
    'PercentSalaryHike': 15,
    'PerformanceRating': 1,
    'StockOptionLevel': 0,
    'TotalWorkingYears': 6,
    'TrainingTimesLastYear': 2,
    'WorkLifeBalance': 3,
    'YearsAtCompany': 3,
    'YearsSinceLastPromotion': 1,
}

# One-row frame: each key becomes a column, in the order written above.
new_data = pd.DataFrame([sample_employee])

print("🎯 Input Data:")
display(new_data)


🎯 Input Data:


Unnamed: 0,Age,Attrition,BusinessTravelFrequency,DistanceFromHome,EducationBackground,EmpDepartment,EmpJobRole,EnvironmentSatisfaction,Gender,JobInvolvement,...,NumCompaniesWorked,OverTime,PercentSalaryHike,PerformanceRating,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsSinceLastPromotion
0,29,0,1,6,2,1,3,3,1,3,...,1,1,15,1,0,6,2,3,3,1


In [83]:
# Fit one LabelEncoder per categorical column of `df` and save them to disk.
# NOTE(review): `categorical_columns` and `df` are not defined in the cells
# shown here; they must already exist in the kernel — confirm where they come
# from so the notebook survives Restart & Run All.
from sklearn.preprocessing import LabelEncoder

encoders = {}

for col in categorical_columns:
    # Columns absent from the frame are reported and skipped, not treated as errors.
    if col not in df.columns:
        print(f"⚠️ Column '{col}' not found in data. Skipping encoding for this column.")
        continue
    le = LabelEncoder()
    # Replaces the column in `df` with its integer codes (in-place change to df).
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Persist the {column name: fitted encoder} dict; the path is relative to the
# current working directory.
joblib.dump(encoders, 'models/saved/label_encoders.pkl')


⚠️ Column 'Department' not found in data. Skipping encoding for this column.
⚠️ Column 'JobRole' not found in data. Skipping encoding for this column.
⚠️ Column 'EducationField' not found in data. Skipping encoding for this column.


['models/saved/label_encoders.pkl']

In [85]:
# Read the saved encoder dict back from disk and report whether that worked.
# A failure is only printed, not raised, so `encoders` keeps whatever value it
# had before this cell when the load fails.
try:
    encoders = joblib.load("models/saved/label_encoders.pkl")
except Exception as load_error:
    print("Joblib load failed:", load_error)
else:
    print("Loaded with joblib successfully")


Loaded with joblib successfully


In [87]:
# Encode the categorical columns of `new_data` with the saved LabelEncoders.
# A value the encoder did not see when it was fitted is encoded as -1.
# NOTE(review): the input cell describes `new_data` as already encoded; if the
# encoders were fitted on raw labels, numeric codes here would all become -1 —
# confirm this step is intended for this input.
for col in categorical_columns:
    le = encoders.get(col)
    # Skip columns with no saved encoder or that are absent from the input.
    if le is None or col not in new_data.columns:
        continue
    # Build the class -> integer code table once per column. LabelEncoder's code
    # for a class is its position in `classes_`, so this matches le.transform()
    # without calling it (and re-scanning classes_) for every single value.
    code_by_class = {cls: code for code, cls in enumerate(le.classes_)}
    new_data.loc[:, col] = new_data[col].map(lambda value: code_by_class.get(value, -1))


In [89]:
# Reload the model with joblib, reusing `model_path` from the loading cell
# instead of repeating the absolute path literal a second time.
model = joblib.load(model_path)


In [91]:
# Show the input frame's column names as a plain list.
print(list(new_data.columns))

['Age', 'Attrition', 'BusinessTravelFrequency', 'DistanceFromHome', 'EducationBackground', 'EmpDepartment', 'EmpJobRole', 'EnvironmentSatisfaction', 'Gender', 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'MaritalStatus', 'MonthlyIncome', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike', 'PerformanceRating', 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance', 'YearsAtCompany', 'YearsSinceLastPromotion']


In [101]:
# Align the input with the expected column layout, scale it, and predict.
# NOTE(review): `new_data_renamed` and `expected_cols` are not defined in the
# cells shown here; they must already exist in the kernel — confirm their origin.

# Work on a copy so adding columns below does not touch the source frame
# (also avoids SettingWithCopyWarning).
new_data_renamed_copy = new_data_renamed.copy()

# Add any expected column that is absent, defaulting it to 0, and remember which
# ones were added so the substitution is visible rather than silent.
missing_cols = []
for col in expected_cols:
    if col not in new_data_renamed_copy.columns:
        new_data_renamed_copy[col] = 0  # or suitable default
        missing_cols.append(col)

if missing_cols:
    # A prediction built on filled-in zeros may not be meaningful; say so.
    print("⚠️ Columns missing from input, filled with 0:", missing_cols)

# Select and order the columns exactly as listed in expected_cols.
new_data_for_scaler = new_data_renamed_copy[expected_cols]

# Scale, then wrap the scaled values in a frame carrying the same column names.
input_scaled = scaler.transform(new_data_for_scaler)
input_df = pd.DataFrame(input_scaled, columns=expected_cols)

# Predicted class and the per-class probabilities for the single input row.
prediction = model.predict(input_df)
prediction_proba = model.predict_proba(input_df)

print("🎯 Predicted Performance Rating:", prediction[0])
print("📊 Prediction Probabilities:", prediction_proba[0])


🎯 Predicted Performance Rating: 3
📊 Prediction Probabilities: [0.09372553 0.78072441 0.12555006]


In [103]:
# Display names keyed by the numeric class that model.predict() returns
# (numeric class -> name).
# NOTE(review): these names are placeholders; confirm them against the rating
# scale the model was trained on.
label_map = {
    0: 'Poor',
    1: 'Average',
    2: 'Good',
    3: 'Excellent'  # or appropriate label
}

# Flag every class the model can output that has no entry above; without an
# entry such a prediction is shown only as "Unknown".
# .tolist() converts the class values to plain Python objects for clean printing.
unmapped_classes = [cls for cls in model.classes_.tolist() if cls not in label_map]
if unmapped_classes:
    print("⚠️ No label defined for model classes:", unmapped_classes)

print("🔍 Predicted Class Label:", label_map.get(prediction[0], "Unknown"))



🔍 Predicted Class Label: Excellent
