In [290]:
#import all required libraries

import os
import json
import pickle
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')

In [291]:
# Directory that holds the serialized artifacts read by the Artifacts loader.
# NOTE(review): hard-coded absolute path ('/content/...' looks like a Colab
# location) — make this configurable if the notebook is run elsewhere.
artifact_loc = '/content/Artifacts'

# List the directory so the available artifact files show up as the cell output.
os.listdir(artifact_loc)

['feature_scaling.pkl',
 '.ipynb_checkpoints',
 'model.pkl',
 'training_features.json']

In [292]:
class Artifacts:
    """Loader for the serialized artifacts stored in a single directory.

    The directory is expected to contain ``training_features.json``,
    ``feature_scaling.pkl`` and ``model.pkl``.

    Parameters
    ----------
    artifact_loc : str or os.PathLike
        Path of the directory that holds the artifact files.
    """

    def __init__(self,artifact_loc):
        self.artifact_loc = artifact_loc

    def _path(self, filename):
        """Return the full path of ``filename`` inside the artifact directory."""
        return os.path.join(self.artifact_loc, filename)

    def _load_pickle(self, filename):
        """Unpickle and return the object stored in ``filename``."""
        # SECURITY: pickle.load can execute arbitrary code while loading.
        # Only point this loader at artifact files from a trusted source.
        with open(self._path(filename), 'rb') as f:
            return pickle.load(f)

    def load_features(self):
        """Return the value stored under ``'features'`` in ``training_features.json``.

        Raises
        ------
        FileNotFoundError
            If the JSON file does not exist.
        KeyError
            If the JSON document has no ``'features'`` key.
        """
        # JSON text is UTF-8; pass the encoding explicitly so decoding does not
        # depend on the platform's locale default.
        with open(self._path('training_features.json'), 'r', encoding='utf-8') as f:
            return json.load(f)['features']

    def load_scaling(self):
        """Return the object unpickled from ``feature_scaling.pkl``."""
        return self._load_pickle('feature_scaling.pkl')

    def load_model(self):
        """Return the object unpickled from ``model.pkl``."""
        return self._load_pickle('model.pkl')

In [283]:
# Load the feature list, the scaling object and the model from the artifact
# directory. `scaling` and `model` are read as module-level names by
# predict_response, so this cell has to run before any prediction cell.
artifacts = Artifacts(artifact_loc)
input_features = artifacts.load_features()
scaling = artifacts.load_scaling()
model = artifacts.load_model()

In [285]:
# Show the feature names read from training_features.json.
print(input_features)

['Age', 'BusinessTravel', 'DailyRate', 'Department', 'DistanceFromHome', 'Education', 'EducationField', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate', 'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction', 'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction', 'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance', 'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager']


In [286]:
# Number of feature names that were loaded.
len(input_features)

30

In [293]:
#master function

def predict_response(input_features, scaler=None, estimator=None):
    """Encode one employee record, scale it and predict attrition.

    Parameters
    ----------
    input_features : dict
        Raw record keyed by the original column names (e.g. ``'Age'``,
        ``'OverTime'``). A missing numerical key defaults to ``0``. A
        categorical value that is missing, or that is not one of the known
        categories, leaves every indicator column of that group at ``0``.
        Note that inside this function the name shadows any notebook-level
        ``input_features`` variable.
    scaler : object, optional
        Object exposing ``transform(frame)``. When omitted, the module-level
        ``scaling`` object is used.
    estimator : object, optional
        Object exposing ``predict`` and ``predict_proba``. When omitted, the
        module-level ``model`` object is used.

    Returns
    -------
    tuple
        ``(result, prediction_proba)``: ``result`` is a human-readable
        message chosen from the predicted label (``0`` means "remain"), and
        ``prediction_proba`` is whatever ``predict_proba`` returned for the
        single encoded row.
    """
    # Numerical inputs are passed through unchanged.
    numerical_columns = [
        'Age', 'DailyRate', 'DistanceFromHome', 'HourlyRate', 'MonthlyIncome', 'MonthlyRate',
        'NumCompaniesWorked', 'PercentSalaryHike', 'StockOptionLevel', 'TotalWorkingYears',
        'TrainingTimesLastYear', 'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager'
    ]

    # Categorical inputs and the categories each one is expanded into.
    categorical_columns = {
        'BusinessTravel': ['Non-Travel', 'Travel_Frequently', 'Travel_Rarely'],
        'Department': ['Human Resources', 'Research & Development', 'Sales'],
        'Education': ['Bachelors', 'Intermediate', 'Masters', 'PhD', 'Schooling'],
        'EducationField': ['Human Resources', 'Life Sciences', 'Marketing', 'Medical', 'Other', 'Technical Degree'],
        'EnvironmentSatisfaction': ['High', 'Low', 'Medium', 'Very high'],
        'Gender': ['Female', 'Male'],
        'JobInvolvement': ['High', 'Low', 'Medium', 'Very high'],
        'JobLevel': ['Fresher', 'Junior', 'Manager', 'Senior', 'Team Lead'],
        'JobRole': ['Healthcare Representative', 'Human Resources', 'Laboratory Technician', 'Manager', 'Manufacturing Director',
                    'Research Director', 'Research Scientist', 'Sales Executive', 'Sales Representative'],
        'JobSatisfaction': ['High', 'Low', 'Medium', 'Very high'],
        'MaritalStatus': ['Divorced', 'Married', 'Single'],
        'OverTime': ['No', 'Yes'],
        'PerformanceRating': ['Excellent', 'Outstanding'],
        'RelationshipSatisfaction': ['Excellent', 'Good', 'Outstanding', 'Poor'],
        'WorkLifeBalance': ['Best', 'Better', 'Good', 'Poor']
    }

    # Final column order: the numerical columns first, then one
    # '<column>_<category>' indicator per category, in the order listed above.
    # Deriving the list keeps it in sync with the two definitions instead of
    # maintaining a third hand-written copy.
    # NOTE(review): presumably this is the order used when the scaler/model
    # were fitted — confirm against the training notebook.
    feature_columns = numerical_columns + [
        f'{column}_{category}'
        for column, categories in categorical_columns.items()
        for category in categories
    ]

    # Build the single encoded row as {column name: [value]}.
    encoded = {column: [input_features.get(column, 0)] for column in numerical_columns}
    for column, categories in categorical_columns.items():
        value = input_features.get(column)
        for category in categories:
            encoded[f'{column}_{category}'] = [1 if value == category else 0]

    # One constructor call; `columns=` pins the column order.
    processed_data = pd.DataFrame(encoded, columns=feature_columns)

    # Fall back to the notebook-level objects only when none were injected,
    # so existing one-argument calls keep working.
    if scaler is None:
        scaler = scaling
    if estimator is None:
        estimator = model

    # Scaling
    scaled_data = scaler.transform(processed_data)

    # Predict using the model
    prediction = estimator.predict(scaled_data)[0]
    prediction_proba = estimator.predict_proba(scaled_data)

    # Determine the result
    result = "The employee is expected to remain with the company." if prediction == 0 else "The chances are high that the employee will leave the company."

    # Return the message together with the class probabilities
    return result, prediction_proba

In [288]:
# Example 1: record for which the saved run printed
# "The employee is expected to remain with the company."
data = {
    "Age": 40,
    "BusinessTravel": "Travel_Frequently",
    "DailyRate": 593,
    "Department": "Research & Development",
    "DistanceFromHome": 9,
    "Education": "Masters",
    "EducationField": "Medical",
    "EnvironmentSatisfaction": "Medium",
    "Gender": "Female",
    "HourlyRate": 88,
    "JobInvolvement": "High",
    "JobLevel": "Senior",
    "JobRole": "Research Director",
    "JobSatisfaction": "High",
    "MaritalStatus": "Single",
    "MonthlyIncome": 13499,
    "MonthlyRate": 13782,
    "NumCompaniesWorked": 9,
    "OverTime": "No",
    "PercentSalaryHike": 17,
    "PerformanceRating": "Excellent",
    "RelationshipSatisfaction": "Excellent",
    "StockOptionLevel": 0,
    "TotalWorkingYears": 20,
    "TrainingTimesLastYear": 3,
    "WorkLifeBalance": "Good",
    "YearsAtCompany": 18,
    "YearsInCurrentRole": 7,
    "YearsSinceLastPromotion": 2,
    "YearsWithCurrManager": 13
}



# predict_response returns a (message, probabilities) tuple, so `result`
# prints as a tuple.
result = predict_response(data)
print("Response - " , result)

Response -  ('The employee is expected to remain with the company.', array([[1., 0.]]))


In [289]:
# Example 2: record for which the saved run printed
# "The chances are high that the employee will leave the company."

# NOTE: `data` and `result` are re-bound here, replacing the values from the
# previous example cell.
data = {
    "Age": 36,
    "BusinessTravel": "Travel_Rarely",
    "DailyRate": 938,
    "Department": "Research & Development",
    "DistanceFromHome": 2,
    "Education": "Masters",
    "EducationField": "Medical",
    "EnvironmentSatisfaction": "High",
    "Gender": "Male",
    "HourlyRate": 79,
    "JobInvolvement": "High",
    "JobLevel": "Fresher",
    "JobRole": "Laboratory Technician",
    "JobSatisfaction": "High",
    "MaritalStatus": "Single",
    "MonthlyIncome": 2519,
    "MonthlyRate": 12287,
    "NumCompaniesWorked": 4,
    "OverTime": "No",
    "PercentSalaryHike": 21,
    "PerformanceRating": "Outstanding",
    "RelationshipSatisfaction": "Excellent",
    "StockOptionLevel": 0,
    "TotalWorkingYears": 16,
    "TrainingTimesLastYear": 6,
    "WorkLifeBalance": "Better",
    "YearsAtCompany": 11,
    "YearsInCurrentRole": 8,
    "YearsSinceLastPromotion": 3,
    "YearsWithCurrManager": 9
}



# predict_response returns a (message, probabilities) tuple, so `result`
# prints as a tuple.
result = predict_response(data)
print("Response - " , result)

Response -  ('The chances are high that the employee will leave the company.', array([[0.4, 0.6]]))
