This code builds and trains two neural network models using the Keras library with TensorFlow backend to predict  attrition and time frame for employee departure, respectively, based on the provided dataset.

In [408]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Read the dataset
data_subset = pd.read_csv('/content/IBM-HR-Employee-Attrition.csv')  # Replace 'your_dataset.csv' with the path to your dataset file

data_subset['Attrition'] = data_subset['Attrition'].map({'Yes': 1, 'No': 0})

# Selecting features for attrition prediction
X_attrition = data_subset[['Age', 'DistanceFromHome', 'Education',
                     'JobSatisfaction', 'MonthlyIncome', 'PercentSalaryHike',
                     'TotalWorkingYears', 'YearsAtCompany', 'YearsSinceLastPromotion',
                     'PerformanceRating', 'Attrition']]

# Split data into features and target variable for attrition prediction
y_attrition = data_subset['Attrition']

# Split data into training and testing sets for attrition prediction
X_train_attrition, X_test_attrition, y_train_attrition, y_test_attrition = train_test_split(X_attrition, y_attrition, test_size=0.2, random_state=42)

# Data scaling for attrition prediction
scaler_attrition = StandardScaler()
X_train_attrition_scaled = scaler_attrition.fit_transform(X_train_attrition)
X_test_attrition_scaled = scaler_attrition.transform(X_test_attrition)

# Define the neural network model for attrition prediction
model_attrition2 = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_attrition_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model for attrition prediction
model_attrition2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model for attrition prediction
model_attrition2.fit(X_train_attrition_scaled, y_train_attrition, epochs=50, batch_size=32, verbose=1)

# Evaluate the model for attrition prediction
loss_attrition, accuracy_attrition = model_attrition2.evaluate(X_test_attrition_scaled, y_test_attrition, verbose=0)
print(f'Attrition Prediction Loss: {loss_attrition}, Accuracy: {accuracy_attrition}')

# Calculate 'YearsUntilLeave'
data_subset['YearsUntilLeave'] = data_subset['TotalWorkingYears'] - data_subset['YearsAtCompany']

# Selecting features for time frame prediction
X_timeframe = data_subset[['Age', 'DistanceFromHome', 'Education',
                     'JobSatisfaction', 'MonthlyIncome', 'PercentSalaryHike',
                     'TotalWorkingYears', 'YearsAtCompany', 'YearsSinceLastPromotion',
                     'PerformanceRating', 'Attrition', 'YearsUntilLeave']]

# Target variable for time frame prediction
y_timeframe = data_subset['YearsUntilLeave']

# Split data into training and testing sets for time frame prediction
X_train_timeframe, X_test_timeframe, y_train_timeframe, y_test_timeframe = train_test_split(X_timeframe, y_timeframe, test_size=0.2, random_state=42)

# Data scaling for time frame prediction
scaler_timeframe = StandardScaler()
X_train_timeframe_scaled = scaler_timeframe.fit_transform(X_train_timeframe)
X_test_timeframe_scaled = scaler_timeframe.transform(X_test_timeframe)

# Define the neural network model for time frame prediction
model_timeframe2 = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_timeframe_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model for time frame prediction
model_timeframe2.compile(optimizer='adam', loss='mean_squared_error')

# Train the model for time frame prediction
model_timeframe2.fit(X_train_timeframe_scaled, y_train_timeframe, epochs=50, batch_size=32, verbose=1)

# Evaluate the model for time frame prediction
loss_timeframe = model_timeframe2.evaluate(X_test_timeframe_scaled, y_test_timeframe, verbose=0)
print(f'Time Frame Prediction Loss: {loss_timeframe}')



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Attrition Prediction Loss: 3.192291114828549e-05, Accuracy: 1.0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 

In [409]:
# Evaluate model performance on validation or test dataset
evaluation_result = model_timeframe2.evaluate(X_test_timeframe_scaled, y_test_timeframe)
evaluation_result2 = model_attrition2.evaluate(X_test_attrition_scaled, y_test_attrition)

print("Evaluation Result:")
print(f"Loss: {evaluation_result}")
print("Evaluation2 Result:")
print(f"Loss: {evaluation_result2}")

Evaluation Result:
Loss: 0.028895806521177292
Evaluation2 Result:
Loss: [3.192291114828549e-05, 1.0]


Both evaluations show very low loss values, with the second evaluation indicating perfect accuracy. This suggests that the model performs well on the given test dataset.

In [330]:
# Save the model for attrition prediction
model_attrition2.save('akylar_attrition_prediction_model2.keras')

# Save the model for time frame prediction
model_timeframe2.save('akylar_time_frame_prediction_model2.keras')


In [279]:
############# applying new model

## NOW WE WILL EXPLORE HOW TO APPLY THESE MODELS WITH EXTRA ADDITIONS TO HAVE A MORE CLEAR OUTCOME



we will have columns in the trained data set that do not exist in the new employees data set so we have to be mindfull of how to add them and alter them

To apply your trained model for predicting attrition and time frame for new employees using the saved models, follow these steps:

After loading the models and loading your new csv file you should :
1. Add a placeholder value for the 'Attrition' column in the new dataset.
2. Preprocess the new data for attrition prediction, ensuring to select the relevant features and scale them using the previously fitted scalers.
3. Predict attrition for the new employees and convert the predictions to binary values.
4. Add the predicted attrition column to the new dataset.
5. Select features for time frame prediction, excluding 'YearsUntilLeave'and Scale the new data for time frame prediction using the previously fitted scaler.
6. Predict the time frame for new employees.
7. Adjust the predicted attrition based on the predicted time frame. (time > 2 years attrition is no, else yes)

In [418]:
from keras.models import load_model
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the saved models
model_attrition = load_model('akylar_attrition_prediction_model2.keras')
model_timeframe = load_model('akylar_time_frame_prediction_model2.keras')

# Read the new dataset
new_data = pd.read_csv('/content/machine_test_data.csv')

# Add a placeholder value for the 'Attrition' column
new_data['Attrition'] = attrition_predictions  # VALUE DOES

# Preprocess the new data for prediction
X_new_attrition = new_data[['Age', 'DistanceFromHome', 'Education',
                             'JobSatisfaction', 'MonthlyIncome', 'PercentSalaryHike',
                             'TotalWorkingYears', 'YearsAtCompany', 'YearsSinceLastPromotion',
                             'PerformanceRating', 'Attrition']]

# Scale the new data for attrition prediction using the same scaler used for training
scaler_attrition = StandardScaler()
X_new_attrition_scaled = scaler_attrition.fit_transform(X_new_attrition)

# Predict attrition for new employees
attrition_predictions = model_attrition.predict(X_new_attrition_scaled)


# Add predicted attrition column to the new_data DataFrame
new_data['Attrition_Predicted'] = predicted_attrition

# Select features for time frame prediction, excluding 'YearsUntilLeave'
X_new_timeframe = new_data[['Age', 'DistanceFromHome', 'Education',
                            'JobSatisfaction', 'MonthlyIncome', 'PercentSalaryHike',
                            'TotalWorkingYears', 'YearsAtCompany', 'YearsSinceLastPromotion',
                            'PerformanceRating', 'Attrition','Attrition_Predicted']]

# Scale the new data for time frame prediction using the same scaler used for training
scaler_timeframe = StandardScaler()
X_new_timeframe_scaled = scaler_timeframe.fit_transform(X_new_timeframe)

# Predict time frame for new employees
timeframe_predictions = model_timeframe.predict(X_new_timeframe_scaled)

# Threshold for years until leave to determine attrition
threshold_years = 2

# Adjust Attrition_Predicted based on Timeframe_Predicted
new_data['Timeframe_Predicted'] = timeframe_predictions
new_data['Attrition_Predicted'] = ['No' if years > threshold_years else 'Yes' for years in new_data['Timeframe_Predicted']]
# Print the new dataset with predicted columns
new_data



Unnamed: 0,Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,Gender,HourlyRate,JobInvolvement,JobLevel,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,MonthlyRate,NumCompaniesWorked,Over18,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,Attrition,Attrition_Predicted,Timeframe_Predicted
0,28,Travel_Rarely,866,Sales,5,3,Medical,1,1469,4,Male,84,3,2,Sales Executive,1,Single,8463,23490,0,Y,No,18,3,4,80,0,6,4,3,5,4,1,3,0.000067,Yes,0.916547
1,53,Travel_Rarely,1084,Research & Development,13,2,Medical,1,250,4,Female,57,4,2,Manufacturing Director,1,Divorced,4450,26250,1,Y,No,11,3,3,80,2,5,3,3,4,2,1,3,0.000013,Yes,1.086137
2,24,Travel_Rarely,240,Human Resources,22,1,Human Resources,1,1714,4,Male,58,1,1,Human Resources,3,Married,1555,11585,1,Y,No,11,3,3,80,1,1,2,3,1,0,0,0,0.000003,Yes,0.690700
3,45,Travel_Rarely,1339,Research & Development,7,3,Life Sciences,1,86,2,Male,59,3,3,Research Scientist,1,Divorced,9724,18787,2,Y,No,17,3,3,80,1,25,2,3,1,0,0,0,0.000006,No,10.585955
4,36,Travel_Rarely,1396,Research & Development,5,2,Life Sciences,1,304,4,Male,62,3,2,Laboratory Technician,2,Single,5914,9945,8,Y,No,16,3,4,80,0,16,3,4,13,11,3,7,0.999699,No,12.367086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,31,Travel_Rarely,1232,Research & Development,7,4,Medical,1,462,3,Female,39,3,3,Manufacturing Director,4,Single,7143,25713,1,Y,Yes,14,3,3,80,0,11,2,2,11,9,4,10,0.000040,Yes,0.616204
96,29,Travel_Rarely,1246,Sales,19,3,Life Sciences,1,1497,3,Male,77,2,2,Sales Executive,3,Divorced,8620,23757,1,Y,No,14,3,3,80,2,10,3,3,10,7,0,4,0.000027,Yes,0.625269
97,37,Non-Travel,1063,Research & Development,25,5,Medical,1,529,2,Female,72,3,2,Research Scientist,3,Married,4449,23866,3,Y,Yes,15,3,1,80,2,15,2,3,13,11,10,7,0.000010,Yes,1.612647
98,47,Travel_Rarely,571,Sales,14,3,Medical,1,1503,3,Female,78,3,2,Sales Executive,3,Married,4591,24200,3,Y,Yes,17,3,3,80,1,11,4,2,5,4,1,2,0.999929,No,13.408607
