## Part 1: Preprocessing

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout

#  Import and read the attrition data
attrition_df = pd.read_csv('https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv')
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [2]:
# Determine the number of unique values in each column.
attrition_df.nunique()

Age                         43
Attrition                    2
BusinessTravel               3
Department                   3
DistanceFromHome            29
Education                    5
EducationField               6
EnvironmentSatisfaction      4
HourlyRate                  71
JobInvolvement               4
JobLevel                     5
JobRole                      9
JobSatisfaction              4
MaritalStatus                3
NumCompaniesWorked          10
OverTime                     2
PercentSalaryHike           15
PerformanceRating            2
RelationshipSatisfaction     4
StockOptionLevel             4
TotalWorkingYears           40
TrainingTimesLastYear        7
WorkLifeBalance              4
YearsAtCompany              37
YearsInCurrentRole          19
YearsSinceLastPromotion     16
YearsWithCurrManager        18
dtype: int64

In [3]:
# Create y_df with the Attrition and Department columns
y_df = attrition_df[['Attrition', 'Department']]


In [4]:
# Check the column names in attrition_df
print(attrition_df.columns)

# Create a list of at least 10 columns names to use as X data
x_list =['Education','Age','DistanceFromHome','JobSatisfaction', 'OverTime','StockOptionLevel','WorkLifeBalance','YearsAtCompany',
'YearsSinceLastPromotion',
'NumCompaniesWorked']

# Create X_df using your selected columns
x_df = attrition_df[x_list]

# Show the data types for X_df
print(x_df.dtypes)


Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'DistanceFromHome',
       'Education', 'EducationField', 'EnvironmentSatisfaction', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike',
       'PerformanceRating', 'RelationshipSatisfaction', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')
Education                   int64
Age                         int64
DistanceFromHome            int64
JobSatisfaction             int64
OverTime                   object
StockOptionLevel            int64
WorkLifeBalance             int64
YearsAtCompany              int64
YearsSinceLastPromotion     int64
NumCompaniesWorked          int64
dtype: object


In [5]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, random_state=42)

In [6]:
print(x_df['Age'])

0       41
1       49
2       37
3       33
4       27
        ..
1465    36
1466    39
1467    27
1468    49
1469    34
Name: Age, Length: 1470, dtype: int64


In [7]:
# Convert your x_df Columns to int32 data type however you see fit
x_df = x_df.astype({'Age': 'int32', 'DistanceFromHome': 'int32', 'JobSatisfaction': 'int32', 'StockOptionLevel': 'int32', 'WorkLifeBalance': 'int32', 'YearsAtCompany': 'int32', 'YearsSinceLastPromotion': 'int32', 'NumCompaniesWorked': 'int32'})
print(x_df.dtypes)
non_numeric = x_df.select_dtypes(exclude=[np.number]).columns
if len(non_numeric) > 0:
    print(f"Non-numeric columns: {list(non_numeric)}")
else:
    print("All columns are numeric except for 'OverTime' which is categorical.")

Education                   int64
Age                         int32
DistanceFromHome            int32
JobSatisfaction             int32
OverTime                   object
StockOptionLevel            int32
WorkLifeBalance             int32
YearsAtCompany              int32
YearsSinceLastPromotion     int32
NumCompaniesWorked          int32
dtype: object
Non-numeric columns: ['OverTime']


In [8]:
if 'OverTime' in X_train.columns:
    X_train['OverTime'] = X_train['OverTime'].map({'No': 0, 'Yes': 1})
    X_test['OverTime'] = X_test['OverTime'].map({'No': 0, 'Yes': 1})

In [9]:
# Create a StandardScaler
scaler = StandardScaler()

# Fit the StandardScaler to the training data
scaler.fit(X_train)

# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [10]:
# Create a OneHotEncoder for the Department column
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder to the training data
enc = OneHotEncoder().fit(y_train[['Department']])

# Create two new variables by applying the encoder
# to the training and testing data
y_train_encoded = enc.transform(y_train[['Department']])
y_test_encoded = enc.transform(y_test[['Department']])
#Print array shapes
print(y_train_encoded.shape, y_test_encoded.shape)

(1102, 3) (368, 3)


In [11]:
# Create a OneHotEncoder for the Attrition column
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder to the training data
enc = OneHotEncoder().fit(y_df[['Attrition']])

# Create two new variables by applying the encoder
# to the training and testing data
y_train_encoded = enc.transform(y_train[['Attrition']])
y_test_encoded = enc.transform(y_test[['Attrition']])

#Print first 5 rows of y_train_encoded and y_test_encoded
print(y_train_encoded[:5], y_test_encoded[:5])


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 5 stored elements and shape (5, 2)>
  Coords	Values
  (0, 0)	1.0
  (1, 0)	1.0
  (2, 0)	1.0
  (3, 0)	1.0
  (4, 0)	1.0 <Compressed Sparse Row sparse matrix of dtype 'float64'
	with 5 stored elements and shape (5, 2)>
  Coords	Values
  (0, 0)	1.0
  (1, 0)	1.0
  (2, 1)	1.0
  (3, 0)	1.0
  (4, 0)	1.0


## Create, Compile, and Train the Model

In [12]:
# Find the number of columns in the X training data
num_features = X_train.shape[1]
print(f"Number of input features: {num_features}")
# Create the input layer
inputs = Input(shape=(num_features,), name='input_layer')

# Create at least two shared layers
x = Dense(64, activation='relu', name='dense_1')(inputs)
x = Dense(32, activation='relu', name='dense_2')(x)

Number of input features: 10


In [13]:
# Create a branch for Department
# with a hidden layer and an output layer
# Create the hidden layer
dept_hidden = Dense(16, activation='relu', name='dept_hidden')(x)
num_departments = len(y_train['Department'].unique())
# Create the output layer
dept_output = Dense(num_departments, activation='sigmoid', name='dept_output')(dept_hidden)


In [14]:
# Create a branch for Attrition
# with a hidden layer and an output layer
# Create the hidden layer
attrition_hidden = Dense(8, activation='relu', name='attrition_hidden')(x)
# Create the output layer
branch_2 = Dense(1, activation='softmax', name='attrition_output')(attrition_hidden)

In [15]:
# Create the model
model = Model(inputs=inputs, outputs=[dept_output, branch_2], name='employee_prediction_model')
# Compile the model
model.compile(optimizer='adam',
              loss={'dept_output': 'categorical_crossentropy', 'attrition_output': 'binary_crossentropy'},
              metrics=['accuracy'])

# Summarize the model
model.summary()

In [16]:
# Train the model
y_train_dept = pd.get_dummies(y_train['Department'])  # One-hot encode Department
y_train_attrition = y_train['Attrition'].map({'No': 0, 'Yes': 1})  # Binary encode Attrition

y_test_dept = pd.get_dummies(y_test['Department'])  # One-hot encode Department
y_test_attrition = y_test['Attrition'].map({'No': 0, 'Yes': 1})  # Binary encode Attrition

# Recompile the model with the correct loss functions and metrics
model.compile(optimizer='adam',
              loss={'dept_output': 'categorical_crossentropy', 'attrition_output': 'binary_crossentropy'},
              metrics={'dept_output': 'accuracy', 'attrition_output': 'accuracy'})


model.fit(
    X_train_scaled, 
    [y_train_dept, y_train_attrition],
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=1
)


Epoch 1/100


  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - attrition_output_accuracy: 0.1647 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.4858 - dept_output_loss: 0.6717 - loss: 0.6718 - val_attrition_output_accuracy: 0.2127 - val_attrition_output_loss: 0.0000e+00 - val_dept_output_accuracy: 0.6109 - val_dept_output_loss: 0.5411 - val_loss: 0.5418
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.1635 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.6767 - dept_output_loss: 0.5116 - loss: 0.5115 - val_attrition_output_accuracy: 0.2127 - val_attrition_output_loss: 0.0000e+00 - val_dept_output_accuracy: 0.6063 - val_dept_output_loss: 0.5217 - val_loss: 0.5231
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.1680 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.6543 - dept_output_loss: 0.5056 - loss:

  return self.fn(y_true, y_pred, **self._fn_kwargs)


Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.1693 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.6638 - dept_output_loss: 0.5041 - loss: 0.5042 - val_attrition_output_accuracy: 0.2127 - val_attrition_output_loss: 0.0000e+00 - val_dept_output_accuracy: 0.6154 - val_dept_output_loss: 0.5434 - val_loss: 0.5453
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.1799 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.6529 - dept_output_loss: 0.5141 - loss: 0.5139 - val_attrition_output_accuracy: 0.2127 - val_attrition_output_loss: 0.0000e+00 - val_dept_output_accuracy: 0.6063 - val_dept_output_loss: 0.5424 - val_loss: 0.5443
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.1648 - attrition_output_loss: 0.0000e+00 - dept_output_accuracy: 0.6423 - dept_output_loss: 0.

<keras.src.callbacks.history.History at 0x10d1136cfd0>

In [17]:
# Evaluate the model with the testing data
model_loss, dept_loss, dept_accuracy, attrition_loss, attrition_accuracy = model.evaluate(X_test_scaled, [y_test_dept, y_test_attrition], verbose=0)


In [18]:
# Print the accuracy for both department and attrition
print(f"Department Accuracy: {dept_accuracy:.2f}")
print(f"Attrition Accuracy: {attrition_accuracy:.2f}")
print(f"Model Loss: {model_loss:.2f}")
print(f"Department Loss: {dept_loss:.2f}")
print(f"Attrition Loss: {attrition_loss:.2f}")




Department Accuracy: 0.00
Attrition Accuracy: 0.65
Model Loss: 10.15
Department Loss: 10.15
Attrition Loss: 0.13


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

YOUR ANSWERS HERE

1. In this case I do not think it is.
2. Sigmoid for binary classification and Softmax for multiclass
3. Maybe a bit more time with features and fine tuning (cleaning) the data for the model to be trained.