In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import category_encoders as ce
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Load the TensorBoard IPython extension into this kernel.
%load_ext tensorboard

In [3]:
# Keras callback that writes TensorBoard logs while the model trains;
# histogram_freq=1 asks for histograms to be computed every epoch.
# It is handed to model.fit() via `callbacks=` later in the notebook.
tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1)

In [4]:
# Read the HR dataset; the relative path is resolved against the kernel's
# current working directory.
df_hr = pd.read_csv('HR_comma_sep.csv')

In [5]:
# Show the raw frame: 'left' is the target, 'sales' and 'salary' are the
# string-valued columns.
df_hr

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.80,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low
...,...,...,...,...,...,...,...,...,...,...
14994,0.40,0.57,2,151,3,0,1,0,support,low
14995,0.37,0.48,2,160,3,0,1,0,support,low
14996,0.37,0.53,2,143,3,0,1,0,support,low
14997,0.11,0.96,6,280,4,0,1,0,support,low


In [6]:
# One-hot encoder restricted to the two string-valued columns; every other
# column is outside `cols`.
oneh = ce.OneHotEncoder(cols=['sales', 'salary'])

In [7]:
# Fit the encoder on the whole frame (the target column 'left' is still part
# of df_hr here, but it is not listed in the encoder's `cols`).
oneh.fit(df_hr)

OneHotEncoder(cols=['sales', 'salary'])

In [8]:
# Encode 'sales' and 'salary' into indicator columns (sales_*, salary_*).
# Despite the name, X_cleaned still contains the target column 'left'; it is
# dropped explicitly wherever features are needed.
X_cleaned = oneh.transform(df_hr)

In [9]:
# Summary statistics of the encoded frame, used to eyeball the value ranges
# of each column before choosing scalers.
X_cleaned.describe()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales_1,sales_2,...,sales_4,sales_5,sales_6,sales_7,sales_8,sales_9,sales_10,salary_1,salary_2,salary_3
count,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,...,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0,14999.0
mean,0.612834,0.716102,3.803054,201.050337,3.498233,0.14461,0.238083,0.021268,0.276018,0.051137,...,0.181345,0.14861,0.042003,0.081805,0.060137,0.057204,0.05247,0.487766,0.429762,0.082472
std,0.248631,0.171169,1.232592,49.943099,1.460136,0.351719,0.425924,0.144281,0.447041,0.220284,...,0.385317,0.355715,0.200602,0.274077,0.237749,0.232239,0.222981,0.499867,0.495059,0.275092
min,0.09,0.36,2.0,96.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.44,0.56,3.0,156.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.64,0.72,4.0,200.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.82,0.87,5.0,245.0,4.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
max,1.0,1.0,7.0,310.0,10.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Single-step pipelines, one per scaling strategy.
standard_transformer = Pipeline(steps=[
    ('standard', StandardScaler())])

minmax_transformer = Pipeline(steps=[
    ('minmax', MinMaxScaler())])


# Route columns to a scaler by name:
#   'std' -> zero-mean / unit-variance scaling
#   'mm'  -> rescaling to the [0, 1] range
# Columns that are not listed in either group are kept unchanged through
# remainder='passthrough'.
# 'average_montly_hours' is spelled exactly as in the CSV header.
preprocessor = ColumnTransformer(
    remainder='passthrough',  # passthrough features not listed
    transformers=[
        ('std', standard_transformer, [
         'last_evaluation', 'satisfaction_level', 'Work_accident']),
        ('mm', minmax_transformer, ['number_project',
         'average_montly_hours', 'time_spend_company'])
    ])


In [11]:
# Learn the scaling statistics from the feature columns only (target removed).
# NOTE(review): this fit uses every row and happens before the train/test
# split further down, so rows that later land in the test set contribute to
# the scaling statistics.
preprocessor.fit(X_cleaned.drop(columns=['left']))

ColumnTransformer(remainder='passthrough',
                  transformers=[('std',
                                 Pipeline(steps=[('standard',
                                                  StandardScaler())]),
                                 ['last_evaluation', 'satisfaction_level',
                                  'Work_accident']),
                                ('mm',
                                 Pipeline(steps=[('minmax', MinMaxScaler())]),
                                 ['number_project', 'average_montly_hours',
                                  'time_spend_company'])])

In [12]:
# Apply the fitted scalers to the feature columns (target removed) and show
# the transformed matrix.
df_final = preprocessor.transform(X_cleaned.drop(columns=['left']))
df_final

array([[-1.08727529, -0.93649469, -0.41116529, ...,  1.        ,
         0.        ,  0.        ],
       [ 0.84070693,  0.75281433, -0.41116529, ...,  0.        ,
         1.        ,  0.        ],
       [ 0.95755433, -2.02247906, -0.41116529, ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [-1.08727529, -0.97671633, -0.41116529, ...,  1.        ,
         0.        ,  0.        ],
       [ 1.42494396, -2.02247906, -0.41116529, ...,  1.        ,
         0.        ,  0.        ],
       [-1.14569899, -0.97671633, -0.41116529, ...,  1.        ,
         0.        ,  0.        ]])

In [13]:
# (rows, feature columns) of the transformed matrix. Despite its name,
# df_final is a NumPy array, not a DataFrame.
df_final.shape

(14999, 20)

In [14]:
# Hold out 20% of the rows for evaluation; the target is the 'left' column of
# the encoded frame. random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df_final,
    X_cleaned['left'],
    test_size=0.2,
    random_state=42,
)

In [15]:
# Seed TensorFlow's global random generator so that weight initialisation and
# dropout start from a fixed seed, matching the fixed random_state used for
# the train/test split.
tf.random.set_seed(42)

# Fully connected binary classifier: four hidden Dense layers, each followed
# by dropout, and a single sigmoid unit that outputs a probability.
# The input width is read from the training matrix instead of being
# hard-coded, so it follows whatever the preprocessing step produces.
n_features = X_train.shape[1]
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='tanh', input_shape=(n_features,)),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(108, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(56, activation='tanh'),
    tf.keras.layers.Dropout(0.125),
    # NOTE(review): 'exponential' is an unbounded activation for a hidden
    # layer - confirm that this choice is intentional.
    tf.keras.layers.Dense(24, activation='exponential'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [20]:
# Learning rate starts at 0.1 and is multiplied by 0.999 for every 100
# optimizer steps (applied continuously, since staircase is left at its
# default).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-1,
    decay_steps=100,
    decay_rate=0.999)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

# The model's last layer already applies a sigmoid, so its outputs are
# probabilities rather than logits; the loss therefore has to be configured
# with from_logits=False.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=["accuracy"])

In [21]:
# Train for 100 epochs in mini-batches of 64, logging one line per epoch
# (verbose=2) and writing TensorBoard logs through the callback.
# validation_data is passed as an (inputs, targets) tuple, which is the form
# documented by Keras.
# NOTE(review): the held-out test split is used as the validation set here
# and again for the final confusion matrix / classification report.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    verbose=2,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/100
188/188 - 1s - loss: 0.1606 - accuracy: 0.9499 - val_loss: 0.1479 - val_accuracy: 0.9560 - 1s/epoch - 7ms/step
Epoch 2/100
188/188 - 1s - loss: 0.1599 - accuracy: 0.9485 - val_loss: 0.1439 - val_accuracy: 0.9627 - 551ms/epoch - 3ms/step
Epoch 3/100
188/188 - 1s - loss: 0.1591 - accuracy: 0.9517 - val_loss: 0.1942 - val_accuracy: 0.9360 - 542ms/epoch - 3ms/step
Epoch 4/100
188/188 - 1s - loss: 0.1534 - accuracy: 0.9519 - val_loss: 0.2103 - val_accuracy: 0.9170 - 573ms/epoch - 3ms/step
Epoch 5/100
188/188 - 1s - loss: 0.1555 - accuracy: 0.9505 - val_loss: 0.1685 - val_accuracy: 0.9460 - 538ms/epoch - 3ms/step
Epoch 6/100
188/188 - 1s - loss: 0.1434 - accuracy: 0.9563 - val_loss: 0.1496 - val_accuracy: 0.9580 - 774ms/epoch - 4ms/step
Epoch 7/100
188/188 - 1s - loss: 0.1457 - accuracy: 0.9570 - val_loss: 0.1376 - val_accuracy: 0.9600 - 578ms/epoch - 3ms/step
Epoch 8/100
188/188 - 1s - loss: 0.1430 - accuracy: 0.9563 - val_loss: 0.1379 - val_accuracy: 0.9623 - 559ms/epoch - 3ms/

<keras.callbacks.History at 0x1cd35a58970>

In [None]:
# Score the held-out rows, then turn the sigmoid outputs into hard 0/1 labels
# with a 0.5 cut-off.
probabilities = model.predict(X_test)
y_pred = np.where(probabilities > 0.5, 1, 0)

# Confusion matrix of true labels against predicted labels.
tf.math.confusion_matrix(y_test, y_pred)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[2250,   44],
       [  77,  629]], dtype=int32)>

In [None]:
# Per-class precision / recall / F1 as a nested dict, reshaped so that each
# class (and each aggregate) becomes one row of the frame.
report_dict = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report_dict).transpose()

In [None]:
# Render the classification report table.
report

Unnamed: 0,precision,recall,f1-score,support
0,0.96691,0.98082,0.973815,2294.0
1,0.934621,0.890935,0.912255,706.0
accuracy,0.959667,0.959667,0.959667,0.959667
macro avg,0.950766,0.935877,0.943035,3000.0
weighted avg,0.959311,0.959667,0.959328,3000.0
