In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix

### Datasets

In [24]:
HR = pd.read_csv('C:/Users/Nithin/Downloads/HR_ANN/HR_comma_sep.csv')

### Data Prep

In [25]:
HR.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14999 entries, 0 to 14998
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   satisfaction_level     14999 non-null  float64
 1   last_evaluation        14999 non-null  float64
 2   number_project         14999 non-null  int64  
 3   average_montly_hours   14999 non-null  int64  
 4   time_spend_company     14999 non-null  int64  
 5   Work_accident          14999 non-null  int64  
 6   left                   14999 non-null  int64  
 7   promotion_last_5years  14999 non-null  int64  
 8   department             14999 non-null  object 
 9   salary                 14999 non-null  object 
dtypes: float64(2), int64(6), object(2)
memory usage: 1.1+ MB


In [26]:
HR.isna().sum()

satisfaction_level       0
last_evaluation          0
number_project           0
average_montly_hours     0
time_spend_company       0
Work_accident            0
left                     0
promotion_last_5years    0
department               0
salary                   0
dtype: int64

In [27]:
# Encoding Categorical Features
numerical_features = ['satisfaction_level', 'last_evaluation', 'number_project',
     'average_montly_hours', 'time_spend_company']

categorical_features = ['Work_accident','promotion_last_5years', 'department', 'salary']

In [28]:
# An utility function to create dummy variable
def create_dummies( df, colname ):
    col_dummies = pd.get_dummies(df[colname], prefix=colname)
    col_dummies.drop(col_dummies.columns[0], axis=1, inplace=True)
    df = pd.concat([df, col_dummies], axis=1)
    df.drop( colname, axis = 1, inplace = True )
    return df

In [29]:
for c_feature in categorical_features:
  HR = create_dummies( HR, c_feature )

In [30]:
#Splitting the data

feature_columns = HR.columns.difference( ['left'] )
feature_columns1 = feature_columns[1:5]

### Train & Test Model

In [31]:
train_x,test_x,train_y,test_y = train_test_split(HR[feature_columns],HR['left'],test_size=0.3,random_state=12345)

### Model

In [32]:
SC = StandardScaler()

SC.fit(train_x)

StandardScaler()

In [33]:
train_x = SC.transform(train_x)
test_x = SC.transform(test_x)

In [34]:
MLP = MLPClassifier(hidden_layer_sizes=(3),verbose=True)

MLP.fit(train_x,train_y)

Iteration 1, loss = 0.60582232
Iteration 2, loss = 0.56701996
Iteration 3, loss = 0.53749485
Iteration 4, loss = 0.51480728
Iteration 5, loss = 0.49678871
Iteration 6, loss = 0.48225111
Iteration 7, loss = 0.46882798
Iteration 8, loss = 0.45646837
Iteration 9, loss = 0.44519826
Iteration 10, loss = 0.43616989
Iteration 11, loss = 0.42856800
Iteration 12, loss = 0.42187459
Iteration 13, loss = 0.41548496
Iteration 14, loss = 0.40924122
Iteration 15, loss = 0.40302892
Iteration 16, loss = 0.39671322
Iteration 17, loss = 0.39004831
Iteration 18, loss = 0.38315689
Iteration 19, loss = 0.37624840
Iteration 20, loss = 0.36933137
Iteration 21, loss = 0.36299653
Iteration 22, loss = 0.35757035
Iteration 23, loss = 0.35266242
Iteration 24, loss = 0.34827637
Iteration 25, loss = 0.34418550
Iteration 26, loss = 0.34040871
Iteration 27, loss = 0.33677674
Iteration 28, loss = 0.33346392
Iteration 29, loss = 0.33036498
Iteration 30, loss = 0.32751283
Iteration 31, loss = 0.32482572
Iteration 32, los



MLPClassifier(hidden_layer_sizes=3, verbose=True)

### Evaluation Metrics

In [35]:
predictors = MLP.predict(test_x)

In [36]:
confusion_matrix(test_y,predictors)

array([[3290,  178],
       [ 152,  880]], dtype=int64)

In [37]:
print(classification_report(test_y,predictors))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95      3468
           1       0.83      0.85      0.84      1032

    accuracy                           0.93      4500
   macro avg       0.89      0.90      0.90      4500
weighted avg       0.93      0.93      0.93      4500



In [38]:
len(MLP.coefs_)

2

In [39]:
len(MLP.coefs_[0])

18

In [40]:
len(MLP.intercepts_[0])

3