# HR Employee Attrition


### Importing Libraries, Dataset, and EDA

In [None]:
#importing libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

#tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout

#EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping

#model evaluation
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix


In [None]:
#loading the HR attrition csv into a dataframe
csv_path = '../input/ibm-hr-analytics-attrition-dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv'
df = pd.read_csv(csv_path)

In [None]:
#previewing the first rows of the raw dataframe
df.head()

In [None]:
#summary statistics, transposed so that each described column becomes a row
df.describe().T

In [None]:
#visual check for missing values: every null cell shows up as a highlighted mark
missing_mask = df.isna()
sns.heatmap(data=missing_mask, cbar=False)

In [None]:
#assessing target variable distribution
#the counts are printed once and then drawn as a bar chart; wrapping the plot call
#in print() only emitted the repr of the returned Axes object, and a bar chart of
#the counts is the natural plot for a two-valued label column
attrition_counts = df['Attrition'].value_counts()
print(attrition_counts)
attrition_counts.plot(kind='bar')

In [None]:
#encoding target variable "Attrition" as 0 and 1 for "No" and "Yes" respectively
#the result is assigned back to the column: replace(..., inplace=True) on df.Attrition
#is a chained in-place update, which is deprecated and leaves df unchanged once
#pandas copy-on-write is active
#the 1 -> 1 and 0 -> 0 entries keep this cell safe to re-run after the column is already encoded
#astype(int) fails loudly if any unexpected label was mapped to NaN
df['Attrition'] = df['Attrition'].map({"Yes": 1, "No": 0, 1: 1, 0: 0}).astype(int)

In [None]:
#previewing the first rows again to confirm the encoded "Attrition" column
df.head()

In [None]:
#checking column dtypes and non-null counts before dummy encoding
df.info()

In [None]:
#getting dummy variables for categorical variables
#dtype=int pins the dummy columns to 0/1 integers; pandas >= 2.0 defaults to bool
#dummies, which would turn the mixed int/bool feature matrix taken with .values
#later on into an object array
df = pd.get_dummies(df, dtype=int)

In [None]:
#re-checking columns and dtypes after dummy encoding
df.info()

In [None]:
#assessing the information value of individual features via a correlation heatmap
corr_matrix = df.corr()
sns.heatmap(corr_matrix, cmap='Spectral')

In [None]:
#correlation of every column with the target, strongest positive first
attrition_corr = df.corr()['Attrition']
attrition_corr.sort_values(ascending=False)

In [None]:
#pairplot of a hand-picked set of features, coloured by the target
pairplot_columns = [
    "OverTime_Yes",
    "MaritalStatus_Single",
    "JobLevel",
    "TotalWorkingYears",
    "OverTime_No",
    "YearsInCurrentRole",
    "Attrition",
]
sns.pairplot(df[pairplot_columns], hue="Attrition")

### Data Preprocessing

In [None]:
#separating the target labels from the feature matrix (both as numpy arrays)
y = df['Attrition'].values
X = df.drop(columns='Attrition').values

In [None]:
#splitting dataset (70% train / 30% test, fixed seed for reproducibility)
#stratify=y keeps the Yes/No attrition ratio the same in the train and test splits,
#so the evaluation metrics are not skewed by an unlucky draw of the minority class
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y
)

In [None]:
#min-max scaling of the feature variables
#the scaler learns its min/max from the training split only and the same
#transformation is then applied to the test split
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Keras Classification Model

In [None]:
#checking the dimensions (rows, features) of the scaled training matrix
X_train.shape

In [None]:
#building sequential neural network: three relu hidden layers (100, 50, 25 units),
#each followed by 50% dropout, then a single sigmoid output unit
model = Sequential([
    Dense(units=100, activation='relu'),
    Dropout(0.5),
    Dense(units=50, activation='relu'),
    Dropout(0.5),
    Dense(units=25, activation='relu'),
    Dropout(0.5),
    Dense(units=1, activation='sigmoid'),
])

#binary cross-entropy loss optimised with adam
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
#using early stopping to prevent overfitting
#training stops once val_loss has not improved for 5 consecutive epochs
#restore_best_weights=True rolls the model back to the epoch with the lowest val_loss;
#without it the model keeps the weights from the last (non-improving) epoch
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=5,
    restore_best_weights=True,
)

In [None]:
#fitting model to the training data
#validation_split holds out the last 20% of the training rows to compute val_loss for
#early stopping, so the test set is not used to decide when training stops and
#stays unseen until the evaluation cells
model.fit(x = X_train,
          y = y_train,
          batch_size = 128,
          epochs = 100,
          validation_split = 0.2,
          verbose = 1,
          callbacks = [early_stop]
          )

#### Model Performance Evaluation

In [None]:

#plotting the per-epoch losses recorded by the last fit call
model_loss = pd.DataFrame.from_dict(model.history.history)
model_loss.plot()

In [None]:
#confusion matrix
#Sequential.predict_classes was removed in TensorFlow 2.6; thresholding the sigmoid
#output at 0.5 is the documented replacement and yields the same 0/1 labels
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(confusion_matrix(y_test,y_pred))

In [None]:
#per-class precision, recall and f1 on the test set
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
#ROC AUC Score
#AUC measures how well the predicted scores rank positives above negatives, so it is
#computed from the sigmoid probabilities; passing thresholded 0/1 labels would reduce
#the ROC curve to a single operating point
y_score = model.predict(X_test).ravel()
print("ROC AUC Score: ",roc_auc_score(y_test,y_score))