In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # data visualization and analysis

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Read the CSV from the ../input directory into the working DataFrame.
df = pd.read_csv('../input/HR_comma_sep.csv')

### Preprocessing the Data

In [None]:
df.head()

In [None]:
df.dtypes

In [None]:
df.isna().sum()

In [None]:
df.shape

In [None]:
# Rename the 'sales' column to 'department'. Reassigning the result instead of
# using inplace=True avoids mutating the frame in place across cells.
df = df.rename(columns={'sales': 'department'})

In [None]:
df.department.unique()

In [None]:
df.salary.unique()

In [None]:
# convert the categorical columns to numbers by converting them to dummy variables

In [None]:
# One-hot encode the two categorical columns. drop_first=True drops the first
# level of each encoded column, leaving k-1 dummy columns for k categories.
df_final = pd.get_dummies(df, columns=['department', 'salary'], drop_first=True)

In [None]:
# Preview the encoded frame; the first rows are enough to check the new dummy columns.
df_final.head()

### Separating Your Training and Testing Datasets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# We will predict the 'left' column.

# Input features: every column except the target. The explicit float cast
# matters on pandas >= 2.0, where get_dummies yields bool columns and a mixed
# float/int/bool frame would otherwise come out of .values as dtype=object.
X = df_final.drop(['left'], axis=1).values.astype(float)

# Output: the target column as a NumPy array.
y = df_final['left'].values

In [None]:
# Preview the first few feature rows rather than the whole matrix.
X[:5]

In [None]:
# Splitting data into training and testing (70% training and 30% testing).
# random_state pins the shuffle so the split, and everything computed from it
# further down, is the same on every Restart & Run All.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Preview the first few training rows rather than the whole matrix.
X_train[:5]

### Transforming the Data

In [None]:
# To scale the training set and the test set
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
sc = StandardScaler().fit(X_train)

X_train, X_test = sc.transform(X_train), sc.transform(X_test)

### Building the Artificial Neural Network

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Sequential initializes a linear stack of layers.
# This is a classification problem, so the model is held in a variable named `classifier`.
classifier = Sequential()

In [None]:
# Hidden layer: 9 ReLU units. The input width is read from the training
# matrix instead of being hard-coded, so it stays correct if the number of
# encoded feature columns changes.
classifier.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim = X_train.shape[1]))

In [None]:
# Output layer: a single sigmoid unit, so the network emits one value in (0, 1) per row.
classifier.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

In [None]:
# Train on the scaled training split in mini-batches of 10 for a single epoch.
classifier.fit(X_train, y_train, batch_size = 10, epochs = 1)

### Running Predictions on the Test Set

In [None]:
# Raw sigmoid outputs for the test split; thresholded in the next cell.
y_pred = classifier.predict(X_test)

In [None]:
# Turn the sigmoid outputs into boolean class predictions with a 0.5 cut-off.
y_pred = (y_pred > 0.5)

### Checking the Confusion Matrix

In [None]:
# To evaluate how well the model performed on the predictions, you will next use a confusion matrix.

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of true vs. predicted classes on the test split
# (scikit-learn convention: rows are true classes, columns are predicted classes).
cm = confusion_matrix(y_test, y_pred)
cm

### Making a Single Prediction

In [None]:
# Score one hand-written sample: 18 feature values, passed through the already
# fitted scaler before being given to the network.
# NOTE(review): the values presumably follow the column order of df_final
# without 'left' — confirm against the encoded frame.
new_pred = classifier.predict(sc.transform(np.array([[0.26,0.7 ,3., 238., 6., 0.,0.,0.,0., 0.,0.,0.,0.,0.,1.,0., 0.,1.]])))

In [None]:
# Apply the same 0.5 decision threshold that was used for the test-set
# predictions, so a single prediction is classified consistently with them.
new_pred = (new_pred > 0.5)

In [None]:
new_pred

### Improving the Model Accuracy

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

In [None]:
def make_classifier(optimizer = "adam"):
    """Build and compile the one-hidden-layer network used for classification.

    Args:
        optimizer: Optimizer forwarded to ``compile``. Defaults to "adam",
            so calling the builder with no arguments behaves as before.

    Returns:
        A compiled ``Sequential`` model: Dense(9, relu) over 18 inputs followed
        by Dense(1, sigmoid), with binary cross-entropy loss and an accuracy metric.
    """
    # A local name distinct from the notebook-level `classifier` avoids shadowing it.
    model = Sequential()
    model.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim=18))
    model.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))
    model.compile(optimizer= optimizer,loss = "binary_crossentropy",metrics = ["accuracy"])
    return model

In [None]:
# Wrap the builder so scikit-learn utilities can drive it. `epochs` is the
# Keras 2 name for the number of training passes; the older `nb_epoch`
# spelling is deprecated, and the rest of this notebook already uses `epochs`.
classifier = KerasClassifier(build_fn = make_classifier, batch_size = 10, epochs = 1)

In [None]:
# 10-fold cross-validation on the training split; returns one score per fold.
# n_jobs = -1 asks scikit-learn to use all available cores.
# NOTE(review): parallel workers combined with a Keras backend can be fragile
# on some setups — if this cell hangs, try n_jobs = 1.
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)

In [None]:
# Average score across the cross-validation folds.
mean = np.mean(accuracies)
mean

In [None]:
# Spread of the fold scores (population variance, ddof = 0).
variance = np.var(accuracies)
variance

### Adding Dropout Regularization to Fight Over-Fitting

In [None]:
from keras.layers import Dropout

# Same two-layer architecture as before, with a Dropout layer (rate 0.1)
# inserted between the hidden layer and the output layer.
# NOTE(review): this model is only built and compiled here; no fit call on it
# appears before `classifier` is reassigned further down — confirm whether
# training it was intended.
classifier = Sequential()
classifier.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim = 18))
classifier.add(Dropout(rate = 0.1))
classifier.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))
classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])



### Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

def make_classifier(optimizer = "adam"):
    """Build and compile the one-hidden-layer network with a chosen optimizer.

    Args:
        optimizer: Optimizer forwarded to ``compile``. Defaults to "adam" so
            the builder also works when no value is supplied; a parameter
            grid can still override it.

    Returns:
        A compiled ``Sequential`` model: Dense(9, relu) over 18 inputs followed
        by Dense(1, sigmoid), with binary cross-entropy loss and an accuracy metric.
    """
    # A local name distinct from the notebook-level `classifier` avoids shadowing it.
    model = Sequential()
    model.add(Dense(9, kernel_initializer = "uniform", activation = "relu", input_dim = 18))
    model.add(Dense(1, kernel_initializer = "uniform", activation = "sigmoid"))
    model.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ["accuracy"])
    return model

In [None]:
# Wrap the builder without fixing any training arguments; batch_size, epochs
# and optimizer are supplied by the parameter grid below.
classifier = KerasClassifier(build_fn = make_classifier)

In [None]:
# Search space: 2 batch sizes x 2 epoch counts x 2 optimizers = 8 combinations.
params = {
    'batch_size':[20,35],
    'epochs':[2,3],
    'optimizer':['adam','rmsprop']
}

In [None]:
# Exhaustive search over `params`, scored by accuracy with 2-fold cross-validation.
grid_search = GridSearchCV(estimator = classifier, param_grid = params, scoring = "accuracy", cv = 2)

In [None]:
# Run the search on the training split; fit returns the fitted search object itself.
grid_search = grid_search.fit(X_train, y_train)

In [None]:
# Best parameter combination found and its mean cross-validated score.
best_param = grid_search.best_params_
best_accuracy = grid_search.best_score_

In [None]:
best_param

In [None]:
best_accuracy

In [None]:
# Summary: we used Keras to build an artificial neural network that predicts the probability that an employee will leave the company.

# END