# ANN

In [None]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Business case: To predict whether customer will leave bank or not

In [None]:
# Importing the dataset
# Reads the churn CSV from the current working directory.
dataset = pd.read_csv('Churn_Modelling.csv')
# Features: the columns at positions 3..12 (the end of the slice is exclusive).
# This selection still contains 'Geography' and 'Gender', which are dummy-encoded later.
X = dataset.iloc[:, 3:13]
# Target: the column at position 13 (presumably 'Exited' -- TODO confirm against the CSV header).
y = dataset.iloc[:, 13]

In [None]:
# Show the loaded frame as the cell output.
dataset

In [None]:
# Preview the first rows of the frame (head() returns 5 rows by default).
dataset.head()

In [None]:
# Class balance check: number of rows per distinct value of the 'Exited' column.
dataset.Exited.value_counts()

In [None]:
# Create dummy (one-hot) variables for the two categorical features.
# drop_first=True drops the first category of each feature, so the remaining
# indicator columns are not perfectly collinear with each other.
# dtype=int: pandas >= 2.0 returns boolean dummies by default. Mixed with the
# numeric feature columns, the frame then converts to an `object` array, which
# Keras cannot turn into a tensor. Integer 0/1 columns keep X fully numeric.
geography = pd.get_dummies(X["Geography"], drop_first=True, dtype=int)
gender = pd.get_dummies(X['Gender'], drop_first=True, dtype=int)

In [None]:
# Inspect the dummy columns produced for 'Geography'.
geography

In [None]:
## Swap the raw categorical columns for their dummy encodings:
## remove 'Geography' and 'Gender', then append the indicator columns on the right.
X = pd.concat(
    [X.drop(columns=['Geography', 'Gender']), geography, gender],
    axis=1,
)

In [None]:
# Inspect the feature frame after the categorical columns were replaced by dummies.
X

In [None]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 80% of the rows go to training, 20% to testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

# Feature scaling: the columns live on very different scales, and a neural network
# trained with gradient descent learns poorly when a few large-valued inputs dominate.
# The scaler is fitted on the training rows only, so no statistics from the test set
# leak into training; the test rows are transformed with the training mean/std.
scaler = StandardScaler()
# Wrap the scaled arrays back into DataFrames so column names and row index are kept.
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

In [None]:
# (rows, columns) of the training features; the column count is the network's input size.
X_train.shape

TensorFlow is an open-source end-to-end platform which allows developers to create machine learning applications 
using various tools, libraries, and community resources. 

It is one of the most widely used deep learning libraries.


In [None]:
#!pip install tensorflow


In [None]:
import tensorflow
print(tensorflow.__version__)

In [None]:
# Part 2 - Now let's make the ANN!

# Importing the Keras libraries and packages
import tensorflow.keras
from tensorflow.keras.models import Sequential #Sequential library is responsible in creating any kind of NN.
from tensorflow.keras.layers import Dense      # to create hidden layers
from tensorflow.keras.layers import Dropout    # regularisation parameter. Helps to avoid overfitting

In [None]:
# Initialising the ANN...
# creating an ANN model using the Sequential API (a plain stack of layers, one after another)
classifier = Sequential()

# Number of input features, read from the training data instead of being hard-coded,
# so the input layer keeps matching X_train if the set of encoded columns changes.
n_features = X_train.shape[1]

# Adding the input layer and the first hidden layer:
# n_features input parameters feeding 10 neurons in the first hidden layer
classifier.add(Dense(units = 10, kernel_initializer = 'he_uniform', activation = 'relu', input_dim = n_features))

# units: no. of neurons in the layer
# activation: Activation function to use. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
# kernel_initializer: initialises the weights based on some statistical distribution. It generates
    # numbers from that statistical distribution and uses them as starting weights.
    # https://mmuratarat.github.io/2019-02-25/xavier-glorot-he-weight-init
# input_dim: no. of input parameters (only needed on the first layer)

# Adding the second hidden layer
classifier.add(Dense(units = 10, kernel_initializer = 'he_uniform', activation = 'relu'))

# Adding the output layer: a single sigmoid unit, i.e. one value between 0 and 1 per row
classifier.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

# Compiling the ANN (setting up the optimizer, the loss and the reported metric)
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# optimizer: updates the weights during back propagation to minimize the loss function
         # https://2809ayushic.medium.com/optimizers-in-deep-learning-31db684c73cf
# loss: the loss function being minimized (binary cross-entropy for a two-class target)
         # https://data-flair.training/blogs/keras-loss-functions/

In [None]:
# Print the layer-by-layer overview of the model.
classifier.summary() # input layer will not be included in this

In [None]:
# Total params - how many weight and bias params are there
# Trainable params: number of weights that will be updated during training with backpropagation
# Non-trainable params: number of weights that will not be updated during training with backpropagation

In [None]:
## we have created our neural network

In [None]:
# Fitting the ANN to the Training set
model_history = classifier.fit(
    X_train,
    y_train,
    validation_split=0.33,
    batch_size=100,
    epochs=100,
)

# validation_split: fraction of the rows passed to fit() that is held out and only used to
    # report val_loss / val_accuracy after every epoch; the model is not trained on those rows.
    # A validation set is used for tuning the model. It is different from the Test set,
    # which is only used to measure the performance of the finished model.
# batch_size: number of samples passed through the network before the weights are updated once.
       # In this notebook (assuming the 10,000-row dataset):
       # rows given to fit(): 8000 (the 80% training split)
       # rows actually trained on: 5360 (the other 33% is held out for validation)
       # batch size: 100 (100 records in each batch)
       # for the 1st epoch:
                  # 54 iterations (5360/100, rounded up)
                  # [1st iteration uses 100 records, the next one the next 100 records, and so on]
                  # the weights get updated after every iteration.
       # for the 2nd epoch:
                  # the same procedure is repeated over the training rows.
       # this continues for 100 epochs, because epochs=100.
# epochs: one epoch is one complete pass over the training rows, each batch going
    # from the i/p layer to the o/p layer and then all the way back (back propagation).

In [None]:
model_history.history.keys()
# Lists the names of the per-epoch series recorded during training.
# The plotting cells further down read 'accuracy', 'val_accuracy', 'loss' and 'val_loss' from it.

In [None]:
# list all data in history
print(model_history.history.keys())

# summarize history for accuracy
# The second curve comes from validation_split in fit(), i.e. rows held out of the
# training data. It is labelled 'validation', not 'test': the test set is never
# seen during fit().
fig, ax = plt.subplots()
ax.plot(model_history.history['accuracy'], label='train')
ax.plot(model_history.history['val_accuracy'], label='validation')
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# summarize history for loss
# The second curve comes from validation_split in fit(), i.e. rows held out of the
# training data. It is labelled 'validation', not 'test': the test set is never
# seen during fit().
fig, ax = plt.subplots()
ax.plot(model_history.history['loss'], label='train')
ax.plot(model_history.history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
# The output layer is a single sigmoid unit, so every prediction is a score between 0 and 1.
y_pred = classifier.predict(X_test)
y_pred

In [None]:
# Turn the predicted scores into class labels with a 0.5 cut-off.
# Outputs a boolean matrix (True where the score is above 0.5).
# Note: this overwrites the raw scores that y_pred held before.
y_pred = (y_pred > 0.5)
y_pred

In [None]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# Arguments are passed as (true labels, predicted labels).
cm = confusion_matrix(y_test, y_pred)

In [None]:
# Show the confusion matrix computed from y_test and y_pred.
cm

In [None]:
# Calculate the Accuracy
from sklearn.metrics import accuracy_score
# accuracy_score is documented as (y_true, y_pred). Accuracy itself is the same either
# way round, but the true labels go first to match the confusion_matrix call and so
# that swapping in a non-symmetric metric later does not silently give wrong numbers.
score = accuracy_score(y_test, y_pred)


In [None]:
# Show the accuracy measured on the test set.
score

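**Dropout:** When we create a deeper NN with a larger number of weight and bias parameters, the model tends to overfit, because every weight update pushes it to fit the training data more closely. Dropout counters this by randomly switching off a fraction of a layer's neurons at each training step, so the network cannot rely too heavily on any single neuron.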

In [None]:
# Compiling the model:
    # After defining the model and stacking the layers, we have to configure it. This
    # configuration happens in the compilation phase. Before training the model we need to
    # compile it and define the loss function, the optimizer, and the metrics to report.

Smaller batch sizes often converge faster to “good” solutions. This is intuitively explained by the fact that smaller batch sizes allow the model to “start learning before having to see all the data.”

**Different accuracy_score each time**
Because the ANN randomly initializes the weights and biases on every run, training starts from a different point each time. The network therefore learns different weights and biases, and the results differ from run to run.