# Artificial Neural Network

### imports step

In [15]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import confusion_matrix, accuracy_score

In [16]:
tf.__version__

'2.1.0'

### functions step

In [17]:
# Defining the functions that will be used to create and train the model
def load_data(url):
    if url != None:
        dataset = pd.read_csv(url, error_bad_lines=False)
    return dataset


def encode_gender(toChange):
    le = LabelEncoder()
    toChange[:,2] = le.fit_transform(toChange[:,2])
    
    return toChange


def encode_geo(toChange):
    ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
    toChange = np.array(ct.fit_transform(toChange))
    
    return toChange


def split_data(x, y, testSize, randomState):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=testSize, random_state=randomState)
    return [x_train, x_test, y_train, y_test]


def feature_scale(train, test):
    sc = StandardScaler()
    x_train = sc.fit_transform(train)
    x_test = sc.fit_transform(test)
    
    return [x_train, x_test, sc]


def build_ann(unit1, actFunc, unit2, actFunc2, opt, lossFunc, metric):
    ann = tf.keras.models.Sequential()
    ann.add(tf.keras.layers.Dense(units=unit1, activation=actFunc))
    ann.add(tf.keras.layers.Dense(units=unit1, activation=actFunc))
    ann.add(tf.keras.layers.Dense(units=unit2, activation=actFunc2))
    ann.compile(optimizer = opt, loss = lossFunc, metrics = metric)
    
    return ann

def confMat(x, y):
    cm = confusion_matrix(y_test, y_pred)
    return cm

### load_data step

In [18]:
# loading the dataset from my github
dataset = load_data("https://raw.githubusercontent.com/sumanthnallamotu/kale/master/Churn_Modeling.csv")
x = dataset.iloc[:,3:-1].values
y = dataset.iloc[:,-1].values
print(x)
print(y)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


### encoding_data step

In [19]:
# Encode categorical data
x = encode_gender(x)
x = encode_geo(x)

### split_data step

In [20]:
# Split the dataset into the training and test sets and then feature scale them
x_train, x_test, y_train, y_test = split_data(x, y, 0.2, 0)
x_train, x_test, sc = feature_scale(x_train, x_test)

## build_ann step

In [21]:
# Build and train the model
ann = build_ann(5, 'relu', 1, 'sigmoid', 'adam', 'binary_crossentropy', ['accuracy'])
ann.fit(x_train, y_train, batch_size = 32, epochs = 20)

Train on 8000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x118fc8828>

## prediction step

In [22]:
# Any input for a the prediction method should be a 2D array
# You also have to standardize the input by calling sc.transform()
# You could use something like > 0.5 to give a straight answer based on the prediction
print(ann.predict(sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])) > 0.5)

y_pred = ann.predict(x_test)
y_pred = (y_pred > 0.5)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test), 1)),1))

[[False]]
[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


### confusion_matrix step

In [23]:
# Confusion matrix tells you how many predictions from the model were correct
# The model correctly predicted that 1529 would stay in the bank and incorrectly predicted that 66 would stay
# The model correctly predicted that 200 would leave the bank and incorrectly predicted that 205 would stay
cm = confMat(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)


[[1506   89]
 [ 201  204]]


0.855

In [26]:
print(cm[[1]])

IndexError: list index out of range