# ANN Classification - Bank Customer Retention
## Part 2 - ANN TRAINING
In this notebook, we load the preprocessed training and testing dataset files and train ANN models.

> **INPUT:** the preprocessed training and testing dataset files.<br>
> **OUTPUT:** the trained ANN models.

### 1. INITIALIZATION

In [74]:
# Import necessary libraries and modules
import pandas as pd
import tensorflow as ts
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

In [75]:
# Check tensorflow version
ts.__version__

'2.16.1'

### 2. LOADING DATASET FILES

In [76]:
# Prepare file location and load dataset
data_file_location = "..\\data\\interim\\"
data_train_file_name = "churn_modelling_preprocessed_train"
data_test_file_name = "churn_modelling_preprocessed_test"
data_file_ext = "csv"

data_train = pd.read_csv(data_file_location + data_train_file_name + "." + data_file_ext)
data_test = pd.read_csv(data_file_location + data_test_file_name + "." + data_file_ext)

In [77]:
# Check training dataset head
data_train.head()

Unnamed: 0,Geography_Germany,Geography_Spain,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,-0.569844,1.74309,0.169582,-1.091687,-0.464608,0.006661,-1.215717,0.809503,0.642595,-1.03227,1.106432,0.0
1,1.754865,-0.573694,-2.304559,0.916013,0.301026,-1.37744,-0.006312,-0.921591,0.642595,0.968738,-0.748664,0.0
2,-0.569844,-0.573694,-1.191196,-1.091687,-0.943129,-1.031415,0.579935,-0.921591,0.642595,-1.03227,1.485335,0.0
3,-0.569844,1.74309,0.035566,0.916013,0.109617,0.006661,0.473128,-0.921591,0.642595,-1.03227,1.276528,0.0
4,-0.569844,1.74309,2.056114,-1.091687,1.736588,1.044737,0.810193,0.809503,0.642595,0.968738,0.558378,0.0


In [78]:
# Check testing dataset head
data_test.head()

Unnamed: 0,Geography_Germany,Geography_Spain,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1.754865,-0.573694,-0.552043,-1.091687,-0.368904,1.044737,0.879303,-0.921591,0.642595,0.968738,1.610857,0.0
1,-0.569844,-0.573694,-1.314903,-1.091687,0.109617,-1.031415,0.429722,-0.921591,0.642595,-1.03227,0.49587,1.0
2,-0.569844,1.74309,0.57163,-1.091687,0.301026,1.044737,0.308583,-0.921591,0.642595,0.968738,-0.424787,0.0
3,-0.569844,-0.573694,1.416961,0.916013,-0.656016,-0.339364,0.575336,-0.921591,-1.55619,-1.03227,-0.187777,0.0
4,1.754865,-0.573694,0.57163,0.916013,-0.081791,0.006661,1.389611,0.809503,0.642595,0.968738,0.616842,0.0


In [79]:
# Split dataset files into independent and dependent features
X_train = data_train.iloc[:,0:-1]
y_train = data_train.iloc[:,-1].values.reshape(-1,1)
X_test = data_test.iloc[:,0:-1]
y_test = data_test.iloc[:,-1].values

### 2. BUILDING ANN MODEL

In [80]:
# Initialize the ANN
ann = ts.keras.models.Sequential()

In [81]:
# Add the input layer and the first hidden layer
ann.add(ts.keras.layers.Dense(units=6, activation="relu"))

In [82]:
# Add the second hidden layer
ann.add(ts.keras.layers.Dense(units=6, activation="relu"))

In [83]:
# Add the output layer
ann.add(ts.keras.layers.Dense(units=1, activation="sigmoid"))

### 3. TRAINING ANN MODEL

In [84]:
# Compiling the ANN
ann.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy","recall","precision","f1_score"])

In [85]:
# Training the model
ann.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7961 - f1_score: 0.3374 - loss: 0.5193 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749us/step - accuracy: 0.7939 - f1_score: 0.3417 - loss: 0.4713 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 736us/step - accuracy: 0.7914 - f1_score: 0.3450 - loss: 0.4569 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 772us/step - accuracy: 0.7952 - f1_score: 0.3396 - loss: 0.4442 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 716us/step - accuracy: 0.8021 - f1_score: 0.3322 - loss: 0.4265 - precision: 0.3717 - recall: 0.0082      
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x25206e093d0>

### 4. EVALUATION

In [86]:
# Predicting the testing samples
y_pred = ann.predict(X_test)

# Converting probabilities into predictions
y_pred = (y_pred > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  


In [87]:
# Calculate performance metrics
results = pd.DataFrame(index=["ANN Base"], columns=["Accuracy", "Recall", "Precision", "F1"])
results.iloc[0] = [accuracy_score(y_test, y_pred), recall_score(y_test, y_pred), precision_score(y_test, y_pred), f1_score(y_test, y_pred)]

In [88]:
# Print results
results

Unnamed: 0,Accuracy,Recall,Precision,F1
ANN Base,0.855,0.446914,0.732794,0.555215
