In [1]:
import pickle

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [2]:
## Load the dataset
# Read the churn CSV into a DataFrame and preview its first rows
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
## Preprocess the data
### Drop irrelevant columns
# Identifier-style columns are removed by name; `columns=` targets the column axis
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
## Encode categorical variables
# Fit the encoder on the Gender column, then replace the column with its integer codes.
# NOTE: this cell overwrites data['Gender'], so running it a second time would refit
# the encoder on the already-encoded values.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [5]:
## One-hot encode 'Geography'
# OneHotEncoder is imported with the other sklearn preprocessing classes in the first cell.
# The double brackets pass a one-column DataFrame (2-D input); toarray() turns the
# encoder's result into a dense array.
onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoder

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [6]:
# Column names the fitted encoder generates for the 'Geography' input feature
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

#### Converting the categorical features to numerical 

In [7]:
# Wrap the one-hot array in a DataFrame labelled with the encoder's feature names
geo_encoded_df = pd.DataFrame(
    data=geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
)
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [8]:
## Combine one hot encoder columns with the original data
# Replace the text 'Geography' column with its one-hot columns, joined side by side
data = pd.concat(
    [data.drop(columns='Geography'), geo_encoded_df],
    axis='columns',
)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [9]:
## Save the fitted encoders (the scaler is pickled after it is fitted, further down)
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    # One pickle file per encoder, written in binary mode
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)


In [10]:
# Preview the first rows of the encoded DataFrame
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [11]:
## Divide the dataset into independent and dependent features
y = data['Exited']
X = data.drop(columns='Exited')

## Split the data into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Scale the features: fit on the training split only, then apply to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [12]:
# Inspect the scaled training features
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
         1.72572313, -0.57638802]])

In [13]:
# Persist the fitted scaler to 'scaler.pkl'
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

In [14]:
# Display the encoded DataFrame (unscaled; only X_train/X_test were passed through the scaler)
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


### ANN Implementation

#### EarlyStopping
 monitors a chosen metric (like validation loss) and stops training early if the model stops improving, preventing overfitting.
#### TensorBoard 
is a visualization tool that allows you to log and monitor metrics like accuracy and loss, and view model performance during training.
#### Sequential 
is a high-level Keras API in TensorFlow. The Sequential model allows you to stack layers linearly, one after the other, to build a neural network.
#### Dense 
is a fully connected layer (also known as a dense layer). It connects each input node to each output node of the layer.

In [15]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [16]:
# Shape of a single input sample: one entry per feature column of X_train
(X_train.shape[1],)

(12,)

* Input Layer: The model expects an input with the shape defined in input_shape=(X_train.shape[1],). Each feature in the input data is connected to every neuron in the first dense layer.

* Hidden Layer 1: The first dense layer with 64 neurons receives the input features, performs a weighted sum, applies the ReLU activation function, and sends the result to the next layer. 
*  The ReLU (Rectified Linear Unit) activation function is applied to each neuron’s output. This function helps introduce non-linearity. It outputs the input directly if it’s positive, and zero if it's negative.

$$
\text{ReLU}(x) = \max(0, x)
$$
* ReLU is commonly used in hidden layers because it helps with faster training and reduces the likelihood of vanishing gradients.



* Hidden Layer 2: The second dense layer with 32 neurons receives the outputs from the first layer, processes them similarly with a weighted sum and ReLU activation, and passes the result to the output layer.

* Output Layer: The output layer, with a single neuron, applies the sigmoid activation function to produce a probability between 0 and 1, indicating the likelihood of the input belonging to a specific class (e.g., binary classification: 0 or 1).

* The sigmoid activation function is applied to the output neuron to convert its output into a probability value between 0 and 1.

Sigmoid function:
$$
\text{Sigmoid}(x) = \frac{1}{1 + e^{-x}}
$$

 
* This is ideal for binary classification tasks, where the output represents the probability of belonging to a certain class (e.g., class 1 vs. class 0). A threshold (commonly 0.5) is used to decide the predicted class.

* Training: During training, the model adjusts the weights and biases of the neurons using backpropagation and gradient descent to minimize the error (defined by a loss function like binary cross-entropy). The model learns patterns in the data over multiple epochs to predict outputs as accurately as possible.

In [17]:
## Build Our ANN Model
# The input shape is declared with an explicit Input object as the first entry,
# instead of passing `input_shape=` to the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  ## input: one value per feature column
    Dense(64, activation='relu'),               ## HL1, connected to the input layer
    Dense(32, activation='relu'),               ## HL2
    Dense(1, activation='sigmoid'),             ## output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Print the layer-by-layer summary of the model
model.summary()

In [19]:
# NOTE(review): this repeats the earlier `import tensorflow as tf`; the same module is reachable as `tf`
import tensorflow
# Adam optimizer with a learning rate of 0.01
opt=tensorflow.keras.optimizers.Adam(learning_rate=0.01)
# Binary cross-entropy loss object; echoed as the cell output to show its configuration
loss=tensorflow.keras.losses.BinaryCrossentropy()
loss

<LossFunctionWrapper(<function binary_crossentropy at 0x000001B09995E480>, kwargs={'from_logits': False, 'label_smoothing': 0.0, 'axis': -1})>

* Optimizer (opt): Defines how the model’s weights will be updated during training (e.g., using gradient descent).

In this notebook, `opt` is an Adam optimizer with a learning rate of 0.01. Commonly used optimizers include:
* Adam: A popular optimizer that combines the benefits of both momentum and RMSProp.
* SGD (Stochastic Gradient Descent): A basic optimizer that updates the weights using the gradient of the loss.
* RMSProp or Adagrad: Other adaptive learning rate methods.

* Loss Function (binary_crossentropy): Measures how well the predicted outputs match the true labels, helping the model improve during training.
* Metric (accuracy): Used to evaluate how well the model performs during training and validation by showing the accuracy of its predictions.

In [20]:
## compile the model
# Use the optimizer and loss objects configured in the previous cell, so the loss
# is defined in one place instead of being repeated here as a string.
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

* tensorflow_callback: This defines a TensorBoard callback. The TensorBoard is a visualization tool that helps you monitor and visualize various aspects of training such as loss, accuracy, and histograms of weights and biases in your model.

#### TensorBoard(log_dir=log_dir, histogram_freq=1):

* log_dir=log_dir: This specifies the directory (defined in the code cell below) where TensorBoard will save logs during training. You can open this directory later using TensorBoard to view the training progress visually.

* histogram_freq=1: This parameter controls how often to compute histograms of weights, biases, and other tensors in the model.

* When histogram_freq=1, it calculates and logs histograms after every epoch of training.
If this were set to 0, no histograms would be computed, and TensorBoard would only log scalars like loss and accuracy.

In [21]:
## Set up the Tensorboard
# TensorBoard and datetime are already imported in the TensorFlow import cell,
# so nothing is re-imported here.
# Each run logs to its own timestamped sub-directory of logs/fit/.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 is passed through to the TensorBoard callback
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

The EarlyStopping callback monitors a specified metric during training and stops the training process when that metric stops improving. This helps prevent overfitting and saves computational resources by stopping training when further improvement is unlikely.

#### monitor='val_loss':
* This tells the callback to monitor the validation loss during training.
* Validation loss is a key metric to determine how well the model is performing on unseen data. If the validation loss stops decreasing (or starts increasing), it may indicate that the model is beginning to overfit the training data.

#### patience=10:
* This sets the number of epochs to wait for an improvement in the monitored metric before stopping the training.
* Setting patience=10 means the training will continue for 10 additional epochs after the last improvement in validation loss. If no improvement is observed during these epochs, the training will be halted.

#### restore_best_weights=True:
* When training stops, this parameter ensures that the model's weights are reverted to the values from the epoch with the best (lowest) validation loss.
* This is useful because the final epoch might not have the best model performance. Restoring the best weights ensures that your final model is the one that performed best on the validation set during training.

In [22]:
## Set up Early Stopping
# Watches val_loss with a patience of 10 epochs and restores the best weights
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)


#### model.fit() Function
* model.fit() is the function that trains the neural network model on the training data.
It takes input data (X_train and y_train), runs the training process for a specified number of epochs, and evaluates the model on the validation data (X_test and y_test) after each epoch.


In [23]:
### Train the model
# NOTE(review): the test split doubles as validation data here, so early stopping
# is driven by the same data that would be used for the final evaluation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[tensorflow_callback, early_stopping_callback],
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - accuracy: 0.8022 - loss: 0.4356 - val_accuracy: 0.8540 - val_loss: 0.3519
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.8621 - loss: 0.3547 - val_accuracy: 0.8630 - val_loss: 0.3453
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.8567 - loss: 0.3477 - val_accuracy: 0.8565 - val_loss: 0.3476
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8633 - loss: 0.3443 - val_accuracy: 0.8545 - val_loss: 0.3543
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.8670 - loss: 0.3368 - val_accuracy: 0.8620 - val_loss: 0.3442
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.8689 - loss: 0.3308 - val_accuracy: 0.8525 - val_loss: 0.3519
Epoch 7/100
[1m

In [24]:
# Save the trained model to 'model.h5'
# NOTE(review): '.h5' is presumably the legacy HDF5 format; confirm what downstream loaders expect before switching formats
model.save('model.h5')



In [31]:
## Load the TensorBoard notebook extension (needed for the %tensorboard magic used below)
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [34]:
%tensorboard --logdir logs/fit/20250212-002522

Reusing TensorBoard on port 6006 (pid 13592), started 0:00:39 ago. (Use '!kill 13592' to kill it.)