In [21]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle


In [22]:
## Load the dataset
## Reads Churn_Modelling.csv (path is relative to the notebook's working directory)
## and previews the first five rows.
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# DATA PRE-PROCESSING

In [23]:
## Remove the identifier-only columns (row number, customer id, surname),
## which this notebook treats as irrelevant for modelling.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


# Label Encoding = 'Gender'

Gender has only two categories (e.g., Male and Female).
For binary features like this, using Label Encoding is acceptable because the model only needs to distinguish between two states:
Female → 0  
Male   → 1

With just two values, the model does not mistakenly assume any meaningful “order” beyond different categories — it simply interprets the feature as a binary flag.

In [24]:
## Label Encoding = 'Gender'
## Convert the Gender strings into integers: 0 for Female and 1 for Male.
## An integer flag is sufficient because there are only two categories; a feature
## with more than two categories should use One-Hot Encoding instead.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


# One-Hot Encoding = 'Geography'

Geography is a categorical (nominal) feature — the values like France, Germany, Spain, etc. do not have any natural order.

If we applied Label Encoding, the model would see something like:
France → 0  
Germany → 1  
Spain → 2

This wrongly suggests:
Spain > Germany > France
and that the distances (2 vs 1 vs 0) have meaning
which is not true and can mislead the model.
So instead, we use One-Hot Encoding, which creates separate binary columns (0/1) for each country. This keeps all categories distinct without implying any ranking or numeric relationship.

In [25]:
## One-Hot Encoding = 'Geography'
from sklearn.preprocessing import OneHotEncoder

## Learn the set of countries first, then encode the column with the fitted encoder.
onehot_encoder_geo = OneHotEncoder().fit(data[['Geography']])
## The encoder returns a sparse matrix; .toarray() turns it into a regular NumPy
## array so it can be wrapped in a DataFrame in the following cells.
geo_encoder = onehot_encoder_geo.transform(data[['Geography']]).toarray()
geo_encoder

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [26]:
## Column names produced by the fitted encoder, using 'Geography' as the prefix
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [27]:
## Wrap the dense one-hot array in a DataFrame labelled with the encoder's column names.
## Reusing data.index keeps every encoded row attached to the row it was derived from,
## so the index-aligned concatenation with `data` stays correct even if `data`
## does not carry a default 0..n-1 index.
geo_encoded_df = pd.DataFrame(
    geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    index=data.index,
)
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


We drop the original Geography column and concatenate the new one-hot encoded columns side-by-side using pd.concat(..., axis=1).

In [28]:
## Swap the raw Geography column for its one-hot columns, joined column-wise
data = pd.concat([data.drop(columns=['Geography']), geo_encoded_df], axis='columns')
data.head()


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


# Saving the Encoders and Scalers

After fitting the encoders (here, on the full dataset) and the scaler (later, on the training split only), we save them using pickle.
This is important because the model must receive data transformed in exactly the same way during prediction as it did during training.
If we refit new encoders/scalers later, the mappings or scaling values might change, which would give wrong predictions.
By saving them, we can simply load and reuse the same fitted objects when deploying or predicting on new data.

In short:
- Train once
- Save the fitted encoders/scalers
- Reuse them every time we make predictions

* "wb" means write binary
  - It creates (or overwrites) a file on disk
* pickle.dump(object, file)
  - This stores the object in that file.

In [29]:
## Persist both fitted encoders so prediction code can reuse the exact same mappings
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [30]:
## Re-check the encoded frame before splitting and scaling
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


# Scaling the features

We use StandardScaler to normalize the input features so that each feature has mean 0 and standard deviation 1.
This helps the model train faster and prevents features with large values from dominating the learning process.

The scaler is fitted only on the training data:

fit_transform() is applied on the training set

transform() is applied on the test set

This ensures the model never learns information from the test data, keeping the evaluation fair and realistic.

In [31]:
## Separate the target from the predictors
Y = data['Exited']               # Dependent feature = 'Exited'
X = data.drop(columns='Exited')  # Independent features = every column except 'Exited'

## Hold out 20% of the rows for testing (80% train); random_state=42 keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

## Fit the scaler on the training rows only, then apply that same scaling to both sets
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [32]:
## Inspect the scaled training features (after scaling this is a NumPy array, not a DataFrame)
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
         1.72572313, -0.57638802]])

In [33]:
## Persist the fitted scaler so new data can be scaled with the training statistics
with open('Scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [34]:
## `data` itself is still unscaled; only X_train / X_test hold the scaled values
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


# ANN Implementation

In [35]:
import tensorflow as tf


In [36]:
from tensorflow.keras.models import Sequential ## Builds a neural network as a simple, layer-by-layer pipeline.
from tensorflow.keras.layers import Dense  ## A fully-connected layer where every neuron connects to all inputs (used in hidden and output layers).
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [37]:
## Shape of a single input sample: one entry per feature column of X_train.
## This is the number of inputs the first layer of the model receives.
X_train.shape[1:]

(12,)

## Building our own ANN Model

In [38]:
## Seed Python, NumPy and TensorFlow random number generators before the layers are created,
## so the initial weights start from the same values on every run.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    ## Hidden layer 1, connected to the input layer.
    ## input_shape gives the number of inputs (one per feature column of X_train);
    ## it only has to be specified on the first hidden layer.
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    ## Hidden layer 2
    Dense(32, activation='relu'),
    ## Output layer - a single sigmoid unit, because this is a binary classification problem
    Dense(1, activation='sigmoid'),
])

In [39]:
## Print each layer's output shape and parameter count
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                832       
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
_________________________________________________________________


In [40]:
## Adam optimizer with an explicit learning rate, and binary cross-entropy as the loss
## for the single sigmoid output. `tf` is the TensorFlow alias imported earlier in the
## notebook, so the package does not need to be imported again under a second name.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.binary_crossentropy
loss

<function keras.losses.binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1)>

In [50]:
## Attach the optimizer, the loss and the accuracy metric to the model
model.compile(
    loss=loss,
    optimizer=opt,
    metrics=['accuracy'],
)

In [82]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

## TensorBoard setup: each run logs into its own timestamped sub-directory of logs/fit/
## (the directory name is the current date and time, e.g. logs/fit/YYYYMMDD-HHMMSS).
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"

## All logs for this run, including per-epoch histograms, are written under log_dir
tensorflow_callbacks = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [83]:
## Early stopping: end training once the validation loss has failed to improve for
## 12 consecutive epochs, then roll the model back to the weights of its best epoch.
early_stopping_callbacks = EarlyStopping(
    monitor='val_loss',
    patience=12,
    restore_best_weights=True,
)

In [84]:
## Training the model (up to 100 epochs; early stopping may end it sooner)
## NOTE(review): the test split doubles as the validation set here, so early stopping
## and best-weight restoration are steered by the test data - consider carving a
## separate validation split out of the training data.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=[tensorflow_callbacks, early_stopping_callbacks],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [85]:
## Save the trained model to model.h5 (presumably loaded by the separate prediction notebook)
## NOTE(review): .h5 is the legacy HDF5 format - confirm the reader before switching formats.
model.save('model.h5')

In [86]:
!pip install tensorboard



You should consider upgrading via the 'C:\Users\Kartik\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [87]:
## Load the Tensorboard Extension
## (this makes the %tensorboard magic available to the notebook)
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [88]:
%tensorboard --logdir logs/fit20260104-223157 ## The command launches TensorBoard and loads your model’s training history from the logs/fit folder.
 ##The graphs show how well your model is learning over time — and whether it’s overfitting, underfitting, or improving correctly.

Reusing TensorBoard on port 6007 (pid 4328), started 0:05:03 ago. (Use '!kill 4328' to kill it.)

Model Training completed 
Prediction is done in another notebook