In [134]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import numpy as np
from torch.utils.data import Dataset, DataLoader
### Load the pickle file
import torch
import torch.nn as nn
from torchinfo import summary
from torch.utils.tensorboard import SummaryWriter
import pickle

In [135]:
## Load the dataset
data=pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [136]:
## Preprocess the data
### Drop irrelevant columns
data=data.drop(['RowNumber','CustomerId','Surname'],axis=1)
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [137]:
## Encode categorical variables
label_encoder_gender=LabelEncoder()
data['Gender']=label_encoder_gender.fit_transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


In [138]:
## Onehot encode 'Geography
from sklearn.preprocessing import OneHotEncoder
onehot_encoder_geo=OneHotEncoder()
geo_encoder=onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
geo_encoder

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [139]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [140]:
geo_encoded_df=pd.DataFrame(geo_encoder,columns=onehot_encoder_geo.get_feature_names_out(['Geography']))
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [141]:
## Combine one hot encoder columns with the original data
data=pd.concat([data.drop('Geography',axis=1),geo_encoded_df],axis=1)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [142]:
## Save the encoders and sscaler
with open('label_encoder_gender.pkl','wb') as file:
    pickle.dump(label_encoder_gender,file)

with open('onehot_encoder_geo.pkl','wb') as file:
    pickle.dump(onehot_encoder_geo,file)


In [143]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [144]:
## DiVide the dataset into indepent and dependent features
X=data.drop('Exited',axis=1)
y=data['Exited']

## Split the data in training and tetsing sets
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)



## Scale these features
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)




In [145]:
y_train

9254    0
1561    0
1670    1
6087    1
6669    1
       ..
5734    0
5191    0
5390    1
860     1
7270    0
Name: Exited, Length: 8000, dtype: int64

In [146]:
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [147]:
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [148]:
# Check if GPU is available, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


### Create Custome Dataset and DataLoader

In [149]:

print(X_train.shape)
print(y_train.shape)
features = torch.tensor(X_train, dtype=torch.float32)
labels = torch.tensor(y_train.values, dtype=torch.float32)
features_test = torch.tensor(X_test, dtype=torch.float32).to(device)
labels_test = torch.tensor(y_test, dtype=torch.float32).to(device)

print(y_train)
print(labels)


(8000, 12)
(8000,)
9254    0
1561    0
1670    1
6087    1
6669    1
       ..
5734    0
5191    0
5390    1
860     1
7270    0
Name: Exited, Length: 8000, dtype: int64
tensor([0., 0., 1.,  ..., 1., 1., 0.])


In [150]:
# Custom PyTorch Dataset
class CustomerDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Create dataset
dataset = CustomerDataset(features, labels)

## What pin_memory=True Does?
If pin_memory=True, PyTorch allocates memory in the pinned (page-locked) region of RAM.
This makes data transfer from CPU to GPU faster because pinned memory allows direct DMA transfers.


In [151]:
# Create DataLoader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, pin_memory=True)

In [152]:


# Test DataLoader
for batch in dataloader:
    x_batch, y_batch = batch
    print("Features:", x_batch)
    print("Labels:", y_batch)
    print("Features shape", x_batch.shape)
    print("Labels shape", y_batch.shape)  
    print("Labels 2D", y_batch.view(-1,1))
    print("Labels 2D", y_batch.view(-1,1).shape)
    break  # Print only the first batch

Features: tensor([[ 2.3197e-01, -1.0950e+00, -8.5351e-02,  1.7338e+00,  1.2209e-01,
         -9.1669e-01,  6.4920e-01, -1.0258e+00, -9.5650e-01, -9.9850e-01,
          1.7257e+00, -5.7639e-01],
        [-1.4700e+00,  9.1325e-01, -7.5086e-01, -1.7365e+00,  6.0180e-01,
         -9.1669e-01,  6.4920e-01, -1.0258e+00, -1.1707e+00, -9.9850e-01,
          1.7257e+00, -5.7639e-01],
        [ 5.6405e-01,  9.1325e-01, -1.4164e+00, -1.0424e+00,  1.0403e+00,
         -9.1669e-01,  6.4920e-01,  9.7482e-01, -1.3219e+00, -9.9850e-01,
         -5.7947e-01,  1.7349e+00],
        [-8.0581e-01, -1.0950e+00, -1.8042e-01,  6.9270e-01, -1.2185e+00,
          8.0844e-01,  6.4920e-01, -1.0258e+00, -1.1852e+00, -9.9850e-01,
         -5.7947e-01,  1.7349e+00],
        [ 5.4330e-01,  9.1325e-01, -7.5086e-01, -1.7365e+00, -1.2185e+00,
          8.0844e-01,  6.4920e-01, -1.0258e+00,  1.4363e+00,  1.0015e+00,
         -5.7947e-01, -5.7639e-01],
        [ 3.5650e-01, -1.0950e+00,  3.9001e-01, -1.3447e-03,  1.2608e+

### ANN Implementation

In [153]:
input_size = X_train.shape[1]  # Define input size based on training data

model = nn.Sequential(
    nn.Linear(input_size, 64),  # HL1
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(64, 32),  # HL2
    nn.BatchNorm1d(32),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(32, 1),  # Output layer
    nn.Sigmoid()
)

model.to(device)

print(model)  # To check the model architecture

Sequential(
  (0): Linear(in_features=12, out_features=64, bias=True)
  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.3, inplace=False)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.3, inplace=False)
  (8): Linear(in_features=32, out_features=1, bias=True)
  (9): Sigmoid()
)


In [154]:
summary(model)

Layer (type:depth-idx)                   Param #
Sequential                               --
├─Linear: 1-1                            832
├─BatchNorm1d: 1-2                       128
├─ReLU: 1-3                              --
├─Dropout: 1-4                           --
├─Linear: 1-5                            2,080
├─BatchNorm1d: 1-6                       64
├─ReLU: 1-7                              --
├─Dropout: 1-8                           --
├─Linear: 1-9                            33
├─Sigmoid: 1-10                          --
Total params: 3,137
Trainable params: 3,137
Non-trainable params: 0

In [155]:
# Define loss function and optimizer
criterion = nn.BCELoss()  # Use BCE for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [156]:
# Hyperparameters
epochs = 100
batch_size = 32
learning_rate = 0.01

In [157]:
# TensorBoard Writer
writer = SummaryWriter("runs/churn_model")

In [158]:
# Training Loop
for epoch in range(epochs):
    epoch_loss = 0.0
    for batch in dataloader:
        X_batch, y_batch = batch  # Get features and labels
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Forward pass
        y_pred = model(X_batch)
        
        # Compute loss
        loss = criterion(y_pred, y_batch.view(-1, 1))  # Ensure y_batch shape is (batch_size, 1)


        
        # Backward pass
        optimizer.zero_grad()  # Reset gradients
        loss.backward()  # Compute gradients
        optimizer.step()  # Update model parameters
        
        epoch_loss += loss.item()  # Accumulate batch loss

    # Log loss to TensorBoard
    avg_loss = epoch_loss / len(dataloader)
    writer.add_scalar("Loss/train", avg_loss, epoch)
    
    # Print loss per epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss / len(dataloader):.4f}")

print("Training complete!")
# Close writer
writer.close()

Epoch 1/100, Loss: 0.4836
Epoch 2/100, Loss: 0.4268
Epoch 3/100, Loss: 0.4097
Epoch 4/100, Loss: 0.3936
Epoch 5/100, Loss: 0.3926
Epoch 6/100, Loss: 0.3876
Epoch 7/100, Loss: 0.3779
Epoch 8/100, Loss: 0.3783
Epoch 9/100, Loss: 0.3676
Epoch 10/100, Loss: 0.3703
Epoch 11/100, Loss: 0.3675
Epoch 12/100, Loss: 0.3684
Epoch 13/100, Loss: 0.3709
Epoch 14/100, Loss: 0.3633
Epoch 15/100, Loss: 0.3648
Epoch 16/100, Loss: 0.3650
Epoch 17/100, Loss: 0.3622
Epoch 18/100, Loss: 0.3575
Epoch 19/100, Loss: 0.3608
Epoch 20/100, Loss: 0.3572
Epoch 21/100, Loss: 0.3580
Epoch 22/100, Loss: 0.3557
Epoch 23/100, Loss: 0.3584
Epoch 24/100, Loss: 0.3565
Epoch 25/100, Loss: 0.3549
Epoch 26/100, Loss: 0.3554
Epoch 27/100, Loss: 0.3604
Epoch 28/100, Loss: 0.3524
Epoch 29/100, Loss: 0.3613
Epoch 30/100, Loss: 0.3542
Epoch 31/100, Loss: 0.3522
Epoch 32/100, Loss: 0.3553
Epoch 33/100, Loss: 0.3496
Epoch 34/100, Loss: 0.3521
Epoch 35/100, Loss: 0.3522
Epoch 36/100, Loss: 0.3535
Epoch 37/100, Loss: 0.3520
Epoch 38/1

In [159]:
torch.save(model, "full_model.pth")

In [160]:
# Create dataset and dataloader
dataset_test = CustomerDataset(features_test, labels_test)
test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False)

In [161]:
correct = 0
total = 0

with torch.no_grad():  # No gradients needed for inference
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        y_pred = model(X_batch)  # Forward pass
        y_pred = (y_pred > 0.5).float()  # Convert to 0 or 1
        
        # Calculate accuracy
        correct += (y_pred.view(-1) == y_batch).sum().item()
        total += y_batch.size(0)

# Print final test accuracy
accuracy = correct / total

print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.8540


In [162]:
model = torch.load("full_model.pth")  # Directly load the full model
model.to(device)  # Move to GPU if available
model.eval()

  model = torch.load("full_model.pth")  # Directly load the full model


Sequential(
  (0): Linear(in_features=12, out_features=64, bias=True)
  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): Dropout(p=0.3, inplace=False)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Dropout(p=0.3, inplace=False)
  (8): Linear(in_features=32, out_features=1, bias=True)
  (9): Sigmoid()
)

In [163]:

def predict(model, input_data):
    model.eval()  # Ensure model is in evaluation mode

    # Convert input data to a PyTorch tensor
    input_tensor = torch.tensor(input_data, dtype=torch.float32).to(device)

    with torch.no_grad():  # No need for gradient calculation
        output = model(input_tensor)
        prediction = (output > 0.5).float()  # Convert probability to 0 or 1

    return prediction.cpu().numpy()  # Convert to NumPy for easy interpretation


In [164]:
sample_data = np.array([[619, 0, 42, 2, 0.00, 1, 1, 1, 101348.88, 1, 1, 0]])  # Ensure 12 features
sample_data=torch.tensor(sample_data).to(device)
pred = predict(model, sample_data)
print("Prediction:", pred)  # Output: [1] or [0]

Prediction: [[1.]]


  input_tensor = torch.tensor(input_data, dtype=torch.float32).to(device)
