In [218]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import os


dataset = pd.read_csv(r"C:\Users\Ahsan\Downloads\archive (1)\Bank Customer Churn Prediction.csv")
print(dataset.head())



   customer_id  credit_score country  gender  age  tenure    balance  \
0     15634602           619  France  Female   42       2       0.00   
1     15647311           608   Spain  Female   41       1   83807.86   
2     15619304           502  France  Female   42       8  159660.80   
3     15701354           699  France  Female   39       1       0.00   
4     15737888           850   Spain  Female   43       2  125510.82   

   products_number  credit_card  active_member  estimated_salary  churn  
0                1            1              1         101348.88      1  
1                1            0              1         112542.58      0  
2                3            1              0         113931.57      1  
3                2            0              0          93826.63      0  
4                1            1              1          79084.10      0  


In [219]:
dataset.isnull().sum()

customer_id         0
credit_score        0
country             0
gender              0
age                 0
tenure              0
balance             0
products_number     0
credit_card         0
active_member       0
estimated_salary    0
churn               0
dtype: int64

In [220]:
input_data = dataset.iloc[:,:-1]
output_data = dataset.iloc[:,:-1]

In [221]:

# Separate numerical columns
numeric_cols = input_data.select_dtypes(include=['int64', 'float64']).columns



In [222]:
# Separate categorical columns
categorical_cols = input_data.select_dtypes(include=['object']).columns

In [223]:
ss = StandardScaler()
input_data[numeric_cols] = ss.fit_transform(input_data[numeric_cols])


In [224]:
# Encode categorical columns (One-Hot Encoding)
input_data = pd.get_dummies(input_data, columns=categorical_cols, drop_first=True)

input_data.head()


Unnamed: 0,customer_id,credit_score,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,country_Germany,country_Spain,gender_Male
0,-0.783213,-0.326221,0.293517,-1.04176,-1.225848,-0.911583,0.646092,0.970243,0.021886,False,False,False
1,-0.606534,-0.440036,0.198164,-1.387538,0.11735,-0.911583,-1.547768,0.970243,0.216534,False,True,False
2,-0.995885,-1.536794,0.293517,1.032908,1.333053,2.527057,0.646092,-1.03067,0.240687,False,False,False
3,0.144767,0.501521,0.007457,-1.387538,-1.225848,0.807737,-1.547768,-1.03067,-0.108918,False,False,False
4,0.652659,2.063884,0.388871,-1.04176,0.785728,-0.911583,0.646092,0.970243,-0.365276,False,True,False


In [225]:
input_data.shape

(10000, 12)

In [226]:
# 1. SAMPLE DATASET 
# Here we create a dummy dataset with 12 features and binary target
np.random.seed(42)
num_samples = 500
num_features = 12

X = pd.DataFrame(np.random.randn(num_samples, num_features), columns=[f'feat{i+1}' for i in range(num_features)])
y = pd.Series(np.random.randint(0, 2, size=num_samples), name='target')  # binary labels 0/1

#TRAIN/TEST SPLIT 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [227]:
#  PREPROCESSING 
# Ensure numeric type
X_train = X_train.apply(pd.to_numeric, errors='coerce').fillna(0).astype(np.float32)
X_test  = X_test.apply(pd.to_numeric, errors='coerce').fillna(0).astype(np.float32)

y_train = pd.to_numeric(y_train, errors='coerce').fillna(0).astype(np.float32)
y_test  = pd.to_numeric(y_test, errors='coerce').fillna(0).astype(np.float32)


In [228]:
# Convert labels to 0/1 (binary)
y_train = (y_train > 0).astype(np.float32)
y_test  = (y_test > 0).astype(np.float32)

In [229]:

#  CONVERT TO TORCH TENSORS 
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [230]:
# DATA LOADERS 
train_dataset = TensorDataset(X_train, y_train)
test_dataset  = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [231]:
# ANN MODEL 
class ANN_Model(nn.Module):
    def __init__(self, input_size):
        super(ANN_Model, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.layers(x)

model = ANN_Model(input_size=X_train.shape[1])
print(model)


ANN_Model(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=1, bias=True)
    (5): Sigmoid()
  )
)


In [232]:
#  LOSS & OPTIMIZER 
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [233]:
# TRAINING 
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

Epoch 1/10, Loss: 0.7041
Epoch 2/10, Loss: 0.6782
Epoch 3/10, Loss: 0.6963
Epoch 4/10, Loss: 0.6638
Epoch 5/10, Loss: 0.6985
Epoch 6/10, Loss: 0.6978
Epoch 7/10, Loss: 0.7062
Epoch 8/10, Loss: 0.6747
Epoch 9/10, Loss: 0.6680
Epoch 10/10, Loss: 0.6910


In [234]:
# EVALUATION 
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        preds = (outputs > 0.5).float()
        total += y_batch.size(0)
        correct += (preds == y_batch).sum().item()

print(f" Accuracy on Test Set: {100 * correct / total:.2f}%")

 Accuracy on Test Set: 56.00%
