# Importing Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Feature Transformations and Engineering

In [None]:
# Load the raw churn dataset; the CSV is expected in the notebook's working directory.
df = pd.read_csv('Churn_Modelling.csv')
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Remove the identifier-style columns (row number, customer id, surname) so
# they are not used as model features.
# errors='ignore' keeps this cell re-runnable: a second execution, when the
# columns are already gone, is a no-op instead of raising KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Encode the Gender column as integer class codes.
# The fitted encoder is kept in `label_encoder` because it is pickled further
# down. Note that this cell overwrites df['Gender'] with the encoded values,
# so running it a second time would refit the encoder on the integer codes.
label_encoder = LabelEncoder()
gender_codes = label_encoder.fit_transform(df['Gender'])
df['Gender'] = gender_codes
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# One-hot encode Geography. The double brackets select a one-column frame,
# which is the 2-D input passed to the encoder here.
onehotencoder = OneHotEncoder()
geography_column = df[['Geography']]
geo_encoded = onehotencoder.fit_transform(geography_column)
geo_encoded

<10000x3 sparse matrix of type '<class 'numpy.float64'>'
	with 10000 stored elements in Compressed Sparse Row format>

In [None]:
# Show the column names the fitted encoder produces (one per Geography category).
onehotencoder.get_feature_names_out()

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [None]:
# Densify the sparse one-hot matrix into a labelled DataFrame.
# index=df.index pins the rows to the source frame: the later
# pd.concat(..., axis=1) aligns on index labels, so a fresh RangeIndex here
# would misalign (and introduce NaNs) if df ever carried a non-default index.
geo_encoded_df = pd.DataFrame(
    geo_encoded.toarray(),
    columns=onehotencoder.get_feature_names_out(),
    index=df.index,
)
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [None]:
# Swap the raw Geography column for its one-hot columns.
# The concat is column-wise, so rows are matched on index labels.
features_without_geo = df.drop(columns='Geography')
df = pd.concat(
    [features_without_geo, geo_encoded_df],
    axis=1,
)
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [None]:
# Re-display the encoded frame before the encoders are persisted.
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [None]:
# Persist both fitted encoders to disk, one pickle file per encoder.
encoder_artifacts = (
    ('label_encoder_gender.pkl', label_encoder),
    ('onehotencoder_geo.pkl', onehotencoder),
)
for artifact_path, fitted_encoder in encoder_artifacts:
    with open(artifact_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [None]:
# Split the frame into the target vector (Exited) and the feature matrix
# (every other column), both as NumPy arrays.
target_column = 'Exited'
y = df[target_column].values
X = df.drop(columns=[target_column]).values

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train, X_test = X_train_scaled, X_test_scaled

In [None]:
# Persist the fitted scaler next to the encoder pickles.
scaler_path = 'scaler.pkl'
with open(scaler_path, 'wb') as file:
    pickle.dump(scaler, file)

# ANN Implementation

In [None]:
# Convert the NumPy splits to float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_test)
)
# Labels get a trailing axis of size 1 so their shape matches the model's
# single-unit output when the loss is computed.
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.float32)[:, None]
    for labels in (y_train, y_test)
)

In [None]:
# Pair features with labels, then wrap each split in a mini-batch loader.
train_dataset, test_dataset = (
    TensorDataset(X_train_tensor, y_train_tensor),
    TensorDataset(X_test_tensor, y_test_tensor),
)
loader_options = {'batch_size': 32}
# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [None]:
class ANNModel(nn.Module):
    """Fully connected binary classifier.

    Layer widths are input_dim -> 64 -> 32 -> 16 -> 1, with ReLU after each
    hidden layer and a sigmoid on the single output unit.
    """

    def __init__(self, input_dim):
        """Build the layers.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        # Hidden layers, progressively narrower.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=16)
        # Single output unit.
        self.output = nn.Linear(in_features=16, out_features=1)
        # Activations shared across the forward pass.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the network on `x` and return sigmoid outputs in [0, 1].

        Args:
            x: Float tensor whose last dimension has size `input_dim`.

        Returns:
            Tensor with the same leading dimensions as `x` and a last
            dimension of size 1.
        """
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        hidden = self.fc3(hidden)
        hidden = self.relu(hidden)
        logits = self.output(hidden)
        return self.sigmoid(logits)

In [None]:
# Seed PyTorch's global RNG before the model is built so that the random
# weight initialisation (and the shuffling of the training DataLoader, which
# draws from the same global RNG) is repeatable across kernel restarts.
# 42 matches the random_state used for the train/test split.
torch.manual_seed(42)

# The network's input width is the number of feature columns.
input_dim = X_train.shape[1]
model = ANNModel(input_dim)

In [None]:
def _snapshot_weights(module):
    """Return an independent copy of `module`'s state_dict.

    state_dict() hands back tensors that reference the live parameters, so
    each tensor is cloned; otherwise the "snapshot" would keep changing as
    the optimizer updates the model.
    """
    return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}


# Binary cross-entropy on the sigmoid outputs produced by the model.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Early-stopping bookkeeping.
best_loss = np.inf
patience = 10        # epochs without validation improvement that are tolerated
trigger_times = 0    # consecutive epochs without improvement so far
# Start from the initial weights so best_model is always defined, even if the
# validation loss never improves (e.g. it is NaN from the first epoch).
best_model = _snapshot_weights(model)

num_epochs = 100
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    epoch_loss = running_loss / len(train_loader)

    # ---- validation pass ----
    # NOTE(review): test_loader doubles as the validation set here, so early
    # stopping is tuned on the same rows later used to report accuracy;
    # consider a separate validation split.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    val_loss /= len(test_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')

    if val_loss < best_loss:
        best_loss = val_loss
        # Clone the weights: keeping model.state_dict() itself would only
        # keep references that later epochs overwrite.
        best_model = _snapshot_weights(model)
        trigger_times = 0
    else:
        trigger_times += 1

    if trigger_times >= patience:
        print('Early stopping!')
        break

# Restore the best weights seen during training, whether the loop stopped
# early or ran for all num_epochs.
model.load_state_dict(best_model)

Epoch [1/100], Loss: 0.4916, Val Loss: 0.4146
Epoch [2/100], Loss: 0.4036, Val Loss: 0.3687
Epoch [3/100], Loss: 0.3553, Val Loss: 0.3433
Epoch [4/100], Loss: 0.3419, Val Loss: 0.3421
Epoch [5/100], Loss: 0.3373, Val Loss: 0.3413
Epoch [6/100], Loss: 0.3333, Val Loss: 0.3406
Epoch [7/100], Loss: 0.3300, Val Loss: 0.3381
Epoch [8/100], Loss: 0.3287, Val Loss: 0.3399
Epoch [9/100], Loss: 0.3244, Val Loss: 0.3582
Epoch [10/100], Loss: 0.3215, Val Loss: 0.3384
Epoch [11/100], Loss: 0.3184, Val Loss: 0.3380
Epoch [12/100], Loss: 0.3189, Val Loss: 0.3373
Epoch [13/100], Loss: 0.3165, Val Loss: 0.3375
Epoch [14/100], Loss: 0.3136, Val Loss: 0.3454
Epoch [15/100], Loss: 0.3115, Val Loss: 0.3418
Epoch [16/100], Loss: 0.3101, Val Loss: 0.3447
Epoch [17/100], Loss: 0.3088, Val Loss: 0.3466
Epoch [18/100], Loss: 0.3075, Val Loss: 0.3423
Epoch [19/100], Loss: 0.3050, Val Loss: 0.3433
Epoch [20/100], Loss: 0.3038, Val Loss: 0.3495
Epoch [21/100], Loss: 0.3024, Val Loss: 0.3430
Epoch [22/100], Loss: 

# Model Evaluation and Testing

In [None]:
# Evaluate on the held-out test rows.
model.eval()
with torch.no_grad():
    # One batched forward pass over the whole test tensor instead of a Python
    # loop over individual rows; rounding the sigmoid outputs yields 0.0 / 1.0
    # class predictions.
    y_pred = torch.round(model(X_test_tensor))

# Flatten to a plain Python list of floats, one prediction per test row.
y_pred_list = y_pred.reshape(-1).tolist()

accuracy = accuracy_score(y_test, y_pred_list)
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.8630


In [None]:
# Use the first (already scaled) test row as a batch of one; slicing with
# [:1] keeps the leading batch dimension.
sample_input = torch.tensor(X_test[:1], dtype=torch.float32)

# Prediction
model.eval()
with torch.no_grad():
    prediction = torch.round(model(sample_input)).item()

print(f'Prediction: {prediction} (1 indicates exited, 0 indicates not exited)')

Prediction: 0.0 (1 indicates exited, 0 indicates not exited)


In [None]:
# Compile the trained model to TorchScript and write it to model.pt.
scripted_model = torch.jit.script(model)
scripted_model.save('model.pt')