In [1]:
import pandas as pd


# Locations of the three pre-split CSV files (relative to the working directory).
train_data_path, validation_data_path, test_data_path = (
    'balanced_train_data.csv',
    'validation_data.csv',
    'test_data.csv',
)

# Read every split into its own DataFrame, in the same order as the paths above.
train_data, validation_data, test_data = map(
    pd.read_csv,
    (train_data_path, validation_data_path, test_data_path),
)

# Preview the training split; being the last expression, it is rendered as the cell output.
train_data.head()

Unnamed: 0,Customer_Age,Dependent_count,Education_Level,Income_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,...,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Gender_M,Marital_Status_Married,Marital_Status_Single,Card_Category_Gold,Card_Category_Platinum,Card_Category_Silver,Attrition_Flag
0,0.489362,1.0,4,1,0.604651,0.4,0.166667,0.666667,0.094617,0.0,...,0.131783,0.192663,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
1,0.531915,0.8,2,0,0.674419,0.4,0.333333,0.5,1.0,0.700437,...,0.24031,0.294035,0.051051,1.0,0.0,1.0,0.0,0.0,0.0,1
2,0.255319,0.6,2,2,0.348837,0.4,0.166667,0.333333,0.096975,0.667461,...,0.193798,0.186782,0.362362,1.0,1.0,0.0,0.0,0.0,0.0,1
3,0.680851,0.2,2,4,0.813953,0.6,0.5,0.166667,0.071973,0.747716,...,0.449612,0.24895,0.493493,0.0,0.0,1.0,0.0,0.0,0.0,1
4,0.191489,0.8,2,2,0.209302,0.4,0.5,0.5,0.220049,0.285658,...,0.379845,0.16662,0.082082,1.0,0.0,1.0,0.0,0.0,0.0,1


In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Split every frame into features and target.
# 'Unnamed: 0' is the row index that was written into the CSV and then read back
# as an ordinary column (it shows up in train_data.head()). It identifies rows
# rather than describing customers, so it is removed together with the label
# instead of being fed to the model as a feature.
# errors='ignore' keeps the drop valid for a split that does not carry that column;
# a missing target still fails loudly on the explicit column lookup below.
TARGET_COLUMN = 'Attrition_Flag'
NON_FEATURE_COLUMNS = [TARGET_COLUMN, 'Unnamed: 0']

X_train = train_data.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')
y_train = train_data[TARGET_COLUMN]

X_val = validation_data.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')
y_val = validation_data[TARGET_COLUMN]

X_test = test_data.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')
y_test = test_data[TARGET_COLUMN]

# Convert data to float32 PyTorch tensors.
# unsqueeze(1) turns the 1-D label vector into a column so it lines up with the
# single-unit output of the models defined below.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)

X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).unsqueeze(1)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Pair features with labels for each split
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batch loaders: only the training split is reshuffled every epoch
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the Fully Connected Neural Network
class FCNN(nn.Module):
    """Fully connected binary classifier.

    The network is a stack of ``Linear -> ReLU -> BatchNorm1d -> Dropout`` stages
    (one per entry of ``hidden_sizes``) followed by a single-unit ``Linear`` layer
    and a ``Sigmoid``, so the output is one value in [0, 1] per sample.

    Args:
        input_dim: Number of input features per sample.
        hidden_sizes: Width of each hidden stage, in order. The default
            ``(64, 128, 64)`` reproduces the original fixed architecture.
        dropout_rate: Dropout probability used after every hidden stage.
            The default ``0.3`` reproduces the original fixed architecture.
    """

    def __init__(self, input_dim, hidden_sizes=(64, 128, 64), dropout_rate=0.3):
        super(FCNN, self).__init__()
        layers = []
        in_features = input_dim
        # Layers are created in forward order so the positions inside
        # nn.Sequential (and therefore the state_dict keys) match the
        # hand-written stack this replaces.
        for width in hidden_sizes:
            layers.extend([
                nn.Linear(in_features, width),
                nn.ReLU(),
                nn.BatchNorm1d(width),
                nn.Dropout(dropout_rate),
            ])
            in_features = width
        layers.extend([nn.Linear(in_features, 1), nn.Sigmoid()])
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output of the stack for a batch of feature rows."""
        return self.fc(x)

# Build the network for the current feature count, with BCE loss and Adam
input_dim = X_train.shape[1]
model = FCNN(input_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Per epoch: one optimisation pass over train_loader, then one scoring pass over val_loader
epochs = 10
for epoch in range(epochs):
    # -- optimisation pass (model in training mode) --
    model.train()
    train_loss = 0.0
    for features, labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        train_loss += batch_loss.item()

    # -- scoring pass (model in eval mode, gradient tracking disabled) --
    model.eval()
    val_loss = 0.0
    val_outputs, val_targets = [], []
    with torch.no_grad():
        for features, labels in val_loader:
            probs = model(features)
            val_loss += criterion(probs, labels).item()
            val_outputs.extend(probs.numpy().ravel())
            val_targets.extend(labels.numpy().ravel())

    # Label-based metrics use the outputs thresholded at 0.5;
    # ROC AUC is computed from the raw outputs.
    val_predictions = [int(p > 0.5) for p in val_outputs]
    val_accuracy = accuracy_score(val_targets, val_predictions)
    val_precision = precision_score(val_targets, val_predictions)
    val_recall = recall_score(val_targets, val_predictions)
    val_f1 = f1_score(val_targets, val_predictions)
    val_roc_auc = roc_auc_score(val_targets, val_outputs)

    # Losses are reported as the mean over batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss/len(train_loader):.4f}, "
          f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_accuracy:.4f}, "
          f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}, "
          f"ROC AUC: {val_roc_auc:.4f}")

# Score the trained model once on the test split
model.eval()
test_outputs, test_targets = [], []
with torch.no_grad():
    for features, labels in test_loader:
        probs = model(features)
        test_outputs.extend(probs.numpy().ravel())
        test_targets.extend(labels.numpy().ravel())

# Same thresholding and metric set as in the validation pass
test_predictions = [int(p > 0.5) for p in test_outputs]
test_accuracy = accuracy_score(test_targets, test_predictions)
test_precision = precision_score(test_targets, test_predictions)
test_recall = recall_score(test_targets, test_predictions)
test_f1 = f1_score(test_targets, test_predictions)
test_roc_auc = roc_auc_score(test_targets, test_outputs)

print("\nTest Results:")
print(f"Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, "
      f"Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}, ROC AUC: {test_roc_auc:.4f}")


Epoch 1/10, Training Loss: 0.5002, Validation Loss: 0.4029, Accuracy: 0.8286, Precision: 0.9334, Recall: 0.8571, F1 Score: 0.8936, ROC AUC: 0.8496
Epoch 2/10, Training Loss: 0.3746, Validation Loss: 0.3404, Accuracy: 0.8528, Precision: 0.9505, Recall: 0.8700, F1 Score: 0.9085, ROC AUC: 0.9016
Epoch 3/10, Training Loss: 0.3337, Validation Loss: 0.3341, Accuracy: 0.8523, Precision: 0.9534, Recall: 0.8665, F1 Score: 0.9079, ROC AUC: 0.9170
Epoch 4/10, Training Loss: 0.3078, Validation Loss: 0.3621, Accuracy: 0.8459, Precision: 0.9621, Recall: 0.8500, F1 Score: 0.9026, ROC AUC: 0.9197
Epoch 5/10, Training Loss: 0.2911, Validation Loss: 0.3215, Accuracy: 0.8662, Precision: 0.9685, Recall: 0.8688, F1 Score: 0.9160, ROC AUC: 0.9329
Epoch 6/10, Training Loss: 0.2880, Validation Loss: 0.2851, Accuracy: 0.8825, Precision: 0.9632, Recall: 0.8941, F1 Score: 0.9274, ROC AUC: 0.9341
Epoch 7/10, Training Loss: 0.2710, Validation Loss: 0.2653, Accuracy: 0.8933, Precision: 0.9667, Recall: 0.9041, F1 Sc

In [6]:
from sklearn.model_selection import train_test_split
import itertools

# Define hyperparameter search space
param_grid = {
    'learning_rate': [0.001, 0.0005, 0.0001],
    'batch_size': [32, 64],
    'dropout_rate': [0.2, 0.3, 0.4],
    'hidden_sizes': [(64, 128, 64), (128, 256, 128)]
}


class FCNN(nn.Module):
    """Fully connected binary classifier with configurable width and dropout.

    One ``Linear -> ReLU -> BatchNorm1d -> Dropout`` stage is built per entry of
    ``hidden_sizes``, followed by a single-unit ``Linear`` layer and a ``Sigmoid``.
    Defined once, outside the search loop, and parameterised explicitly so that
    a model's architecture does not depend on the loop variables that happen to
    be live when it is constructed.

    Args:
        input_dim: Number of input features per sample.
        hidden_sizes: Width of each hidden stage, in order.
        dropout_rate: Dropout probability used after every hidden stage.
    """

    def __init__(self, input_dim, hidden_sizes=(64, 128, 64), dropout_rate=0.3):
        super(FCNN, self).__init__()
        layers = []
        in_features = input_dim
        for width in hidden_sizes:
            layers.extend([
                nn.Linear(in_features, width),
                nn.ReLU(),
                nn.BatchNorm1d(width),
                nn.Dropout(dropout_rate),
            ])
            in_features = width
        layers.extend([nn.Linear(in_features, 1), nn.Sigmoid()])
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output of the stack for a batch of feature rows."""
        return self.fc(x)


# Grid Search over all combinations
best_accuracy = 0
best_params = None

for lr, batch_size, dropout_rate, hidden_sizes in itertools.product(
    param_grid['learning_rate'],
    param_grid['batch_size'],
    param_grid['dropout_rate'],
    param_grid['hidden_sizes']
):
    # Build a training loader for THIS combination. Training on the shared
    # train_loader would silently ignore the batch_size being searched and make
    # the reported "best" batch size meaningless.
    grid_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Initialize model, loss, and optimizer with the current hyperparameters
    model = FCNN(X_train.shape[1], hidden_sizes=hidden_sizes, dropout_rate=dropout_rate)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Short training run: only a few epochs per combination to keep the search cheap
    for epoch in range(5):
        model.train()
        for X_batch, y_batch in grid_train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

    # Evaluate on validation data (batch size is irrelevant in eval mode,
    # so the shared val_loader is reused)
    model.eval()
    val_outputs = []
    val_targets = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            val_outputs.extend(outputs.numpy().flatten())
            val_targets.extend(y_batch.numpy().flatten())
    val_predictions = [1 if p > 0.5 else 0 for p in val_outputs]
    val_accuracy = accuracy_score(val_targets, val_predictions)

    # Track the combination with the highest validation accuracy
    # (ties keep the earliest combination)
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_params = {
            'learning_rate': lr,
            'batch_size': batch_size,
            'dropout_rate': dropout_rate,
            'hidden_sizes': hidden_sizes
        }

print(f"Best Accuracy: {best_accuracy:.4f}")
print(f"Best Hyperparameters: {best_params}")


Best Accuracy: 0.8983
Best Hyperparameters: {'learning_rate': 0.001, 'batch_size': 64, 'dropout_rate': 0.2, 'hidden_sizes': (128, 256, 128)}


In [8]:
# Define the best model with selected hyperparameters
class FCNNBest(nn.Module):
    """Fully connected binary classifier with hidden widths 128 -> 256 -> 128.

    Every hidden stage is ``Linear -> ReLU -> BatchNorm1d -> Dropout(0.2)``; the
    head is a single-unit ``Linear`` layer followed by a ``Sigmoid``.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        stages = []
        fan_in = input_dim
        # Modules are created in forward order, one hidden stage at a time.
        for fan_out in (128, 256, 128):
            stages += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(0.2),
            ]
            fan_in = fan_out
        stages += [nn.Linear(fan_in, 1), nn.Sigmoid()]
        self.fc = nn.Sequential(*stages)

    def forward(self, x):
        """Run a batch of feature rows through the stack."""
        return self.fc(x)

# Fresh FCNNBest instance with BCE loss and Adam
model = FCNNBest(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 30 epochs; train/validation loaders are rebuilt with batch size 64
epochs = 30
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

for epoch in range(epochs):
    # -- optimisation pass (model in training mode) --
    model.train()
    train_loss = 0.0
    for features, labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        train_loss += batch_loss.item()

    # -- scoring pass (model in eval mode, gradient tracking disabled) --
    model.eval()
    val_loss = 0.0
    val_outputs, val_targets = [], []
    with torch.no_grad():
        for features, labels in val_loader:
            probs = model(features)
            val_loss += criterion(probs, labels).item()
            val_outputs.extend(probs.numpy().ravel())
            val_targets.extend(labels.numpy().ravel())

    # Label-based metrics use the outputs thresholded at 0.5;
    # ROC AUC is computed from the raw outputs.
    val_predictions = [int(p > 0.5) for p in val_outputs]
    val_accuracy = accuracy_score(val_targets, val_predictions)
    val_precision = precision_score(val_targets, val_predictions)
    val_recall = recall_score(val_targets, val_predictions)
    val_f1 = f1_score(val_targets, val_predictions)
    val_roc_auc = roc_auc_score(val_targets, val_outputs)

    # Losses are reported as the mean over batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss/len(train_loader):.4f}, "
          f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_accuracy:.4f}, "
          f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}, "
          f"ROC AUC: {val_roc_auc:.4f}")

# Final scoring on the test split (test_loader is the one already in scope)
model.eval()
test_outputs, test_targets = [], []
with torch.no_grad():
    for features, labels in test_loader:
        probs = model(features)
        test_outputs.extend(probs.numpy().ravel())
        test_targets.extend(labels.numpy().ravel())

# Same thresholding and metric set as in the validation pass
test_predictions = [int(p > 0.5) for p in test_outputs]
test_accuracy = accuracy_score(test_targets, test_predictions)
test_precision = precision_score(test_targets, test_predictions)
test_recall = recall_score(test_targets, test_predictions)
test_f1 = f1_score(test_targets, test_predictions)
test_roc_auc = roc_auc_score(test_targets, test_outputs)

print("\nTest Results with Best Hyperparameters:")
print(f"Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, "
      f"Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}, ROC AUC: {test_roc_auc:.4f}")


Epoch 1/30, Training Loss: 0.4094, Validation Loss: 0.3334, Accuracy: 0.8548, Precision: 0.9466, Recall: 0.8765, F1 Score: 0.9102, ROC AUC: 0.8973
Epoch 2/30, Training Loss: 0.3032, Validation Loss: 0.3075, Accuracy: 0.8736, Precision: 0.9570, Recall: 0.8894, F1 Score: 0.9220, ROC AUC: 0.9212
Epoch 3/30, Training Loss: 0.2686, Validation Loss: 0.3339, Accuracy: 0.8548, Precision: 0.9631, Recall: 0.8600, F1 Score: 0.9086, ROC AUC: 0.9229
Epoch 4/30, Training Loss: 0.2449, Validation Loss: 0.3004, Accuracy: 0.8726, Precision: 0.9634, Recall: 0.8818, F1 Score: 0.9208, ROC AUC: 0.9354
Epoch 5/30, Training Loss: 0.2263, Validation Loss: 0.2947, Accuracy: 0.8795, Precision: 0.9691, Recall: 0.8847, F1 Score: 0.9250, ROC AUC: 0.9377
Epoch 6/30, Training Loss: 0.2115, Validation Loss: 0.2661, Accuracy: 0.8889, Precision: 0.9659, Recall: 0.8994, F1 Score: 0.9315, ROC AUC: 0.9414
Epoch 7/30, Training Loss: 0.2064, Validation Loss: 0.2510, Accuracy: 0.8973, Precision: 0.9680, Recall: 0.9076, F1 Sc

In [10]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Columns to standardise. 'Attrition_Flag' was already removed when X_train was
# split off from the target, so this filter keeps every column of X_train.
numerical_cols = [name for name in X_train.columns if name != 'Attrition_Flag']

# Fit the scaler on the training split only, then apply that same transform to
# all three splits. The feature frames are overwritten in place.
scaler = StandardScaler()
scaler.fit(X_train[numerical_cols])
for split in (X_train, X_val, X_test):
    split[numerical_cols] = scaler.transform(split[numerical_cols])

# 1. Create PyTorch Dataset
class TabularDataset(Dataset):
    """Dataset over a feature table and an optional label column.

    Args:
        X: Object exposing ``.values`` (e.g. a DataFrame); stored as a float32 tensor.
        y: Optional object exposing ``.values`` (e.g. a Series); stored as a float32
            tensor with a trailing dimension of size 1. When omitted, items are
            feature rows only.
    """

    def __init__(self, X, y=None):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        if y is None:
            self.y = None
        else:
            self.y = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        """Number of rows in the feature tensor."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, label)`` when labels exist, otherwise just ``features``."""
        features = self.X[idx]
        if self.y is None:
            return features
        return features, self.y[idx]

# Wrap each (features, labels) pair in a TabularDataset
train_dataset, val_dataset, test_dataset = (
    TabularDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Mini-batches of 32 for every split; only the training split is reshuffled each epoch
train_loader, val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32, shuffle=reshuffle)
    for split_dataset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

# 2. Define TabTransformer Model
class TabTransformer(nn.Module):
    """Binary classifier built from transformer encoder layers.

    Each feature row is linearly projected to ``attention_dim``, given a sequence
    dimension of length 1, passed through ``num_layers`` ``nn.TransformerEncoderLayer``
    blocks, averaged over the sequence dimension and mapped to a sigmoid output.

    Args:
        input_dim: Number of input features per sample.
        attention_dim: Model width used by the encoder layers (``d_model``).
        num_heads: Number of attention heads per encoder layer (``nhead``).
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, input_dim, attention_dim, num_heads, num_layers):
        super().__init__()
        # Encoder blocks are created first, then the input projection, then the head.
        encoder_blocks = []
        for _ in range(num_layers):
            encoder_blocks.append(
                nn.TransformerEncoderLayer(d_model=attention_dim, nhead=num_heads, batch_first=True)
            )
        self.attention_layers = nn.ModuleList(encoder_blocks)
        self.fc_in = nn.Linear(input_dim, attention_dim)
        self.fc_out = nn.Sequential(
            nn.Linear(attention_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return one sigmoid output per input row."""
        # Project into the encoder width and insert the sequence axis at position 1
        tokens = self.fc_in(x).unsqueeze(1)
        for block in self.attention_layers:
            tokens = block(tokens)
        # Average over the sequence axis, then apply the classification head
        return self.fc_out(tokens.mean(dim=1))

# Model configuration: feature count from the data, fixed encoder hyperparameters
input_dim = X_train.shape[1]
attention_dim, num_heads, num_layers = 64, 4, 2

model = TabTransformer(input_dim, attention_dim, num_heads, num_layers)

# BCE loss on the sigmoid outputs, optimised with Adam
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 3. Training loop: one optimisation pass and one validation pass per epoch
epochs = 10
for epoch in range(epochs):
    # -- optimisation pass (model in training mode) --
    model.train()
    train_loss = 0.0
    for features, labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        train_loss += batch_loss.item()

    # -- scoring pass (model in eval mode, gradient tracking disabled) --
    model.eval()
    val_loss = 0.0
    val_outputs, val_targets = [], []
    with torch.no_grad():
        for features, labels in val_loader:
            probs = model(features)
            val_loss += criterion(probs, labels).item()
            val_outputs.extend(probs.numpy().ravel())
            val_targets.extend(labels.numpy().ravel())

    # Label-based metrics use the outputs thresholded at 0.5;
    # ROC AUC is computed from the raw outputs.
    val_predictions = [int(p > 0.5) for p in val_outputs]
    val_accuracy = accuracy_score(val_targets, val_predictions)
    val_precision = precision_score(val_targets, val_predictions)
    val_recall = recall_score(val_targets, val_predictions)
    val_f1 = f1_score(val_targets, val_predictions)
    val_roc_auc = roc_auc_score(val_targets, val_outputs)

    # Losses are reported as the mean over batches
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {train_loss/len(train_loader):.4f}, "
          f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_accuracy:.4f}, "
          f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}, "
          f"ROC AUC: {val_roc_auc:.4f}")

# 4. Test evaluation: score the trained model once on the test split
model.eval()
test_outputs, test_targets = [], []
with torch.no_grad():
    for features, labels in test_loader:
        probs = model(features)
        test_outputs.extend(probs.numpy().ravel())
        test_targets.extend(labels.numpy().ravel())

# Same thresholding and metric set as in the validation pass
test_predictions = [int(p > 0.5) for p in test_outputs]
test_accuracy = accuracy_score(test_targets, test_predictions)
test_precision = precision_score(test_targets, test_predictions)
test_recall = recall_score(test_targets, test_predictions)
test_f1 = f1_score(test_targets, test_predictions)
test_roc_auc = roc_auc_score(test_targets, test_outputs)

print("\nTest Results:")
print(f"Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, "
      f"Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}, ROC AUC: {test_roc_auc:.4f}")



Epoch 1/10, Training Loss: 0.2926, Validation Loss: 0.2916, Accuracy: 0.8854, Precision: 0.9723, Recall: 0.8888, F1 Score: 0.9287, ROC AUC: 0.9441
Epoch 2/10, Training Loss: 0.2118, Validation Loss: 0.2830, Accuracy: 0.8844, Precision: 0.9791, Recall: 0.8812, F1 Score: 0.9276, ROC AUC: 0.9547
Epoch 3/10, Training Loss: 0.1872, Validation Loss: 0.2596, Accuracy: 0.9101, Precision: 0.9617, Recall: 0.9300, F1 Score: 0.9456, ROC AUC: 0.9492
Epoch 4/10, Training Loss: 0.1770, Validation Loss: 0.2232, Accuracy: 0.8993, Precision: 0.9746, Recall: 0.9035, F1 Score: 0.9377, ROC AUC: 0.9590
Epoch 5/10, Training Loss: 0.1581, Validation Loss: 0.2255, Accuracy: 0.9096, Precision: 0.9773, Recall: 0.9135, F1 Score: 0.9444, ROC AUC: 0.9601
Epoch 6/10, Training Loss: 0.1558, Validation Loss: 0.2354, Accuracy: 0.9062, Precision: 0.9785, Recall: 0.9082, F1 Score: 0.9420, ROC AUC: 0.9604
Epoch 7/10, Training Loss: 0.1371, Validation Loss: 0.2197, Accuracy: 0.9106, Precision: 0.9780, Recall: 0.9141, F1 Sc