In [16]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

In [22]:
# Reload CSV safely
df = pd.read_csv("preprocessed_churn.csv")

# Convert all columns except target to numeric
for col in df.columns:
    if col != "Churn":
        df[col] = pd.to_numeric(df[col], errors="coerce")

# Fill any accidental NaNs (from coercion)
df = df.fillna(0)

# Split features/target
X = df.drop("Churn", axis=1).astype(np.float32)
y = df["Churn"].astype(np.float32)

In [23]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [24]:
class ChurnDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [25]:
train_dataset = ChurnDataset(X_train, y_train)
val_dataset   = ChurnDataset(X_val, y_val)

In [27]:
class_counts = y_train.value_counts().to_dict()
weights = [1.0 / class_counts[int(label)] for label in y_train]
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

In [28]:
train_loader = DataLoader(train_dataset, batch_size=128, sampler=sampler)
val_loader   = DataLoader(val_dataset, batch_size=128, shuffle=False)

In [29]:
class ChurnANN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(32, 16),
            nn.BatchNorm1d(16),
            nn.SELU(),
            nn.Dropout(0.2),

            nn.Linear(16, 8),
            nn.BatchNorm1d(8),
            nn.SELU(),
            nn.Dropout(0.2),

            nn.Linear(8, 1),
            nn.Sigmoid()
        )
        # He initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)

    def forward(self, x):
        return self.net(x)

In [37]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChurnANN(input_dim=X.shape[1]).to(device)

import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=1.0, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
    def forward(self, inputs, targets):
        inputs = torch.clamp(inputs, 1e-8, 1-1e-8)
        bce = F.binary_cross_entropy(inputs, targets, reduction="none")
        pt = torch.where(targets == 1, inputs, 1 - inputs)
        loss = self.alpha * (1 - pt) ** self.gamma * bce
        return loss.mean()

criterion = FocalLoss()
optimizer = AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

from torch.optim.lr_scheduler import SequentialLR, LinearLR
warmup_scheduler = LinearLR(optimizer, start_factor=0.1, total_iters=5)
cosine_scheduler = CosineAnnealingLR(optimizer, T_max=20)
scheduler = SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_scheduler], milestones=[5])

In [38]:
def train_model(model, train_loader, val_loader, epochs=100, patience=10):
    best_val_loss = float("inf")
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        train_losses = []
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch).squeeze()
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        model.eval()
        val_losses, y_true, y_pred = [], [], []
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch).squeeze()
                loss = criterion(outputs, y_batch)
                val_losses.append(loss.item())
                y_true.extend(y_batch.cpu().numpy())
                y_pred.extend(outputs.cpu().numpy())

        val_loss = np.mean(val_losses)
        roc_auc = roc_auc_score(y_true, y_pred)
        pr_auc  = average_precision_score(y_true, y_pred)
        f1      = f1_score(y_true, (np.array(y_pred) > 0.5).astype(int))

        print(f"Epoch {epoch+1}: TrainLoss={np.mean(train_losses):.4f}, ValLoss={val_loss:.4f}, ROC_AUC={roc_auc:.4f}, PR_AUC={pr_auc:.4f}, F1={f1:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), "best_model.pth")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

        scheduler.step()

    model.load_state_dict(torch.load("best_model.pth"))
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch).squeeze()
            y_true.extend(y_batch.cpu().numpy())
            y_pred.extend(outputs.cpu().numpy())

    from sklearn.metrics import accuracy_score, precision_recall_curve
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    f1_scores = 2*precisions*recalls/(precisions+recalls+1e-8)
    best_threshold = thresholds[np.argmax(f1_scores)]
    print(f"Best threshold: {best_threshold:.3f}")
    final_preds = (np.array(y_pred) > best_threshold).astype(int)
    final_acc = accuracy_score(y_true, final_preds)
    print(f"Final Accuracy (best threshold): {final_acc:.4f}")

train_model(model, train_loader, val_loader, epochs=200, patience=10)

Epoch 1: TrainLoss=0.4323, ValLoss=0.2573, ROC_AUC=0.4921, PR_AUC=0.2653, F1=0.3810
Epoch 2: TrainLoss=0.3496, ValLoss=0.2003, ROC_AUC=0.6450, PR_AUC=0.3651, F1=0.4851
Epoch 3: TrainLoss=0.2862, ValLoss=0.1690, ROC_AUC=0.7312, PR_AUC=0.4371, F1=0.5260
Epoch 4: TrainLoss=0.2334, ValLoss=0.1638, ROC_AUC=0.7675, PR_AUC=0.4820, F1=0.5361
Epoch 5: TrainLoss=0.2076, ValLoss=0.1539, ROC_AUC=0.7836, PR_AUC=0.5128, F1=0.5512




Epoch 6: TrainLoss=0.2021, ValLoss=0.1488, ROC_AUC=0.7980, PR_AUC=0.5412, F1=0.5630
Epoch 7: TrainLoss=0.1863, ValLoss=0.1474, ROC_AUC=0.7998, PR_AUC=0.5481, F1=0.5591
Epoch 8: TrainLoss=0.1799, ValLoss=0.1469, ROC_AUC=0.8069, PR_AUC=0.5574, F1=0.5635
Epoch 9: TrainLoss=0.1777, ValLoss=0.1447, ROC_AUC=0.8102, PR_AUC=0.5658, F1=0.5671
Epoch 10: TrainLoss=0.1666, ValLoss=0.1424, ROC_AUC=0.8121, PR_AUC=0.5680, F1=0.5699
Epoch 11: TrainLoss=0.1638, ValLoss=0.1408, ROC_AUC=0.8134, PR_AUC=0.5705, F1=0.5756
Epoch 12: TrainLoss=0.1617, ValLoss=0.1419, ROC_AUC=0.8132, PR_AUC=0.5685, F1=0.5740
Epoch 13: TrainLoss=0.1602, ValLoss=0.1405, ROC_AUC=0.8153, PR_AUC=0.5726, F1=0.5777
Epoch 14: TrainLoss=0.1566, ValLoss=0.1365, ROC_AUC=0.8176, PR_AUC=0.5763, F1=0.5701
Epoch 15: TrainLoss=0.1554, ValLoss=0.1373, ROC_AUC=0.8177, PR_AUC=0.5756, F1=0.5760
Epoch 16: TrainLoss=0.1553, ValLoss=0.1397, ROC_AUC=0.8187, PR_AUC=0.5768, F1=0.5788
Epoch 17: TrainLoss=0.1526, ValLoss=0.1379, ROC_AUC=0.8194, PR_AUC=0.

In [None]:
# Cell 7: Model Evaluation on Validation Data
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_curve

model.load_state_dict(torch.load("best_model.pth"))
model.eval()

y_true, y_pred_probs = [], []
with torch.no_grad():
    for X_batch, y_batch in val_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch).squeeze()
        y_true.extend(y_batch.cpu().numpy())
        y_pred_probs.extend(outputs.cpu().numpy())


precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred_probs)
f1_scores = 2*precisions*recalls/(precisions+recalls+1e-8)
best_threshold = thresholds[np.argmax(f1_scores)]

y_pred = (np.array(y_pred_probs) > best_threshold).astype(int)

acc = accuracy_score(y_true, y_pred)
roc = roc_auc_score(y_true, y_pred_probs)
pr  = average_precision_score(y_true, y_pred_probs)
f1  = f1_score(y_true, y_pred)
cm  = confusion_matrix(y_true, y_pred)

print(f"Validation Accuracy: {acc:.4f}")
print(f"Validation ROC-AUC: {roc:.4f}")
print(f"Validation PR-AUC: {pr:.4f}")
print(f"Validation F1: {f1:.4f}")
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_true, y_pred))

Validation Accuracy: 0.7889
Validation ROC-AUC: 0.8286
Validation PR-AUC: 0.5986
Validation F1: 0.6207

Confusion Matrix:
 [[867 166]
 [131 243]]

Classification Report:
               precision    recall  f1-score   support

         0.0       0.87      0.84      0.85      1033
         1.0       0.59      0.65      0.62       374

    accuracy                           0.79      1407
   macro avg       0.73      0.74      0.74      1407
weighted avg       0.80      0.79      0.79      1407



In [40]:
dummy_input = torch.randn(1, X.shape[1])
traced_model = torch.jit.trace(model, dummy_input)
traced_model.save("churn_ann.pt")