In [1]:
# For simple Logistic Regression (standalone model, not in neural networks), we typically don't use PyTorch either. The most common tools are: Scikit-learn (Most Common)

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch

df = pd.read_csv('ChurnData.csv')
df.head()

X = df.drop(['churn'], axis=1)
X = X.values
X = StandardScaler().fit_transform(X)

y = df[['churn']]
y = y.values

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

X_train = torch.FloatTensor(X_train)
X_val = torch.FloatTensor(X_val)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_val = torch.FloatTensor(y_val)
y_test = torch.FloatTensor(y_test)

In [3]:
from torch.utils.data import TensorDataset, DataLoader
    
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [4]:
import torch.nn as nn 

class LogisticRegModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear = nn.Linear(input_size, 1)  # note that we use Linear here like Linear Regressoin

    def forward(self, x):
        return torch.sigmoid(self.linear(x))  # note that we use this instead of self.linear(x) in Linear Regression
        # self.linear(x) gives us raw scores
        # sigmoid(self.linear(x)) converts scores to probabilities (0 to 1)


input_size = X_train.shape[1]
model = LogisticRegModel(input_size)
criterion = nn.BCELoss()  #  Binary Cross-Entropy Loss (Used specifically for binary classification)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

num_epochs = 100

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0
    
    # Training
    for batch_X, batch_y in train_loader:  # we can skip this line and use X_train and y_train instead of batch_X and batch_y
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()
    
    # Validation
    model.eval()
    total_val_loss = 0
    train_preds = []
    train_true = []
    val_preds = []
    val_true = []
    
    with torch.no_grad():
        # Collect training predictions
        for batch_X, batch_y in train_loader:
            batch_pred = model(batch_X)
            train_preds.extend((batch_pred >= 0.5).float().numpy())
            train_true.extend(batch_y.numpy())
        
        # Collect validation predictions
        for batch_X, batch_y in val_loader:
            batch_pred = model(batch_X)
            val_loss = criterion(batch_pred, batch_y)
            total_val_loss += val_loss.item()
            val_preds.extend((batch_pred >= 0.5).float().numpy())
            val_true.extend(batch_y.numpy())
    
    # Calculate accuracies and average losses
    train_acc = accuracy_score(train_true, train_preds)
    val_acc = accuracy_score(val_true, val_preds)
    avg_train_loss = total_train_loss / len(train_loader)
    avg_val_loss = total_val_loss / len(val_loader)
    
    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Average Train Loss: {avg_train_loss:.4f}')
        print(f'Average Val Loss: {avg_val_loss:.4f}')
        print(f'Train Accuracy: {train_acc:.4f}')
        print(f'Validation Accuracy: {val_acc:.4f}')
        print('-' * 50)

# Final evaluation on test set
model.eval()
test_preds = []
test_true = []
total_test_loss = 0

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_pred = model(batch_X)
        test_loss = criterion(batch_pred, batch_y)
        total_test_loss += test_loss.item()
        test_preds.extend((batch_pred >= 0.5).float().numpy())
        test_true.extend(batch_y.numpy())

avg_test_loss = total_test_loss / len(test_loader)
test_acc = accuracy_score(test_true, test_preds)

print('\nTest Results:')
print(f'Average Test Loss: {avg_test_loss:.4f}')
print(f'Test Accuracy: {test_acc:.4f}')
print('\nClassification Report:')
print(classification_report(test_true, test_preds))

Epoch [10/100]
Average Train Loss: 0.4873
Average Val Loss: 0.7246
Train Accuracy: 0.7571
Validation Accuracy: 0.6000
--------------------------------------------------
Epoch [20/100]
Average Train Loss: 0.4493
Average Val Loss: 0.7307
Train Accuracy: 0.8071
Validation Accuracy: 0.5667
--------------------------------------------------
Epoch [30/100]
Average Train Loss: 0.4060
Average Val Loss: 0.7586
Train Accuracy: 0.8143
Validation Accuracy: 0.5667
--------------------------------------------------
Epoch [40/100]
Average Train Loss: 0.3780
Average Val Loss: 0.8134
Train Accuracy: 0.8214
Validation Accuracy: 0.5667
--------------------------------------------------
Epoch [50/100]
Average Train Loss: 0.3765
Average Val Loss: 0.8251
Train Accuracy: 0.8143
Validation Accuracy: 0.5667
--------------------------------------------------
Epoch [60/100]
Average Train Loss: 0.3900
Average Val Loss: 0.8616
Train Accuracy: 0.8143
Validation Accuracy: 0.5667
-------------------------------------