# ISLP - Chapter 10 - Exercise 7
### Author: pzuehlke

In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score

In [24]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda:0


In [25]:
# Seed NumPy and PyTorch from a single constant so the run is reproducible:
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the `Default` data set and inspect its columns and dtypes:
data = pd.read_csv("Default.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   default  10000 non-null  object 
 1   student  10000 non-null  object 
 2   balance  10000 non-null  float64
 3   income   10000 non-null  float64
dtypes: float64(2), object(2)
memory usage: 312.6+ KB


In [26]:
# Discard every row that has a missing value, then preview the first ten rows:
data = data.dropna(axis=0, how="any")
data.head(n=10)

Unnamed: 0,default,student,balance,income
0,No,No,729.526495,44361.625074
1,No,Yes,817.180407,12106.1347
2,No,No,1073.549164,31767.138947
3,No,No,529.250605,35704.493935
4,No,No,785.655883,38463.495879
5,No,Yes,919.58853,7491.558572
6,No,No,825.513331,24905.226578
7,No,Yes,808.667504,17600.451344
8,No,No,1161.057854,37468.529288
9,No,No,0.0,29275.268293


Let's create the response and split the data into training and test sets in an $ 80 / 20 $ proportion:

In [27]:
# Encode the Yes/No columns as 0/1. Matching against both the raw label
# ("Yes") and the already-encoded value (1) keeps this cell idempotent:
# comparing only against "Yes" would turn every entry into 0 if the cell
# were executed a second time.
data = data.assign(
    default=data["default"].isin(["Yes", 1]).astype(int),
    student=data["student"].isin(["Yes", 1]).astype(int),
)

X = data.drop("default", axis=1)
y = data["default"]

# Hold out 20% of the observations for testing; the fixed seed makes the
# split reproducible:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

print(X_train["student"].mean())

0.29475


We see that about $ 29.5\% $ of the people in the training data are students. It is
important to standardize the numeric predictors; `student`, which is already coded
as $ 0 / 1 $, is passed through a one-hot encoder that keeps it as a single binary column:

In [28]:
numeric_features = ["balance", "income"]
categorical_features = ["student"]

# Standardize the numeric columns; one-hot encode `student`, dropping the
# first level so that it remains a single indicator column:
scale_numeric = ("num", StandardScaler(), numeric_features)
encode_categorical = ("cat", OneHotEncoder(drop="first"), categorical_features)
preprocessor = ColumnTransformer(transformers=[scale_numeric, encode_categorical])

# Fit on the training split only, then reuse the fitted statistics on the test split:
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Checking the result:
pd.DataFrame(data=X_train_processed).head(n=5)

Unnamed: 0,0,1,2
0,0.376487,-1.369519,1.0
1,0.388315,-1.531383,1.0
2,0.53313,1.107356,0.0
3,-0.642354,0.510267,0.0
4,-0.29349,1.014149,0.0


Let's create a procedure to encapsulate the data in a format that PyTorch can work with (compare p. $ 441 $ of the lab):

In [None]:
class DefaultDataset(Dataset):
    """Map-style dataset holding all features and targets as float32 tensors.

    Both tensors are created once and moved to `device` in the constructor.
    `targets` must expose `.values` (e.g. a pandas Series); it is stored as a
    column of shape (n_samples, 1).
    """

    def __init__(self, features, targets, device):
        feature_tensor = torch.tensor(features, dtype=torch.float32)
        target_tensor = torch.tensor(targets.values, dtype=torch.float32)
        self.features = feature_tensor.to(device)
        # Reshape to a column so the targets line up with a one-unit output layer:
        self.targets = target_tensor.reshape(-1, 1).to(device)

    def __len__(self):
        """Return the number of stored samples."""
        n_samples = len(self.features)
        return n_samples

    def __getitem__(self, idx):
        """Return the `(features, target)` pair stored at position `idx`."""
        sample = (self.features[idx], self.targets[idx])
        return sample

In [None]:
# Wrap the preprocessed splits as tensor datasets that live on `device`:
train_dataset = DefaultDataset(features=X_train_processed, targets=y_train, device=device)
test_dataset = DefaultDataset(features=X_test_processed, targets=y_test, device=device)

Now we create the dataloaders (again, following p. $ 441 $):

In [17]:
bs = 32
# Shuffle only the training batches; the order of the test batches is irrelevant:
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=bs)
# Size the input layer from the matrix the network actually consumes. The raw
# frame has the same number of columns only because the single binary
# `student` column is one-hot encoded with `drop="first"`.
input_dim = X_train_processed.shape[1]  # 3 features
print(input_dim)

3


We are now ready to define the neural network specified in the statement (one single hidden layer with $ 10 $ units plus dropout):

<img src="nn_architecture.svg" alt="Neural network diagram" width="800" height="400">

In [31]:
class DefaultNeuralNet(nn.Module):
    """Binary classifier with one hidden layer.

    The forward pass is linear -> ReLU -> dropout -> linear -> sigmoid, so it
    returns one value in (0, 1) per input row.
    """

    def __init__(self, input_dim, hidden_dim=10, dropout_rate=0.4):
        super().__init__()
        # Submodules are registered in the order fc1, dropout, fc2, relu, sigmoid;
        # this is the order reported by `named_children()`.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)  # input -> hidden layer
        self.dropout = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=1)  # hidden layer -> single output unit
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature rows to predicted probabilities of shape (batch, 1)."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.sigmoid(self.fc2(hidden))

Now we instantiate the model:

In [36]:
model = DefaultNeuralNet(input_dim, hidden_dim=10, dropout_rate=0.3).to(device)
# torchinfo is quiet by default inside Jupyter, so a `summary(...)` call that is
# not the last expression of the cell displays nothing. Build the summary
# quietly and print it explicitly. The input size is taken from the processed
# matrix, which is what the network is fed.
model_stats = summary(
    model,
    input_size=X_train_processed.shape,
    col_names=["input_size", "output_size", "num_params"],
    verbose=0,
)
print(model_stats)
# Verify model is on the correct device:
print(f"Model is on: {next(model.parameters()).device}")

Model is on: cuda:0


As expected, the NN has $ 3 \times 10 + 10 = 40 $ (hidden layer) plus $ 10 + 1 = 11 $
(output layer) parameters, for a total of $ 51 $.

In [None]:
# Binary cross-entropy on probabilities (the network's forward pass ends in a sigmoid):
criterion = nn.BCELoss()
# Adam over all trainable parameters of the network:
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training function:
def train_model(model, train_loader, criterion, optimizer, device, epochs=100):
    """Train `model` for `epochs` passes over `train_loader`.

    Parameters:
        model: the network to optimize; switched to training mode each epoch.
        train_loader: iterable yielding `(features, targets)` batches.
        criterion: loss called as `criterion(predictions, targets)`. The
            per-epoch average assumes it returns the batch mean (the default
            `reduction="mean"`).
        optimizer: optimizer bound to the parameters of `model`.
        device: device each batch is moved to before the forward pass.
        epochs: number of passes over `train_loader`.

    Returns:
        A list with one entry per epoch: the average training loss per sample.

    Raises:
        ValueError: if `train_loader` yields no samples during an epoch.
    """
    losses = []

    for epoch in range(epochs):
        model.train()  # training mode, so dropout is active
        running_loss = 0.0
        n_samples = 0

        for X_batch, y_batch in train_loader:
            # Double-check device placement:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            # Forward pass:
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            # Backward pass and optimize:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight each batch mean by its size so that a shorter final
            # batch does not count as much as a full one:
            batch_size = X_batch.shape[0]
            running_loss += loss.item() * batch_size
            n_samples += batch_size

        if n_samples == 0:
            raise ValueError("train_loader yielded no samples")

        # Calculate average per-sample loss for the epoch:
        epoch_loss = running_loss / n_samples
        losses.append(epoch_loss)

        # Print loss every 10 epochs:
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

    return losses

In [50]:
# Train the neural network:
print("Training Neural Network...")
losses = train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=100,
)

Training Neural Network...
Epoch 10/100, Loss: 0.0804
Epoch 20/100, Loss: 0.0827
Epoch 30/100, Loss: 0.0814
Epoch 40/100, Loss: 0.0837
Epoch 50/100, Loss: 0.0817
Epoch 60/100, Loss: 0.0816
Epoch 70/100, Loss: 0.0826
Epoch 80/100, Loss: 0.0814
Epoch 90/100, Loss: 0.0818
Epoch 100/100, Loss: 0.0832


In [67]:
# Evaluation function:
def evaluate_model(model, data_loader):
    """Score `model` on every batch of `data_loader`.

    The model is evaluated on the device it already lives on: each feature
    batch is moved to that device, and predictions and targets are copied to
    the CPU before conversion to NumPy. This works whether the loader yields
    CPU or GPU tensors, and the model is never relocated.

    Parameters:
        model: network whose forward pass returns probabilities.
        data_loader: iterable yielding `(features, targets)` batches.

    Returns:
        A tuple `(accuracy, conf_matrix, report, roc_auc, y_true, y_pred, y_prob)`,
        where the last three are flat lists of true labels, predicted labels
        (probability thresholded at 0.5) and predicted probabilities.

    Note:
        The model is left in evaluation mode on return.
    """
    model.eval()  # evaluation mode, so dropout is disabled

    # Device of the model's parameters (None for a parameter-free model):
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    y_true = []
    y_pred = []
    y_prob = []

    with torch.no_grad():
        for features, targets in data_loader:
            if model_device is not None:
                features = features.to(model_device)
            # Forward pass:
            outputs = model(features)
            predicted = (outputs >= 0.5).float()

            # `.cpu()` is required before `.numpy()` for tensors held on a GPU:
            y_true.extend(targets.cpu().numpy().flatten())
            y_pred.extend(predicted.cpu().numpy().flatten())
            y_prob.extend(outputs.cpu().numpy().flatten())

    # Calculate metrics:
    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_prob)

    return accuracy, conf_matrix, report, roc_auc, y_true, y_pred, y_prob

In [68]:
# Evaluate the NN:
(
    nn_accuracy,
    nn_conf_matrix,
    nn_report,
    nn_roc_auc,
    y_true,
    y_pred_nn,
    y_prob_nn,
) = evaluate_model(model, test_loader)

# Scalar metrics first, then the confusion matrix and the per-class report:
print(f"Neural Network Accuracy: {nn_accuracy:.4f}")
print(f"Neural Network ROC AUC: {nn_roc_auc:.4f}")
print("Neural Network Confusion Matrix:")
print(nn_conf_matrix)
print("Neural Network Classification Report:")
print(nn_report)

Neural Network Accuracy: 0.9705
Neural Network ROC AUC: 0.9452
Neural Network Confusion Matrix:
[[1923    3]
 [  56   18]]
Neural Network Classification Report:
              precision    recall  f1-score   support

         0.0       0.97      1.00      0.98      1926
         1.0       0.86      0.24      0.38        74

    accuracy                           0.97      2000
   macro avg       0.91      0.62      0.68      2000
weighted avg       0.97      0.97      0.96      2000

