# ISLP - Chapter 10 - Exercise 7
### Author: pzuehlke

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchinfo import summary

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

We begin by setting the device to GPU if available:

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda:0


Let's load the dataset:

In [3]:
data = pd.read_csv("Default.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   default  10000 non-null  object 
 1   student  10000 non-null  object 
 2   balance  10000 non-null  float64
 3   income   10000 non-null  float64
dtypes: float64(2), object(2)
memory usage: 312.6+ KB


In [4]:
data = data.dropna()
data.head(10)

Unnamed: 0,default,student,balance,income
0,No,No,729.526495,44361.625074
1,No,Yes,817.180407,12106.1347
2,No,No,1073.549164,31767.138947
3,No,No,529.250605,35704.493935
4,No,No,785.655883,38463.495879
5,No,Yes,919.58853,7491.558572
6,No,No,825.513331,24905.226578
7,No,Yes,808.667504,17600.451344
8,No,No,1161.057854,37468.529288
9,No,No,0.0,29275.268293


Note that `student` and `default` are the only categorical variables. Let's
convert them to numeric, create the feature matrix and response variable, and
split the data into training and test sets in an $ 80 / 20 $ proportion:

In [5]:
data["default"] = data["default"].map({"Yes": 1, "No": 0})
data["student"] = data["student"].map({"Yes": 1, "No": 0})
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   default  10000 non-null  int64  
 1   student  10000 non-null  int64  
 2   balance  10000 non-null  float64
 3   income   10000 non-null  float64
dtypes: float64(2), int64(2)
memory usage: 312.6 KB


In [6]:
X = data.drop("default", axis=1)
y = data["default"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [7]:
print(X_train["student"].mean())

0.29475


We see that $ 29.5\% $ of the people in the training data are students.
Next let's scale the features:

In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  # transform and not fit_transform to prevent data leakage

print(round(X_train_scaled.mean(), 4),
      round(X_train_scaled.std() , 4)
)
print(round(X_test_scaled.mean(), 4),
      round(X_test_scaled.std() , 4)
)

-0.0 1.0
0.0054 1.0064


Now we need to convert everything to Pytorch tensors and create a DataLoader for batch training:

In [9]:
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).reshape(-1, 1).to(device)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

We are now ready to define the neural network specified in the statement (one single hidden layer with $ 10 $ units plus dropout):

<img src="nn_architecture.svg" alt="Neural network diagram" width="800" height="400">

In [10]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim=10, dropout_rate=0.4):
        super().__init__()
        self.layer_1 = nn.Linear(input_dim, hidden_dim)  # fully connected layer 1
        self.dropout = nn.Dropout(dropout_rate)
        self.layer_2 = nn.Linear(hidden_dim, 1)  # fully connected hidden layer, 10 units
        
    def forward(self, x):
        x = torch.relu(self.layer_1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.layer_2(x))
        return x

Let's instantiate the neural net:

In [11]:
input_dim = X_train.shape[1]
hidden_dim = 10
dropout_rate = 0.5
nn_model = NeuralNetwork(input_dim, hidden_dim, dropout_rate).to(device)
summary(nn_model, input_size=X_train.shape,
    col_names=["input_size", "output_size", "num_params"]
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
NeuralNetwork                            [8000, 3]                 [8000, 1]                 --
├─Linear: 1-1                            [8000, 3]                 [8000, 10]                40
├─Dropout: 1-2                           [8000, 10]                [8000, 10]                --
├─Linear: 1-3                            [8000, 10]                [8000, 1]                 11
Total params: 51
Trainable params: 51
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.41
Input size (MB): 0.10
Forward/backward pass size (MB): 0.70
Params size (MB): 0.00
Estimated Total Size (MB): 0.80

As expected, the NN has $ 3 \times 10 + 10 = 40 $ (first layer) plus $ 10 + 1 = 11 $
(hidden layer) parameters, for a total of $ 51 $.
Now we define the loss criterion (binary cross-entropy) and the optimizer (Adam):

In [12]:
nn_criterion = nn.BCELoss()
nn_optimizer = optim.Adam(nn_model.parameters(), lr=0.001)

We are all set to train the neural net. The following cell should takes about $ 7 $ seconds to run
using $ 100 $ epochs for training and over a minute for $ 1\,000 $ epochs.

In [13]:
nn_model.train()  # set model to training mode
num_epochs = 100
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for inputs, labels in train_loader:  # loop through batches
        # Forward pass:
        outputs = nn_model(inputs)  # get predictions from model
        loss = nn_criterion(outputs, labels)  # calculate loss
        
        # Backward and optimize:
        nn_optimizer.zero_grad()  # clear previous gradients
        loss.backward()  # compute gradients
        nn_optimizer.step()  # update weights
        
        epoch_loss += loss.item()  # accumulate batch loss
    
    # Print progress every 10 epochs:
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], loss: {epoch_loss/len(train_loader):.4f}")

Epoch [10/100], loss: 0.1333
Epoch [20/100], loss: 0.0997
Epoch [30/100], loss: 0.0917
Epoch [40/100], loss: 0.0923
Epoch [50/100], loss: 0.0908
Epoch [60/100], loss: 0.0891
Epoch [70/100], loss: 0.0860
Epoch [80/100], loss: 0.0891
Epoch [90/100], loss: 0.0848
Epoch [100/100], loss: 0.0873


In [14]:
def evaluate_model(model, X, y):
    model.eval()  # set to eval mode
    with torch.no_grad():  # disable gradient calculation
        y_pred = model(X)
        y_pred_class = (y_pred > 0.5).float()
        
        # Move to CPU for sklearn metrics:
        y_true_np = y.cpu().numpy()
        y_pred_np = y_pred_class.cpu().numpy()
        
        report = classification_report(y_true_np, y_pred_np, output_dict=True)
        
    return report

Finally, we evaluate the performance of our NN:

In [None]:
nn_report = evaluate_model(nn_model, X_test_tensor, y_test_tensor)
pd.DataFrame(nn_report).drop(["macro avg", "weighted avg"], axis=1)

Unnamed: 0,0.0,1.0,accuracy
precision,0.967337,0.9,0.967
recall,0.999481,0.121622,0.967
f1-score,0.983146,0.214286,0.967
support,1926.0,74.0,0.967


In [None]:
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

lr_report = classification_report(y_test, y_pred, output_dict=True)
pd.DataFrame(lr_report).drop(["macro avg", "weighted avg"], axis=1)

Unnamed: 0,0,1,accuracy
precision,0.974125,0.793103,0.9715
recall,0.996885,0.310811,0.9715
f1-score,0.985373,0.446602,0.9715
support,1926.0,74.0,0.9715
