# Reproducing the Single-Hidden-Layer NN from Ch.10 in ISLR2 with Pytorch

#### In the 2nd edition of [An Introduction to Statistical Learning](https://www.statlearning.com/), Deep Learning is introduced in Chapter 10. 
#### Using the [Hitters dataset](https://www.statlearning.com/resources-second-edition), the goal is to predict the Salary of a baseball player in 1987 using his performance statistics from 1986. The dataset consists of 263 players and 19 variables. We randomly split the data into a training set of 176 players (two thirds), and a test set of 87 players (one third). 

## Basic approach with Multiple Linear Regression (20 parameters)

In [3]:
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression

In [4]:
# read the Hitters table; the literal string 'NA' is parsed as a missing value
Hitters = pd.read_csv('data/Hitters.csv', header=0, na_values='NA')

# discard rows that contain a missing value ...
Hitters = Hitters.dropna()
# ... and renumber the remaining rows from zero
Hitters = Hitters.reset_index(drop=True)
print(Hitters.shape)
Hitters.head()

FileNotFoundError: [Errno 2] No such file or directory: 'data/Hitters.csv'

In [None]:
# the response is the Salary column
y = Hitters['Salary']

# one-hot encode the three categorical columns
categorical_columns = ['League', 'Division', 'NewLeague']
dummies = pd.get_dummies(Hitters[categorical_columns])

# numeric predictors: everything except the response and the categorical columns
X_prep = Hitters.drop(columns=['Salary'] + categorical_columns).astype('float64')

# keep a single indicator column for each categorical variable
kept_dummies = dummies[['League_A', 'Division_E', 'NewLeague_A']]
X = pd.concat([X_prep, kept_dummies], axis=1)
print(X.shape)
X.head()

In [None]:
# StandardScaler and train_test_split are otherwise only imported in a later
# cell; import them here so the notebook runs top-to-bottom without a NameError.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# scale so that each feature has mean zero and variance one
# NOTE: the scaler is fit on all rows, i.e. before the train/test split below,
# so the test rows contribute to the means and variances used for scaling.
scaler = StandardScaler()
X[X.columns] = scaler.fit_transform(X[X.columns])

# shuffle=False splits by row position instead of at random, which makes the
# split reproducible without a seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)

In [None]:
# baseline: least-squares linear regression fitted on the training rows
# (fit() returns the estimator itself, so LR is the fitted model)
LR = LinearRegression().fit(X_train, y_train)

# score the baseline on the held-out rows
y_pred = LR.predict(X_test)
model_skill = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"mean absolute error: {model_skill}")

## Deep Learning approach with Pytorch (1051 Parameters)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
torch.manual_seed(13)
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# network architecture
RELU_UNITS = 50
DROPOUT = 0.4

# optimisation settings
EPOCHS = 1500
BATCH_SIZE = 32
LEARNING_RATE = 0.01

# float32 tensor copies of the training features and targets
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)

# serve (features, target) pairs in mini-batches, reshuffled every epoch
dataset = TensorDataset(X_train_tensor, y_train_tensor)
dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# prefer the GPU when PyTorch reports one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

### Define Model Architecture 

In [None]:
class LR(nn.Module):
    """Single-hidden-layer regression network: Linear -> ReLU -> Dropout -> Linear.

    Args:
        in_features: Number of input features. Defaults to ``X_train.shape[1]``.
        hidden_units: Width of the hidden layer. Defaults to ``RELU_UNITS``.
        dropout: Dropout probability applied to the hidden activations.
            Defaults to ``DROPOUT``.

    The defaults are looked up in the notebook globals when the model is
    constructed, so a bare ``LR()`` builds the same network as before.

    NOTE: defining this class rebinds the name ``LR``, which an earlier cell
    uses for the fitted ``LinearRegression`` instance.
    """

    def __init__(self, in_features=None, hidden_units=None, dropout=None):
        super().__init__()

        # fall back to the notebook-level settings for anything not passed in
        if in_features is None:
            in_features = X_train.shape[1]
        if hidden_units is None:
            hidden_units = RELU_UNITS
        if dropout is None:
            dropout = DROPOUT

        self.hidden = nn.Linear(in_features=in_features, out_features=hidden_units)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Map a batch of feature rows to one predicted value per row."""
        x = self.hidden(x)
        x = self.activation(x)
        # dropout only zeroes activations while the module is in training mode
        x = self.dropout(x)
        x = self.output(x)
        return x


In [None]:
# build the network, then move its parameters to the selected device
model = LR()
model = model.to(device)
print(model)

# count the scalar entries over all parameter tensors
total_parameters = sum([weights.numel() for weights in model.parameters()])
print(f"model parameters: {total_parameters}")

### Train Model

In [None]:
# mean-squared-error objective, minimised with RMSprop over all model parameters
criterion = nn.MSELoss()
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
size = len(dataloader.dataset)
loss_all = []

# Dropout is only applied in training mode; set it explicitly so that
# re-running this cell after an evaluation still trains with dropout.
model.train()

for epoch in range(EPOCHS):
    # loss of the most recent mini-batch, kept as a plain float
    last_batch_loss = None

    # loop over the mini-batches of one epoch
    for x_batch, y_batch in dataloader:
        # the model lives on `device`, so the batch has to be moved there too
        x_batch = x_batch.to(device)
        # reshape targets to a column so they match the model output shape
        y_batch = y_batch.to(device).view(-1, 1)

        # compute prediction error
        y_batch_pred = model(x_batch)
        loss = criterion(y_batch_pred, y_batch)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` itself stays a tensor; the float goes into its own variable so
        # the per-epoch bookkeeping below never depends on the batch index
        last_batch_loss = loss.item()

    # one point per epoch: the loss of the epoch's final mini-batch
    if last_batch_loss is not None:
        loss_all.append(last_batch_loss)

### Plot Training Loss

In [None]:
# training-loss curve: one point per recorded epoch
label_font = {'fontsize': 20}
plt.figure(figsize=(10, 8))
plt.plot(loss_all)
plt.xlabel('Epochs', **label_font)
plt.ylabel('Loss [MSE]', **label_font)

### Skill on test set

In [None]:
mae = nn.L1Loss()

# Switch to evaluation mode so dropout is disabled; in training mode each
# forward pass zeroes random hidden units and the reported error is random.
model.eval()

# float32 tensors on the same device as the model; targets as a column so
# they line up with the (n, 1) model output
X_test_tensor = torch.tensor(X_test.values.astype(np.float32)).to(device)
y_test_tensor = torch.tensor(y_test.values.astype(np.float32)).view(-1, 1).to(device)

# no gradients are needed for evaluation
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)

# L1Loss takes (input, target); .item() yields a Python float on any device
model_skill = mae(y_pred_tensor, y_test_tensor).item()
print(f"mean absolute error: {model_skill}")