# 10.9 Lab: Deep Learning

In [1]:
! pip install ISLP

Collecting ISLP
  Downloading ISLP-0.4.0-py3-none-any.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting lifelines (from ISLP)
  Downloading lifelines-0.29.0-py3-none-any.whl (349 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m349.3/349.3 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pygam (from ISLP)
  Downloading pygam-0.9.1-py3-none-any.whl (522 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m522.0/522.0 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from ISLP)
  Downloading pytorch_lightning-2.3.3-py3-none-any.whl (812 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.3/812.3 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchmetrics (from ISLP)
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [2]:
import numpy as np, pandas as pd
from matplotlib.pyplot import subplots
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from sklearn.model_selection import train_test_split, GridSearchCV

In [3]:
import torch
from torch import nn
from torch.optim import RMSprop
from torch.utils.data import TensorDataset

In [6]:
from torchmetrics import MeanAbsoluteError, R2Score
from torchsummary import summary
from torchvision.io import read_image

In [7]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger

In [9]:
# Seed NumPy and PyTorch with the same value so repeated runs give the same
# results.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)
# Ask PyTorch for deterministic algorithms; warn_only=True makes operations
# without a deterministic implementation emit a warning instead of raising.
torch.use_deterministic_algorithms(True, warn_only=True)

In [10]:
from torchvision.datasets import MNIST, CIFAR100
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.transforms import Resize, Normalize, CenterCrop, ToTensor

In [11]:
from ISLP.torch import SimpleDataModule, SimpleModule, ErrorTracker, rec_num_workers

In [12]:
from ISLP.torch.imdb import load_lookup, load_tensor, load_sparse, load_sequential

In [13]:
from glob import glob
import json

## 10.9.1 Single Layer Network on Hitters Data


In [14]:
# Load the Hitters data and discard every row that has a missing value,
# then display how many rows remain.
hitters_raw = load_data('Hitters')
Hitters = hitters_raw.dropna()
n = len(Hitters)
n

263

In [15]:
# Design matrix: every column except the response, with no intercept column.
predictors = Hitters.columns.drop('Salary')
model = MS(predictors, intercept=False)
X = model.fit_transform(Hitters).to_numpy()

# Response vector.
y = Hitters['Salary'].to_numpy()

In [16]:
# Hold out one third of the observations as a test set; the fixed
# random_state makes the split repeatable.
split = train_test_split(X, y, test_size=1/3, random_state=1)
X_train, X_test, y_train, y_test = split

### Linear Models

In [19]:
# Least-squares baseline, scored by mean absolute error on the test set.
hit_lm = LinearRegression()
hit_lm.fit(X_train, y_train)
pred = hit_lm.predict(X_test)
test = np.mean(np.abs(pred - y_test))
test

259.7152883314631

In [20]:
# Building blocks for a lasso fit on standardized features, plus a pipeline
# that chains the two steps.
scaler = StandardScaler(with_mean=True, with_std=True)
lasso = Lasso(warm_start=True, max_iter=30000)
pipeline_steps = [
    ('scaler', scaler),
    ('lasso', lasso),
]
standard_lasso = Pipeline(steps=pipeline_steps)

In [21]:
# Standardize the training features (this also fits `scaler`).
X_s = scaler.fit_transform(X_train)
# NOTE: this rebinds `n`, previously the number of rows in Hitters, to the
# number of training observations.
n = X_s.shape[0]

# lam_max is the largest absolute inner product between a standardized
# feature and the centered response, divided by n.
centered_y = y_train - y_train.mean()
lam_max = np.max(np.fabs(X_s.T.dot(centered_y))) / n

# 100 alphas, log-spaced from lam_max down to 0.01 * lam_max.
log_factors = np.linspace(0, np.log(0.01), 100)
param_grid = {'alpha': lam_max * np.exp(log_factors)}

In [26]:
# 10-fold cross-validation over the alpha grid, scored by negative MAE.
# NOTE(review): the search is run on the bare `lasso`, not on the
# `standard_lasso` pipeline, so features are not standardized inside the
# folds even though the alpha grid was derived from standardized features.
# Confirm this is intended.
cv = KFold(10, shuffle=True, random_state=1)
grid = GridSearchCV(
    lasso,
    param_grid,
    cv=cv,
    scoring='neg_mean_absolute_error',
)
grid.fit(X_train, y_train);


In [27]:
# Test-set mean absolute error of the estimator selected by the grid search.
best_lasso = grid.best_estimator_
best_predict = best_lasso.predict(X_test)
mae = np.mean(np.abs(best_predict - y_test))
mae

257.23820107995

### Specifying a Network: Classes and Inheritance

In [29]:
class HittersModel(nn.Module):
  """Single-hidden-layer feed-forward regression network.

  The architecture is ``Linear -> ReLU -> Dropout -> Linear`` and produces
  one scalar prediction per observation.

  Parameters
  ----------
  input_size : int
      Number of features per observation.
  hidden_size : int, default 50
      Number of units in the hidden layer.
  dropout : float, default 0.4
      Dropout probability applied to the hidden activations.
  """

  def __init__(self, input_size, hidden_size=50, dropout=0.4):
    super().__init__()
    self.flatten = nn.Flatten()
    # Layer order is kept fixed so state_dict keys ('sequential.0.*',
    # 'sequential.3.*') stay stable.
    self.sequential = nn.Sequential(
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_size, 1),
    )

  def forward(self, x):
    """Return a 1-D tensor holding one prediction per row of ``x``."""
    x = self.flatten(x)
    # The last layer outputs shape (batch, 1); flatten it to (batch,).
    return torch.flatten(self.sequential(x))

In [30]:
# One network input per column of the design matrix.
n_features = X.shape[1]
hit_model = HittersModel(n_features)

In [40]:

# Summarize the layers using an input of shape (1, number of features).
sample_shape = (1, X_train.shape[1])
summary(hit_model, input_size=sample_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                   [-1, 19]               0
            Linear-2                   [-1, 50]           1,000
              ReLU-3                   [-1, 50]               0
           Dropout-4                   [-1, 50]               0
            Linear-5                    [-1, 1]              51
Total params: 1,051
Trainable params: 1,051
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [43]:
! pip install torchinfo
from torchinfo import summary

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [44]:
# Summarize the network for an input shaped like the full training matrix,
# reporting input shape, output shape and parameter count per layer.
summary_columns = ['input_size', 'output_size', 'num_params']
summary(
    hit_model,
    input_size=X_train.shape,
    col_names=summary_columns,
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
HittersModel                             [175, 19]                 [175]                     --
├─Flatten: 1-1                           [175, 19]                 [175, 19]                 --
├─Sequential: 1-2                        [175, 19]                 [175, 1]                  --
│    └─Linear: 2-1                       [175, 19]                 [175, 50]                 1,000
│    └─ReLU: 2-2                         [175, 50]                 [175, 50]                 --
│    └─Dropout: 2-3                      [175, 50]                 [175, 50]                 --
│    └─Linear: 2-4                       [175, 50]                 [175, 1]                  51
Total params: 1,051
Trainable params: 1,051
Non-trainable params: 0
Total mult-adds (M): 0.18
Input size (MB): 0.01
Forward/backward pass size (MB): 0.07
Params size (MB): 0.00
Estimated Total Size (MB): 0.09

In [45]:
# Convert the training arrays to float32 tensors and pair them in a dataset.
X_train_t, y_train_t = (
    torch.tensor(arr.astype(np.float32)) for arr in (X_train, y_train)
)
hit_train = TensorDataset(X_train_t, y_train_t)