<a href="https://colab.research.google.com/github/scaomath/wustl-math450/blob/main/Lectures/Math_450_Notebook_10_(Validation).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Coding lecture 10 of Math 450

## Last couple of weeks
- A complete pipeline of training a machine learning model

## Today
- How to build a bigger and more complex neural network.
- Set up a validation strategy

In [None]:
import torch
import numpy as np
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Optimizer
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "dark" plot style globally for the figures drawn in this notebook.
sns.set_style("dark")

import warnings
# Silence every Python warning for the rest of the session to keep cell output clean.
warnings.filterwarnings("ignore")

In [None]:
# MNIST training split stored under the current directory (download requested);
# ToTensor is applied to each image when a sample is fetched from the dataset.
train = datasets.MNIST(
    root='./',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Mini-batches of 8 samples; shuffling is not requested.
train_loader = DataLoader(dataset=train, batch_size=8)

class MLP(nn.Module):
    """
    Fully connected network with one hidden layer and a ReLU activation.

    Args:
        input_size (int): number of features per sample after flattening
            (default: 28*28)
        output_size (int): number of output scores per sample (default: 10)
        hidden_size (int): width of the hidden layer (default: 256)

    Shape:
        - Input: (batch, *) where the trailing dimensions multiply to
          ``input_size``
        - Output: (batch, output_size), the raw output of the last linear
          layer (no softmax is applied here)
    """

    def __init__(self,
                 input_size: int = 28*28,
                 output_size: int = 10,
                 hidden_size: int = 256):
        super().__init__()
        self.linear0 = nn.Linear(input_size, hidden_size)
        self.activation = nn.ReLU()
        self.linear1 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Collapse everything except the batch dimension into one feature axis.
        # reshape (instead of view) also accepts non-contiguous inputs.
        x = x.reshape(x.size(0), -1)
        hidden = self.activation(self.linear0(x))
        return self.linear1(hidden)

In [None]:

class SGD(Optimizer): # subclass of Optimizer
    """
    Implements the vanilla SGD simplified 
    from the torch official one for Math 450 WashU
    
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float): learning rate
        
    Example:
        >>> optimizer = SGD(model.parameters(), lr=1e-2)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
    """

    def __init__(self, params, # params: model.parameters()
                       lr: float = 1e-3, # input: type = value
                 ): 
        # every parameter group that does not set its own 'lr' falls back to this default
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    def step(self, closure=None):
        '''
        step(): w_{k+1} = w_k - alpha*grad f(w_k)

        Args:
            closure (callable, optional): re-evaluates the model and returns
                the loss; it is called once before the update.

        Returns:
            the value returned by ``closure``, or None if no closure is given.
        '''
        loss = None
        if closure is not None:
            # the closure may need to call backward(), so make sure autograd
            # is on even if the caller is inside a no_grad block
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            for param in group['params']:
                # parameters that did not take part in the backward pass are skipped
                if param.grad is None:
                    continue
                # update through .data so autograd does not record the step
                param.data = param.data - lr * param.grad.data
        return loss

In [None]:
# hyperparameters
epochs = 2
learning_rate = 1e-3

# network, loss function, and the optimizer acting on the network's parameters
model = MLP()
loss_func = nn.CrossEntropyLoss()
optimizer = SGD(params=model.parameters(), lr=learning_rate)

# How to build a bigger net?

In [None]:
# pipeline: one pass over train_loader per epoch
for epoch in range(epochs):

    model.train()  # formalism, useful when we have dropout

    loss_vals = []      # per-batch loss values of the current epoch
    running_loss = 0.0  # sum of loss_vals, so the mean below costs O(1) per batch

    with tqdm(total=len(train_loader)) as pbar:  # progress bar
        for data, targets in train_loader:

            # forward pass
            outputs = model(data)

            # loss function
            loss = loss_func(outputs, targets)

            # record the loss as a plain Python float via .item()
            loss_val = loss.item()
            loss_vals.append(loss_val)
            running_loss += loss_val

            # clear the gradients left over from the previous iteration
            # before computing new ones
            optimizer.zero_grad()

            # backprop (autograd)
            loss.backward()

            # stochastic gradient descent
            # no `with torch.no_grad():` block here; the parameter update
            # in the optimizer goes through .data
            optimizer.step()

            # check accuracy

            # tqdm template: show the mean loss over the batches seen so far
            desc = f"epoch: [{epoch+1}/{epochs}] loss: {running_loss/len(loss_vals):.4f}"
            pbar.set_description(desc)
            pbar.update()

# How to validate?

In order to make an informed choice, we need a way to *validate* that our model and our hyperparameters are a good fit to the data.
While this may sound simple, there are some pitfalls that you must avoid to do this effectively.


Model validation is conceptually simple: we make use of "holdout" validation sets, and of cross-validation for more robust model evaluation. We hold back some subset of the data from the training of the model, and then use this holdout set to check the model performance. 
This splitting can be done using the ``train_test_split`` utility in Scikit-Learn:

## Reference:
- *Python Data Science Handbook* by Jake VanderPlas

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Take the stored image and label tensors straight off the dataset object.
# NOTE(review): reading .data presumably bypasses the ToTensor transform passed to
# datasets.MNIST, so X may hold raw pixel values -- confirm dtype/scale before
# feeding it to the model.
X, y = train.data, train.targets

In [None]:
# Keep 80% of the samples for training and hold out the rest for validation;
# the fixed random_state makes the split reproducible.
X_tr, X_val, y_tr, y_val = train_test_split(
    X, y,
    train_size=0.8,
    random_state=0,
)

In [None]:
# Pair the training images with their labels, then rebuild train_loader on this
# subset only (it replaces the loader over the full dataset), in batches of 4.
# NOTE(review): X_tr comes from train.data; verify it has the floating-point
# dtype the model's linear layers expect -- TODO confirm.
train_set = TensorDataset(X_tr, y_tr)
train_loader = DataLoader(dataset=train_set, batch_size=4)