# Neural Networks (Part 3): Big Data

In part 3 of this neural networks series of tutorials we'll address some techniques for dealing with larger datasets that are too big to practically train on all in one go. We'll discuss:
- Datasets and Dataloaders
- Batching
- Optimizers: SGD, Adam, RAdam, AdamW

We'll again load the usual libraries we'll need, set a seed, and set a device for those who'd prefer to work on the GPU. We'll also continue to use our MLP creator function, our parameter counter, and our learning rate finder.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

# Seed both NumPy's and PyTorch's random number generators with the same value.
seed = 12
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x10c78a710>

In [2]:
import warnings
# Install an 'ignore' filter so that all warnings are suppressed from here on.
warnings.filterwarnings('ignore')

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [4]:
def multilayer_mlp(num_features, num_targets, hidden_sizes, p=0, normalization=None, activation=nn.ReLU()):
    """
    Get an arbitrary MLP model with L=len(hidden_sizes)+1 linear layers, as a flat nn.Sequential.

    Each hidden layer is Linear -> [normalization] -> activation -> [Dropout];
    the final layer is a bare Linear mapping to `num_targets`.

    Args:
        num_features: size of the input feature dimension.
        num_targets: size of the output dimension.
        hidden_sizes: iterable of hidden layer widths (list or tuple, may be empty).
        p: dropout probability; 0 disables dropout.
        normalization: optional callable that takes a layer width and returns a
            module (e.g. nn.BatchNorm1d); None disables normalization.
        activation: module applied after each hidden layer. The same instance is
            reused in every hidden layer.

    Returns:
        An nn.Sequential whose direct children are the individual leaf layers,
        starting with the first Linear layer.
    """
    def get_leaf_layers(m):
        """Used to flatten the layers out so the model doesn't recursively nest"""
        children = list(m.children())
        if not children:
            return [m]
        leaves = []
        for child in children:
            leaves.extend(get_leaf_layers(child))
        return leaves

    # Unpacking accepts any iterable (tuple, list, ...) and leaves the caller's object untouched.
    sizes = [num_features, *hidden_sizes]
    layers = []
    for n_in, n_out in zip(sizes, sizes[1:]):
        blocks = [
            nn.Linear(n_in, n_out),
            None if normalization is None else normalization(n_out),
            activation,
            None if p == 0 else nn.Dropout(p),
        ]
        # Flatten each component on its own: Module.children() de-duplicates
        # repeated instances, so flattening one big container would drop the
        # shared activation from every layer but the first.
        for block in blocks:
            if block is not None:
                layers.extend(get_leaf_layers(block))
    layers.append(nn.Linear(sizes[-1], num_targets))
    return nn.Sequential(*layers)

def lr_find(dataset, model, opt, loss_fn, batch_size=128, plot=True, log_lr=True, **kwargs):
    """
    Run a learning rate range test on `model` using torch_lr_finder.

    Args:
        dataset: dataset to wrap in a DataLoader for the range test.
        model: model passed to LRFinder.
        opt: optimizer passed to LRFinder.
        loss_fn: loss function passed to LRFinder.
        batch_size: batch size of the DataLoader built from `dataset`.
        plot: if True, plot the result of the range test; if False, skip plotting.
        log_lr: forwarded to LRFinder.plot as `log_lr`.
        **kwargs: forwarded unchanged to LRFinder.range_test.

    Uses the module-level `device`. LRFinder.reset() is always called at the
    end, even if the range test or the plot raises.
    """
    from torch_lr_finder import LRFinder
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    lr_finder = LRFinder(model, opt, loss_fn, device=device)
    try:
        lr_finder.range_test(dataloader, **kwargs)
        # Honour the `plot` flag instead of always drawing the curve.
        if plot:
            lr_finder.plot(log_lr=log_lr)
    finally:
        lr_finder.reset()
    
def num_params(model):
    """Return the total number of elements across all of `model`'s parameters."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total

In [None]:
# def train_model(X_train, y_train, model, opt, loss_fn, num_iters, X_test=None, y_test=None, scheduler=None):
#     for i in tqdm(range(num_iters)):
#         # training
#         model = model.train()
#         opt.zero_grad()
#         yhat = model(X_train)
#         loss = loss_fn(yhat, y_train)
#         loss.backward()
#         opt.step()
#         if scheduler:
#             scheduler.step()
#         avg_train_loss = loss / len(X_train)
#         # inference
#         if X_test is not None and y_test is not None:
#             model = model.eval()
#             yhat = model(X_test)
#             test_loss = loss_fn(yhat, y_test)
#             avg_test_loss = test_loss / len(X_test)
#         else:
#             avg_test_loss = None
#         if i % (num_iters // 10) == 0:
#             print(f'iter = {i} \t\t train loss = {avg_train_loss} \t\t test loss = {avg_test_loss}')
#     print(f'iter = {i} \t\t train loss = {avg_train_loss} \t\t test loss = {avg_test_loss}')
#     return model

# model = train_model(X_train, y_train, model, opt, loss_fn, num_iters, X_test=X_test, y_test=y_test)