# Transformer NN

Investigate a simple feedforward NN. Look at 1 layer then multi layer?

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.cuda.amp import autocast
from torch.cuda.amp import GradScaler

from os.path import join

import sys
sys.path.append('../')
import utils

In [2]:
assert torch.cuda.is_available(), 'CUDA is not available.'

## Load Data

In [3]:
fname = join('/home/squirt/Documents/data/weather_data/', 'all_data.h5')

Set batchsize and partition. Create Dataloaders from h5 data. Use utils function

In [4]:
batch_size = 512
split = 0.5
t_dl, v_dl = utils.get_dataloaders(fname, batch_size, split)

## Define Model

Define model in utils. So we can use main method eventually for training

In [5]:
model = utils.ConvTrans(input_shape=[71,3,2,2])
model = model.double().cuda()

## Define Training Loop 

In [6]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

Define training loop. Does one pass over data then returns average loss
Inputs:
    - model (nn.Module): Our Net that we want to optimize
    - dl (torch.utils.data.DataLoader): Train dataloader
    - optim (torch.optim): Optimizer to train
    - loss (nn.Module): Loss function (can be nn.Module or nn.Function)
Returns:
    - Training loss on one pass


In [7]:

def train(model:nn.Module, dl:torch.utils.data.DataLoader, optim:torch.optim, loss:nn.Module) -> float:
    model.train()
    total_loss = .0
    scaler = GradScaler()

    for _, (l, x, y) in enumerate(dl):
        l = l.cuda()
        x = x.cuda()
        y = y.cuda()

        # Combine
        l = l.unsqueeze(1)
        x = torch.cat((l, x), 1)

        # Forward pass
        with autocast():
            y_pred = model(x)
            l = loss(y_pred, y)
            total_loss += l.item()

        # Preform backpass
        scaler.scale(l).backward()
        scaler.step(optim)
        scaler.update()
    
    return total_loss / len(dl)

Define eval loss loop. Does one pass over evaluation data and returns the average loss
Inputs:
    - model (nn.Module): Network that we are training
    - dl (torch.utils.data.DataLoader): Eval dataloader

In [8]:
def eval(model:nn.Module, dl:torch.utils.data.DataLoader, loss:nn.Module) -> float:
    model.eval()
    total_loss = .0

    for _, (l, x, y) in enumerate(dl):
        l = l.cuda()
        x = x.cuda()
        y = y.cuda()

        # Combine
        l = l.unsqueeze(1)
        x = torch.cat((l, x), 1)

        # Forward pass
        with autocast():
            y_pred = model(x)
            l = loss(y_pred, y)
            total_loss += l.item()

    return total_loss / len(dl)

## Training

In [9]:
train_loss = nn.MSELoss()

In [10]:
for i in range(5):
    train(model, t_dl, optimizer, train_loss)
    l = eval(model, v_dl, train_loss)
    print(f'Epoch {i+1} - Eval Loss: {l}')

Epoch 1 - Eval Loss: 0.26916159101961545
Epoch 2 - Eval Loss: 0.26368698782720523
Epoch 3 - Eval Loss: 0.25934391670063683
Epoch 4 - Eval Loss: 0.2578923886781918
Epoch 5 - Eval Loss: 0.25694811837023784


In [11]:
print(len(t_dl), len(v_dl))
print(len(t_dl.dataset), len(v_dl.dataset))

19 19
9600 9600
