In [None]:
from datasets import PACSDataset
from torchvision import transforms
from torch.utils.data import DataLoader


# Training a neural network
Now that we can load data from our custom datasets, we can train a neural network. To do so, we need the following:

1. A neural network
2. An optimiser
3. A training loop that iterates through samples provided by a dataloader and uses the optimiser to update the neural network's weights


## Train and Test dataloaders
Normally, we'll have a training, validation and test set. The validation set is used for hyperparameter tuning. Since we won't do any hyperparameter tuning we can just go ahead and use the test set for evaluation. 

In [None]:
# Files and transforms
def _read_lines(path):
    """Return the lines of the text file at ``path`` without their line endings."""
    with open(path) as handle:
        return handle.read().splitlines()


# One entry per line of each split listing
file_names_train = _read_lines("train_files.txt")
file_names_test = _read_lines("test_files.txt")

# transforms
# A great point to add data augmentations - you want to do class-preserving transformations
# Rotations, Reflections etc
# NOTE(review): Normalize works on tensors, not PIL images - confirm that PACSDataset
# hands this transform a float tensor.
resize = transforms.Resize(size=(224, 224))
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([resize, normalize])

# Instantiate datasets

# Instantiate dataloaders

## Neural network modules
We will now create the neural network. To illustrate the modularity of PyTorch, we will break it down into multiple modules.

1. A featurizer: applies convolutions to extract "useful" features
2. A classifier: fully connected net built on top of featurizer
3. A network that combines the two: Allows us to easily swap out parts 1 or 2. 

In [None]:
from torch import nn
from torch.nn import functional as F
import torch


# Featurizer
class Featurizer(nn.Module):
    """Convolutional feature extractor.

    Two 3x3 convolutions (3 -> 16 -> 4 channels, padding 1), each followed by a
    ReLU and a 2x2 max-pool with stride 2. Channel-wise dropout (p=0.5) is
    applied after the first activation.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.dropout = nn.Dropout2d(p=0.5)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=4, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Map an image batch to its 4-channel feature maps."""
        # First stage: conv -> ReLU -> channel dropout -> pool
        stage_one = self.pool(self.dropout(F.relu(self.conv1(x))))
        # Second stage: conv -> ReLU -> pool
        return self.pool(F.relu(self.conv2(stage_one)))

# Classifier
# We will hardcode the layers for now, but it is best to use parameters
class Classifier(nn.Module):
    """Fully connected classification head built on top of the featurizer.

    The incoming feature maps are flattened per sample and passed through three
    linear layers (``in_features`` -> 4096 -> 1024 -> ``num_classes``) with ReLU
    activations and dropout in front of the first two layers.

    Args:
        num_classes: Number of output logits.
        in_features: Number of values per sample after flattening. The default,
            12544 = 4 * 56 * 56, is what ``Featurizer`` produces for a
            224x224 input: 4 output channels, with the two 2x2 poolings
            reducing 224 -> 112 -> 56 in each spatial dimension.
    """

    def __init__(self, num_classes: int = 7, in_features: int = 12544):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 4096)
        # Element-wise dropout: the activations are flattened to (batch, features)
        # before dropout is applied, and nn.Dropout2d is meant for inputs that
        # still have channel and spatial dimensions (it warns on 2-D input).
        self.dropout = nn.Dropout(0.5)

        self.fc2 = nn.Linear(4096, 1024)
        self.fc3 = nn.Linear(1024, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class logits for a batch of feature maps."""
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the output is returned as logits
        x = self.fc3(x)
        return x

# Combination
class CNNClassifier(nn.Module):
    """Complete network: a featurizer followed by a classifier.

    Keeping the two parts as separate sub-modules makes it easy to swap either
    one out without touching the other.

    Args:
        num_classes: Number of output logits; forwarded to the default
            ``Classifier``. Ignored when ``classifier`` is supplied.
        featurizer: Optional module used instead of a fresh ``Featurizer``.
        classifier: Optional module used instead of a fresh ``Classifier``.
    """

    def __init__(self, num_classes: int = 7, featurizer=None, classifier=None):
        super().__init__()
        self.featurizer = Featurizer() if featurizer is None else featurizer
        self.classifier = (
            Classifier(num_classes=num_classes) if classifier is None else classifier
        )

    def forward(self, x):
        """Extract features from ``x`` and return the classifier's output for them."""
        return self.classifier(self.featurizer(x))

In [None]:
# Initialise network and move to GPU
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Training logic
As mentioned before, we can fit the training logic into a class with a network as an attribute.

We also need to consider hyperparameters

In [None]:
# Hyperparameters
learning_rate = 0.0001  # step size, intended for the optimiser
epochs = 16  # number of passes of the epoch loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Inputs and targets are moved to the module-level ``device`` before the
    forward pass. The loss of every 20th batch is printed as progress.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that exposes ``.dataset``.
        model: Network to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimiser that holds the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    # Training mode so that layers such as dropout behave stochastically
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Move to device
        X, y = X.to(device), y.to(device)

        # compute predictions
        pred = model(X)

        # evaluate loss
        loss = loss_fn(pred, y)

        # Zero gradients - they would otherwise accumulate across batches
        optimizer.zero_grad()

        # Back propagation
        loss.backward()

        # Gradient descent
        optimizer.step()

        # training progress
        if batch % 20 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # turn of gradients - we don't need the extra memory footprint
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# create loss_fn

# create optimiser


In [None]:
# loop over epochs
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # Only evaluate after every fifth epoch
    if (t + 1) % 5 != 0:
        continue
    # Report metrics on the training split first, then on the test split
    for eval_dataloader in (train_dataloader, test_dataloader):
        test_loop(eval_dataloader, model, loss_fn)
print("Done!")