# Fake Training

For our problem we have the output from the model as:

$[N, C]$

And our labels, which are one-hot encoded, have the same shape:

$[N, C]$ 

where N is the number of samples and C is the number of classes; each label entry is either 0 or 1.

In PyTorch torch.nn.BCELoss() is [Binary Cross Entropy Loss](https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html):

Which expects the input:

***

Input: $(N, *)$ where $*$ means any number of additional dimensions

Target: $(N, *)$, same shape as the input

Output: scalar. If reduction is 'none', then $(N, *)$, same shape as input.

***

We want to apply the sigmoid function to the inputs to ensure they are in the 0 -> 1 range.

We could also just use nn.BCEWithLogitsLoss(), which applies the sigmoid internally.

[A good example](https://jbencook.com/cross-entropy-loss-in-pytorch/)

In [1]:
# imports
import torch
from torch import nn
from torchvision.models import resnet18
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader

## Accuracy

In [2]:
correct = 0

In [3]:
# NOTE: `pred` and `y` are not defined at this point in the notebook, so this
# cell raises the NameError recorded below. The later cell that uses
# `pred2` / `y2` shows the same computation on defined tensors.
correct += torch.sum((pred >= 0.5).float() == y.float()).item()

NameError: name 'pred' is not defined

In [4]:
correct

0

In [5]:
correct / (1*3)

0.0

In [6]:
# Fake outputs from the model and a fake target (one sample, three labels)
pred2 = torch.tensor([[0.5, 0., 1.]])
y2 = torch.tensor([[1., 1., 0.]])
# Threshold the predictions at 0.5, compare element-wise with the target,
# and add the number of matching entries to the running total
matches = (pred2 >= 0.5).float() == y2.float()
correct += matches.sum().item()

In [7]:
correct / (2*3)

0.16666666666666666

In [8]:
torch.randn(19)

tensor([ 0.6386, -0.3653, -1.6166, -0.8355, -0.7439, -0.8709, -0.6744,  1.0874,
         0.6042,  0.7793,  0.0968, -0.5070, -0.2855,  1.5851,  1.5251, -1.5410,
         0.8841,  1.5333,  0.5822])

# Fake Dataset
We need a fake dataset which contains lots of random 4 channel images.

In [9]:
# NOTE: this call passes a tensor to the nn.Sigmoid constructor, which produces
# the TypeError recorded below. The dataset class further down shows the
# working pattern: build the module with nn.Sigmoid() and then call it on the
# tensor.
m = nn.Sigmoid(torch.randn(19))

TypeError: __init__() takes 1 positional argument but 2 were given

In [10]:
# NOTE: `m` was never bound because the previous cell raised, hence the
# NameError recorded below.
m(torch.randn(19))

NameError: name 'm' is not defined

In [11]:
class CustomImageDataset(Dataset):
    """Synthetic dataset yielding random images paired with random soft labels.

    Every access draws a fresh standard-normal image and a label vector whose
    entries are the sigmoid of standard-normal noise, so the same index does
    not return the same sample twice.

    Args:
        transform: optional callable applied to the image tensor.
        target_transform: optional callable applied to the label tensor.
        length: number of samples the dataset reports (default 1000).
        image_shape: shape of each generated image (default (4, 512, 512)).
        num_classes: number of entries in each label vector (default 19).
    """

    def __init__(self, transform=None, target_transform=None, length=1000,
                 image_shape=(4, 512, 512), num_classes=19):
        self.transform = transform
        self.target_transform = target_transform
        self.length = length
        self.image_shape = tuple(image_shape)
        self.num_classes = num_classes
        # Squashes the random label noise into the (0, 1) range
        self.m = nn.Sigmoid()

    def __len__(self):
        """Return the configured number of samples."""
        return self.length

    def __getitem__(self, idx):
        """Return a freshly generated ``(image, label)`` pair.

        Raises:
            IndexError: if ``idx`` lies outside ``[-len(self), len(self))``.
                Without this, plain iteration over the dataset never stops.
        """
        if not -self.length <= idx < self.length:
            raise IndexError(
                f"index {idx} is out of range for dataset of size {self.length}"
            )
        image = torch.randn(self.image_shape)
        label = self.m(torch.randn(self.num_classes))
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [12]:
# Build the synthetic dataset without any transforms, then wrap it in a
# shuffling loader that serves batches of 64 samples
fake_dataset = CustomImageDataset(
    transform=None,
    target_transform=None,
)
fake_loader = DataLoader(
    fake_dataset,
    batch_size=64,
    shuffle=True,
)

In [13]:
fake_images, fake_labels = next(iter(fake_loader))

In [14]:
fake_images.shape

torch.Size([64, 4, 512, 512])

In [15]:
fake_labels.shape

torch.Size([64, 19])

As you can see, we get random labels and images. Our loss function may be all over the place, but that proves the pipeline works.

# Fake Model

In [16]:
class NeuralNetwork(nn.Module):
    """ResNet-18 based multi-label classifier for 4-channel images.

    The network is a 4-channel stem convolution, followed by the resnet18
    layers between its own stem and its classifier, then dropout, a linear
    layer and a sigmoid.

    Args:
        NUM_CLASSES: number of output units of the final linear layer.
        DROPOUT_RATE: probability passed to nn.Dropout before the linear layer.
    """

    def __init__(self, NUM_CLASSES, DROPOUT_RATE):
        super().__init__()
        # Stem convolution taking 4 input channels and producing 64
        stem = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        # Every child of resnet18() except the first and the last one.
        # NOTE(review): resnet18() is called with default arguments; whether
        # that loads pretrained weights depends on torchvision - confirm.
        backbone = list(resnet18().children())[1:-1]
        self.convolutions = nn.Sequential(stem, *backbone)
        self.dropout = nn.Dropout(DROPOUT_RATE)
        self.dense = nn.Linear(512, NUM_CLASSES)
        self.out = nn.Sigmoid()

    def forward(self, X):
        """Return sigmoid activations of shape (batch, NUM_CLASSES) for batch X."""
        n_samples = X.shape[0]
        # Convolutional features, flattened to one row per sample
        # (the linear layer below expects 512 values per row)
        features = self.convolutions(X)
        flat = features.reshape(n_samples, -1)
        # Dropout followed by the linear layer gives one raw score per class
        scores = self.dense(self.dropout(flat))
        # Sigmoid maps each score into the 0..1 range
        return self.out(scores)

In [17]:
# Instantiate the model with 19 outputs and a dropout rate of 0.1, then push a
# random batch of five 4x512x512 inputs through it
model = NeuralNetwork(NUM_CLASSES=19, DROPOUT_RATE=0.1)
X = torch.randn(5, 4, 512, 512)

pred = model(X)
pred.shape

torch.Size([5, 19])

In [18]:
pred

tensor([[0.4347, 0.4889, 0.4481, 0.6884, 0.7138, 0.5247, 0.4377, 0.5655, 0.5034,
         0.4689, 0.6048, 0.3298, 0.4841, 0.5442, 0.5227, 0.2754, 0.3668, 0.5414,
         0.5654],
        [0.4459, 0.4870, 0.4501, 0.7119, 0.7414, 0.4481, 0.4200, 0.5835, 0.4153,
         0.3985, 0.5523, 0.3693, 0.5269, 0.6073, 0.4992, 0.2704, 0.4022, 0.5283,
         0.5307],
        [0.4314, 0.4898, 0.3893, 0.6736, 0.7139, 0.4857, 0.3694, 0.5643, 0.3739,
         0.4592, 0.5868, 0.3282, 0.5360, 0.5353, 0.6265, 0.3485, 0.3814, 0.5098,
         0.5303],
        [0.4186, 0.5293, 0.4728, 0.6519, 0.7093, 0.5174, 0.3926, 0.5374, 0.5008,
         0.4503, 0.5880, 0.3283, 0.4677, 0.5593, 0.6270, 0.3169, 0.3624, 0.5842,
         0.5635],
        [0.4661, 0.5114, 0.4359, 0.6736, 0.7243, 0.5295, 0.4492, 0.6003, 0.4514,
         0.3455, 0.5664, 0.3433, 0.4103, 0.6062, 0.5823, 0.2612, 0.3765, 0.5793,
         0.5044]], grad_fn=<SigmoidBackward>)

# Fake Loss Function
Using fake model, let's get the fake loss function working

In [19]:
# Loss function
loss_fn = nn.BCELoss()

In [20]:
# Fake ground truths: an integer tensor of ones, one row per sample (5) and
# one column per label (19)
ground_truth = torch.ones((5, 19), dtype=torch.long)
ground_truth.shape

torch.Size([5, 19])

In [21]:
ground_truth

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [22]:
# compute loss 
loss = loss_fn(pred, ground_truth.float())
loss

tensor(0.7247, grad_fn=<BinaryCrossEntropyBackward>)

We can then call loss.backward() to compute the gradient of the loss with respect to each weight.

<code>loss.backward() computes dloss/dx for every parameter x which has requires_grad=True. These are accumulated into x.grad for every parameter x. In pseudo-code:

x.grad += dloss/dx
optimizer.step updates the value of x using the gradient x.grad. For example, the SGD optimizer performs:
x += -lr * x.grad
optimizer.zero_grad() clears x.grad for every parameter x in the optimizer. It’s important to call this before loss.backward(), otherwise you’ll accumulate the gradients from multiple passes.</code>

# Fake Optimiser

N.B. In PyTorch, we need to set the gradients to zero before starting to do backpropagation because PyTorch accumulates the gradients on subsequent backward passes. This is convenient while training RNNs. So, the default action is to accumulate (i.e. sum) the gradients on every loss.backward() call.

Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly. Else the gradient would point in some other direction than the intended direction towards the minimum (or maximum, in case of maximization objectives).

[Why Choose Adam](https://debuggercafe.com/adam-algorithm-for-deep-learning-optimization/)

In [23]:
# init
optim = torch.optim.Adam(model.parameters(), lr=0.0001) # we could put a scheduler here.

We use the optimizer [because](https://deeplearningdemystified.com/article/fdl-4) we want to avoid certain traps in achieving minimal loss (local optima, changing how certain weights are updated).

To use the optimiser we first initialize it (as above) and then, in the training loop:

<code>#Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()</code>

# Fake Training Loop
Using fake inputs and predictions, I am going to show what the training loop should look like.

In [24]:
def train_loop(model, optim, dataloader, loss_fn, USE_GPU=False, log_every=1):
    """Run one training pass over ``dataloader`` and return the logged losses.

    Args:
        model: module to train; it is switched to training mode.
        optim: optimizer that updates ``model``'s parameters. This argument is
            used directly, rather than a global ``optimizer`` variable.
        dataloader: iterable of ``(X, y)`` batches exposing a ``.dataset``
            attribute, whose length is used in the progress message.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        USE_GPU: when True, the model and every batch are moved with ``.cuda()``.
        log_every: record and print the loss every ``log_every`` batches
            (must be >= 1; the default of 1 logs every batch).

    Returns:
        list of float loss values, one per logged batch.
    """
    # Total number of samples, used only for the progress message
    size = len(dataloader.dataset)

    # Training mode first, then move to the GPU if requested
    model.train()
    if USE_GPU:
        model = model.cuda()

    loss_log = []

    # Make sure gradients are tracked even if the caller disabled them
    with torch.set_grad_enabled(True):
        for batch_num, (X, y) in enumerate(dataloader):
            if USE_GPU:
                X = X.cuda()
                y = y.cuda()

            # Forward pass and loss
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation: clear old gradients, compute new ones, update
            optim.zero_grad()
            loss.backward()
            optim.step()

            # Logging and stats
            if batch_num % log_every == 0:
                loss_value = loss.item()
                current = batch_num * len(X)
                loss_log.append(loss_value)
                print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

    return loss_log

# Fake Testing Loop

In [25]:
def test_loop(model, optim, dataloader, loss_fn, USE_GPU=False):
    # How long is our dataset
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Fake Subsets

In [26]:
# Mini-batch size used for the training loader
batch_s = 64

# Training split: the first 800 synthetic samples, reshuffled on every pass
dataset_train = torch.utils.data.Subset(fake_dataset, list(range(800)))
train_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_s, shuffle=True
)

# Test split: the remaining samples, served one at a time in a fixed order
dataset_test = torch.utils.data.Subset(
    fake_dataset, list(range(800, len(fake_dataset)))
)
test_loader = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False
)

# Report the size of each split
for split in (dataset_train, dataset_test):
    print(len(split))

dataloaders = {"train": train_loader, "test": test_loader}

800
200


# Fake Run

In [None]:
# Config
EPOCHS = 5
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
loss_fn = nn.BCELoss()
# Checkpointing is off by default; set `save = True` to write the weights of
# the best-scoring epoch to `save_path`.
save = False
save_path = "best_model.pt"

best_acc = 0.0
log = []

for t in range(EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    loss_log = train_loop(model, optimizer, dataloaders['train'], loss_fn, USE_GPU=False)
    log.extend(loss_log)
    # test_loop is expected to hand back (average loss, accuracy)
    test_loss, correct = test_loop(model, optimizer, dataloaders['test'], loss_fn, USE_GPU=False)

    # Track the best accuracy seen so far and optionally checkpoint it
    if correct > best_acc:
        best_acc = correct
        if save:
            torch.save(model.state_dict(), save_path)

print("Done!")

Epoch 1
-------------------------------
