This is official pytorch tutorial: <a href=https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py> Blitz Tutorial<a>

What is done in this tutorial:
    1. Load and normalize the CIFAR10 training and test datasets using torchvision
    2. Define a Convolutional Neural Network
    3. Define a loss function
    4. Train the network on the training data
    5. Test the network on the test data

TORCHVISION:
    The torchvision package consists of popular datasets, model architectures,
    and common image transformations for computer vision.

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
from torch.nn.functional import one_hot

import matplotlib.pyplot as plt
import numpy as np
import h5py
import pickle
from itertools import chain
from sklearn.model_selection import train_test_split
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import sys
sys.path.append('../common')
from dataset import TMH
from models import MLP

### Create dataloader

In [None]:
dataset = TMH(embeddings_path="/Users/fga/data/tmh/embeddings.h5",
                protein_hashes_path="../seq_anno_hash.pickle",
                train_ids="../data_splits/train_prot_id_labels.csv")
dataloader = DataLoader(dataset, batch_size=4)

test_dataset = TMH(embeddings_path="/Users/fga/data/tmh/embeddings.h5",
                protein_hashes_path="../seq_anno_hash.pickle",
                train_ids="../data_splits/test_prot_id_labels.csv")
test_dataloader = DataLoader(test_dataset, batch_size=4)

In [114]:
net = MLP()

# The function is indicating the performance of the model.
# During the training process this function should be minimized
criterion = nn.CrossEntropyLoss()

# The minimization is achieved through Stochastic Gradient Descent
optimizer = optim.Adagrad(net.parameters(), lr=0.01)

In [115]:
losses = []
for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        """
        shape of inputs: torch.Size([4, 3, 32, 32])
            Batchsize: 4
            Channels: 3 (Red, Green, Blue)
            Image size: 32 x 32

        labels: tensor([9, 3, 0, 3])
            9: class of image 0 in batch
            3: class of image 1 in batch
            ...
        """
        inputs, labels = data

        """ zero the parameter gradients after every batch
        This is necessary because the gradients (directions of how the weigths and biases
        will be updated) are accumulated in each backward pass.
        https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch
        """
        optimizer.zero_grad()  # SGD

        # forward + backward + optimize
        # shape outputs: torch.Size([4, 10])
        # for every image a prediction
        #print(f"{inputs}")
        outputs = net(inputs)
        #print(f"{outputs} \t {labels}")

        # the first iteration CrossEntropy: tensor(2.3100, grad_fn=<NllLossBackward0>)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

        # running loss after 3 iterations: 6.894119024276733
        # Why is the loss added?
        running_loss += loss.item()
        losses.append(loss.detach())
        if i % 100 == 99:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1}] loss: {(running_loss / 99):.3f}')
            running_loss = 0.0

print('Finished Training')

[1, 100] loss: 0.643
[1, 200] loss: 0.341
[1, 300] loss: 0.397
[1, 400] loss: 0.301
[1, 500] loss: 0.323
[1, 600] loss: 0.301
[1, 700] loss: 0.254
[1, 800] loss: 0.306
[2, 100] loss: 0.179
[2, 200] loss: 0.147
[2, 300] loss: 0.208
[2, 400] loss: 0.151
[2, 500] loss: 0.168
[2, 600] loss: 0.134
[2, 700] loss: 0.117
[2, 800] loss: 0.198
[3, 100] loss: 0.097
[3, 200] loss: 0.091
[3, 300] loss: 0.147
[3, 400] loss: 0.117
[3, 500] loss: 0.092
[3, 600] loss: 0.078
[3, 700] loss: 0.072
[3, 800] loss: 0.136
[4, 100] loss: 0.068
[4, 200] loss: 0.062
[4, 300] loss: 0.120
[4, 400] loss: 0.085
[4, 500] loss: 0.067
[4, 600] loss: 0.052
[4, 700] loss: 0.056
[4, 800] loss: 0.085
[5, 100] loss: 0.050
[5, 200] loss: 0.039
[5, 300] loss: 0.100
[5, 400] loss: 0.072
[5, 500] loss: 0.051
[5, 600] loss: 0.042
[5, 700] loss: 0.045
[5, 800] loss: 0.056
Finished Training


In [116]:
# smoothen
losss = [sum(losses[i:i+100])/100 for i in range(len(losses)-101)]
losss = torch.Tensor(losss)
px.line(losss)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

### Evaluate the model on the test data
This could be done with TorchMetrics but we will do this manually here

In [117]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)

        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        _, labels = torch.max(labels.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {len(test_labels)} test embeddings: {(100 * correct / total):.2f}')

Accuracy of the network on the 1392 test embeddings: 96.62


In [89]:
dl = iter(test_dataloader)

In [96]:
x, y = dl.__next__()
print(y)
y_hat = net(x)
y_hat

tensor([[1, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 1, 0, 0]])


tensor([[ 0.7053,  2.2922, -4.9044, -4.5034],
        [ 8.8387, -4.7909, -6.9981, -9.0850],
        [-9.5455, 12.5685, -9.9112, -7.1929],
        [-7.4062, 10.4996, -7.8818, -6.6492]], grad_fn=<AddmmBackward0>)

In [118]:
# prepare to count predictions for each class
classes = list(label_mappings.keys())
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        _, labels = torch.max(labels, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                         accuracy))

"""
Accuracy for class G_SP  is: 97.7 %
Accuracy for class G     is: 99.3 %
Accuracy for class SP_TM is: 79.4 %
Accuracy for class TM    is: 86.2 %
"""

Accuracy for class G_SP  is: 96.1 %
Accuracy for class G     is: 98.9 %
Accuracy for class SP_TM is: 89.9 %
Accuracy for class TM    is: 90.9 %


'\nAccuracy for class G_SP  is: 97.7 %\nAccuracy for class G     is: 99.3 %\nAccuracy for class SP_TM is: 79.4 %\nAccuracy for class TM    is: 86.2 %\n'

Create hashsum