# Tutorial 4: PyTorch Tensor, Module

In [None]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.colors as mplcolors
from tqdm import tqdm
from torch.utils.data import DataLoader
from tabulate import tabulate

### Set seed

In [None]:
# Seed NumPy's and PyTorch's global random number generators with the same
# fixed value so the random draws made later in the notebook are repeatable.
np.random.seed(444)
torch.random.manual_seed(444)

# 1. Tensor basics

Tensors are similar to NumPy's ndarrays, except that tensors can run on GPUs or other specialized hardware to accelerate computing.

### 1.1. Tensor initialization

Tensors can be created directly from data.

In [None]:
# Build a 2x2 tensor straight from a nested Python list, then show the
# resulting Python type and the tensor's contents.
data = [[1, 2], [3, 4]]
tensor_from_data = torch.tensor(data)
print(type(tensor_from_data))
print(tensor_from_data)

Tensors can be created from NumPy arrays (and vice versa).

In [None]:
# NumPy array -> tensor.
# NOTE: per the PyTorch docs, torch.from_numpy() does not copy: the tensor
# and the array share memory, so an in-place change to one shows up in the other.
numpy_data = np.array(data)
tensor_from_numpy = torch.from_numpy(numpy_data)
print(type(tensor_from_numpy))
print(tensor_from_numpy)

In [None]:
# Tensor -> NumPy array (the reverse of the previous conversion).
# NOTE: per the PyTorch docs, .numpy() on a CPU tensor also shares memory
# with the tensor rather than copying it.
numpy_from_tensor = tensor_from_numpy.numpy()
print(type(numpy_from_tensor))
print(numpy_from_tensor)

Tensors can be created from another tensor.

In [None]:
# ones_like() takes its shape from tensor_from_data and fills the new tensor with ones.
tensor_from_tensor1 = torch.ones_like(tensor_from_data)
print(f'Ones Tensor: \n {tensor_from_tensor1}')

In [None]:
# rand_like() takes its shape from tensor_from_data. The dtype is overridden
# to float64 because tensor_from_data was built from integers, and random
# values drawn from [0, 1) need a floating-point dtype.
tensor_from_tensor2 = torch.rand_like(tensor_from_data, dtype=torch.float64)
print(f'Random Tensor: \n {tensor_from_tensor2}')

### 1.2. Tensor attributes

A tensor's attributes describe its shape, data type, and the device on which it is stored.

In [None]:
# A random 3x4 tensor used only to inspect its attributes.
tensor = torch.rand(3, 4)

# .shape, .dtype and .device are plain attributes (no call needed).
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

### 1.3. Tensor operations (see [this page](https://pytorch.org/docs/stable/torch.html) for more details)

Elementwise operations:

In [None]:
# Two 2x2 float64 operands for the element-wise examples that follow.
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float64)
y = torch.tensor([[5, 6], [7, 8]], dtype=torch.float64)

In [None]:
# elemwise sum: operator form first, then the equivalent method form
print(x + y)
print(x.add(y))

In [None]:
# elemwise difference: operator form first, then the equivalent method form
print(x - y)
print(x.sub(y))

In [None]:
# elemwise product (NOT a matrix product): operator form, then method form
print(x * y)
print(x.mul(y))

In [None]:
# elemwise division: operator form first, then the equivalent method form
print(x / y)
print(x.divide(y))

Matrix operations:

In [None]:
# 2x2 matrices for the matrix-product examples
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float64)
y = torch.tensor([[5, 6], [7, 8]], dtype=torch.float64)

# 1-D vectors of length 2
v = torch.tensor([9, 10], dtype=torch.float64)
w = torch.tensor([11, 12], dtype=torch.float64)

# print the shapes so the 2-D vs 1-D distinction is visible
print(f'x: {x.shape}, y: {y.shape}, v: {v.shape}, w: {w.shape}')

In [None]:
# vector-vector product (dot product of two 1-D tensors): method form, then function form
print(v.dot(w))
print(torch.dot(v, w))

In [None]:
# matrix-vector product: matmul accepts a 2-D matrix with a 1-D vector
print(x.matmul(v))
# print(torch.mm(x, v)) --> this will raise an error: torch.mm only accepts two 2-D matrices, and v is 1-D
print(torch.matmul(x, v))

In [None]:
# matrix-matrix product: for two 2-D operands, mm (method and function form)
# and matmul are shown side by side
print(x.mm(y))
print(torch.mm(x, y))
print(torch.matmul(x, y))

# 2. `nn.Module`

* Base class for all neural network modules.
* Your models should also subclass this class.


### 2.1. Building MLP and CNN in PyTorch

In [None]:
class MLP(nn.Module):
    """Fully connected classifier with three hidden layers of 128 units.

    The input is flattened to 3 * 32 * 32 features (a 3-channel 32x32 image)
    and mapped to 10 output scores. Every layer lives in ``self.net``.
    """

    def __init__(self):
        super().__init__()
        # feature width entering each hidden Linear layer, in order
        widths = [3 * 32 * 32, 128, 128, 128]

        layers = [nn.Flatten()]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features=n_in, out_features=n_out))
            layers.append(nn.ReLU())
        # output layer: no activation, the raw scores are returned
        layers.append(nn.Linear(in_features=widths[-1], out_features=10))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        scores = self.net(x)
        return scores

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: two 5x5 conv layers, then three linear layers.

    For a 3-channel 32x32 input the spatial size goes 32 -> 28 (conv) ->
    14 (2x2 max-pool) -> 10 (conv), which is why the first linear layer
    expects 16 * 10 * 10 features. The output is 10 scores per sample.
    Every layer lives in ``self.net``.
    """

    def __init__(self):
        super().__init__()
        # convolutional feature extractor (only the first conv is followed by pooling)
        conv_stack = [
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
        ]
        # fully connected head applied to the flattened feature maps
        head = [
            nn.Flatten(),
            nn.Linear(16 * 10 * 10, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.net = nn.Sequential(*conv_stack, *head)

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        scores = self.net(x)
        return scores

### 2.2. Load data (CIFAR-10)

In [None]:
def _summarize_split(name, dataset):
    """Build one summary row ``[name, n img, img size, mean & std, n class, n img per class]``."""
    # number of img
    n_img = len(dataset)

    # img size: every dimension of .data except the leading (image) axis
    img_size = 'x'.join(map(str, dataset.data.shape[1:]))

    # mean & std of img, reduced over the first three axes so that one value
    # per entry of the last axis remains
    # (assumes .data is laid out as (image, height, width, channel) -- TODO confirm for non-CIFAR datasets)
    channel_avg = np.mean(dataset.data, axis=(0, 1, 2))
    channel_std = np.std(dataset.data, axis=(0, 1, 2))
    mean_std = ', '.join(
        f'{c}:{m:.2f}({s:.2f})' for c, m, s in zip('RGB', channel_avg, channel_std)
    )

    # number of class & number of img per class; each count is labelled with
    # the class value reported by np.unique, so non-contiguous labels
    # (e.g. 1 and 3) are not renumbered to 0 and 1
    classes, counts = np.unique(dataset.targets, return_counts=True)
    per_class = ', '.join(f'{c}: {n:4d}' for c, n in zip(classes, counts))

    return [name, n_img, img_size, mean_std, len(classes), per_class]


def get_data_stats(trn_data, tst_data):
    """Summarize the train and test splits as two table rows.

    Args:
        trn_data: training dataset; must support ``len()`` and expose
            ``.data`` (image array) and ``.targets`` (class labels).
        tst_data: test dataset with the same attributes.

    Returns:
        A list of two rows, ``['Train', ...]`` and ``['Test', ...]``, each
        holding: split name, number of images, image size string, per-channel
        mean(std) string, number of classes, and per-class image counts string.
    """
    return [_summarize_split('Train', trn_data),
            _summarize_split('Test', tst_data)]


def random_indices(n_class_train, train_label, n_per_class=5):
    """Pick random sample indices for each class.

    Args:
        n_class_train: number of classes; classes are taken to be the
            integers ``0 .. n_class_train - 1``.
        train_label: NumPy array of class labels, one per sample.
        n_per_class: how many indices to draw for each class (default 5,
            the value that used to be hard-coded).

    Returns:
        Integer array of shape ``(n_per_class, n_class_train)``; column ``c``
        holds indices of samples whose label is ``c``.

    Note:
        ``np.random.choice`` is called with its default ``replace=True``, so
        the same index can appear more than once within a column.
    """
    per_class = [
        np.random.choice(np.where(train_label == c)[0], size=n_per_class)
        for c in range(n_class_train)
    ]
    # rows are classes at this point; transpose so each column is one class
    return np.array(per_class).T


def plot_random_images(train_img, train_label, n_class_train):
    """Plot a grid of randomly chosen images, one column per class.

    Args:
        train_img: indexable collection of images; ``train_img[k]`` is
            passed to ``imshow``.
        train_label: class label of each image (any sequence; converted to
            a NumPy array before sampling).
        n_class_train: number of classes, i.e. number of columns in the grid.

    Returns:
        None. The figure is created through pyplot as a side effect.
    """
    # select random indices: one row per sample, one column per class
    indices = random_indices(n_class_train, np.array(train_label))

    # nrow & ncol of figure
    nrow, ncol = indices.shape

    # squeeze=False keeps axs a 2-D array even when nrow or ncol is 1;
    # without it plt.subplots returns a 1-D array (or a bare Axes) in that
    # case and the axs[i][j] indexing below fails
    _fig, axs = plt.subplots(nrow, ncol, figsize=(15, 5),
                             constrained_layout=True, squeeze=False)
    for i in range(nrow):
        for j in range(ncol):
            ax = axs[i][j]
            ax.imshow(train_img[indices[i][j]], vmin=0, vmax=255)
            # hide the axis ticks; only the picture matters
            ax.set_xticks([])
            ax.set_yticks([])

In [None]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# https://pytorch.org/vision/0.9/transforms.html
transform = transforms.Compose([transforms.ToTensor()])

# https://pytorch.org/vision/stable/datasets.html
train_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/ICE3050/dataset', train=True, download=True, transform=transform)
test_data = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/ICE3050/dataset', train=False, download=True, transform=transform)

In [None]:
# One row per split (Train, Test); the first column, headed 'index', holds the split name.
data_stats = get_data_stats(train_data, test_data)
print(tabulate(data_stats, headers=['index', 'n img', 'img size', 'mean & std', 'n class', 'n img per class']))

In [None]:
# Show random training images, one column for each of the 10 classes.
plot_random_images(train_data.data, train_data.targets, n_class_train=10)

### 2.3. Train model

In [None]:
def _run_epoch(model, loader, loss_fn, device, optim=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With ``optim`` given, the model is switched to training mode and updated
    after every batch; without it the model is switched to evaluation mode
    and only scored (the caller decides whether gradients are tracked).

    Both metrics are averaged over samples rather than over batches, so a
    smaller final batch does not get extra weight. An empty loader yields
    ``(nan, nan)``.
    """
    is_training = optim is not None
    model.train(is_training)

    loss_sum, n_correct, n_seen = 0., 0, 0
    for x, y in tqdm(loader):
        x = x.to(device)
        y = y.to(device)
        yhat = model(x)
        loss = loss_fn(yhat, y)
        if is_training:
            optim.zero_grad()
            loss.backward()
            optim.step()

        n_batch = y.size(0)
        # .item() turns the loss into a plain Python float; summing the
        # tensor itself would keep autograd history alive for the whole epoch.
        # loss_fn returns the batch mean, so scale it back to a batch sum.
        loss_sum += loss.item() * n_batch
        _, predicted = torch.max(yhat, dim=1)
        n_correct += (predicted == y).sum().item()
        n_seen += n_batch

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen


def train(model, optim, train_data, test_data, epochs, batch_size, lr, momentum):
    """Train a classifier and evaluate it on ``test_data`` after every epoch.

    Args:
        model: ``'cnn'`` or ``'mlp'``; selects which network to build.
        optim: ``'sgd'`` or ``'adam'``; selects the optimizer.
        train_data: dataset used for training (shuffled every epoch).
        test_data: dataset used for validation (not shuffled).
        epochs: number of passes over ``train_data``.
        batch_size: batch size for both data loaders.
        lr: learning rate.
        momentum: momentum for SGD; not used when ``optim`` is ``'adam'``.

    Returns:
        ``(train_losses, train_accrs, valid_losses, valid_accrs)``: four
        lists of floats with one entry per epoch. Accuracies are fractions
        in [0, 1].

    Raises:
        NotImplementedError: if ``model`` or ``optim`` is not one of the
            supported names.
    """
    # set seed (before the model is built so weight init is repeatable)
    torch.manual_seed(0)

    # ship dataset to dataloader
    # https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
    train_loader = DataLoader(train_data, batch_size, shuffle=True, num_workers=2)
    valid_loader = DataLoader(test_data, batch_size, shuffle=False, num_workers=2)

    # set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if model == 'cnn':
        net = CNN().to(device)
    elif model == 'mlp':
        net = MLP().to(device)
    else:
        raise NotImplementedError(f"unknown model {model!r}; expected 'cnn' or 'mlp'")

    # optimizer
    # https://pytorch.org/docs/stable/optim.html
    if optim == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    elif optim == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    else:
        raise NotImplementedError(f"unknown optimizer {optim!r}; expected 'sgd' or 'adam'")

    # loss
    loss_fn = nn.CrossEntropyLoss()

    train_losses, train_accrs = [], []
    valid_losses, valid_accrs = [], []

    # begin training
    for e in range(epochs):
        train_loss, train_accr = _run_epoch(net, train_loader, loss_fn, device, optimizer)
        # save log
        train_losses.append(train_loss)
        train_accrs.append(train_accr)

        # validation: no optimizer and no gradient tracking
        with torch.no_grad():
            valid_loss, valid_accr = _run_epoch(net, valid_loader, loss_fn, device)
        # save log
        valid_losses.append(valid_loss)
        valid_accrs.append(valid_accr)

        # print log
        log = f'Epoch: {e+1}/{epochs}, ' + \
            f'loss (train): {train_losses[-1]:.4f}, ' + \
            f'accuracy (train): {train_accrs[-1]*100:2.2f}%, ' + \
            f'loss (valid): {valid_losses[-1]:.4f}, ' + \
            f'accuracy (valid): {valid_accrs[-1]*100:2.2f}%'
        print(log)
    print('Done.', end='\n\n')

    return train_losses, train_accrs, valid_losses, valid_accrs

In [None]:
# MLP trained with SGD; hyperparameters are passed by name for readability
train_loss, train_accr, valid_loss, valid_accr = train(
    'mlp', 'sgd', train_data, test_data,
    epochs=5, batch_size=4, lr=0.001, momentum=0.9,
)

In [None]:
# CNN trained with SGD; hyperparameters are passed by name for readability
train_loss2, train_accr2, valid_loss2, valid_accr2 = train(
    'cnn', 'sgd', train_data, test_data,
    epochs=5, batch_size=4, lr=0.001, momentum=0.9,
)

In [None]:
# Flush pending writes and unmount the drive that was mounted earlier in the notebook.
drive.flush_and_unmount()

# 3. Optional readings
* [What is torch.nn really?](https://pytorch.org/tutorials/beginner/nn_tutorial.html)
* [How PyTorch Module works inside](https://teamdable.github.io/techblog/PyTorch-Module)