# Convolutional Neural Networks - Part II

## Tasks

### Task 1

Write a function `get_normalize` that takes a tensor of image features from a dataset and returns the per-channel mean and the per-channel standard deviation.   
It is guaranteed that the tensor has shape \[N, C, H, W\], where:
- `N` is the number of objects,
- `C` is the number of channels,
- `H`, `W` are the dimensions of the images.

You need to return a tuple of two tensors of length `C`.  
Your function should have the following signature: `def get_normalize(features: torch.Tensor)`

In [1]:
import torch


def get_normalize(features: torch.Tensor):
    """Compute per-channel statistics of an image tensor.

    Args:
        features: Tensor laid out as [N, C, H, W].

    Returns:
        A ``(mean, std)`` tuple; each tensor holds one value per channel.
    """
    # Reduce over every axis except the channel axis (dim 1).
    reduce_dims = (0, 2, 3)
    channel_mean = features.mean(dim=reduce_dims)
    # Uses torch's default std estimator (Bessel-corrected).
    channel_std = features.std(dim=reduce_dims)
    return channel_mean, channel_std

In [2]:
# Smoke test on random data: torch.randn samples a standard normal, so the
# per-channel mean should land near 0 and the per-channel std near 1.
N, C, H, W = 10, 3, 32, 32
features = torch.randn(N, C, H, W)

mean, std = get_normalize(features)

print('Mean:', mean)
print('Std:', std)
# Both results are expected to be 1-D with C entries.
print('Mean shape:', mean.shape)
print('Std shape:', std.shape)

Mean: tensor([-0.0095, -0.0103, -0.0054])
Std: tensor([0.9910, 0.9868, 1.0093])
Mean shape: torch.Size([3])
Std shape: torch.Size([3])


### Task 2

Code a function `get_augmentations` that will return ready-made augmentations for the training set and for the test set. It should have the following signature: `def get_augmentations(train: bool = True) -> T.Compose`  

Apply the following augmentations:
- Change the image size to make it 224 by 224 pixels (for both training and test sets).
- Apply some augmentations from those we studied in class (only for training).
- Convert the image to a tensor.
- Apply normalization for the `CIFAR10` dataset.

In [3]:
import torchvision.transforms as T


def get_augmentations(train: bool = True) -> T.Compose:
    """Build the image preprocessing pipeline for CIFAR10.

    Args:
        train: When true, random augmentations (horizontal flip, rotation,
            padded crop) are inserted between the resize and the tensor
            conversion. When false, only the deterministic steps are used.

    Returns:
        A ``T.Compose`` that resizes to 224x224, optionally augments,
        converts to a tensor and normalizes with the CIFAR10 statistics.
    """
    # Per-channel statistics of the CIFAR10 dataset.
    cifar10_mean = (0.49139968, 0.48215841, 0.44653091)
    cifar10_std = (0.24703223, 0.24348513, 0.26158784)

    pipeline = [T.Resize((224, 224))]

    if train:
        # Random augmentations are applied to the training set only.
        pipeline += [
            T.RandomHorizontalFlip(),
            T.RandomRotation(degrees=15),
            T.RandomCrop(size=224, padding=4),
        ]

    pipeline += [
        T.ToTensor(),
        T.Normalize(cifar10_mean, cifar10_std),
    ]
    return T.Compose(pipeline)

In [4]:
# Build both pipelines and print them to inspect which transforms each one
# contains.
train_transforms = get_augmentations(train=True)
test_transforms = get_augmentations(train=False)

print('Train Transforms:', train_transforms)
print('Test Transforms:', test_transforms)

Train Transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    RandomHorizontalFlip(p=0.5)
    RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=0)
    RandomCrop(size=(224, 224), padding=4)
    ToTensor()
    Normalize(mean=(0.49139968, 0.48215841, 0.44653091), std=(0.24703223, 0.24348513, 0.26158784))
)
Test Transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=(0.49139968, 0.48215841, 0.44653091), std=(0.24703223, 0.24348513, 0.26158784))
)


### Task 3

Code a function `predict`. It should take as input a neural network, a dataloader, and torch.device.  
It should have the following signature: `def predict(model: nn.Module, loader: DataLoader, device: torch.device)`  

Inside the function, take the following steps:
- Create an empty list to store predictions.
- Iterate through the dataloader.
- On each iteration, do a forward pass for the batch, calculate the classes as argmax on the neural network output (logits), add the tensor with predictions to the list.
- Concatenate all predictions and return this tensor of length N, according to the number of objects in the dataset.
- Your function should return a tensor with classes.

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def predict(model: nn.Module, loader: DataLoader, device: torch.device):
    """Return the predicted class index for every object served by ``loader``.

    The model is switched to evaluation mode and run without gradient
    tracking. Each batch may be either a bare input tensor or a list/tuple
    whose first element is the input tensor (for example ``(inputs, labels)``).

    Args:
        model: Network whose output is reduced with argmax over dim 1.
        loader: Iterable of batches.
        device: Device the inputs are moved to before the forward pass.
            The model itself is not moved by this function.

    Returns:
        1-D tensor of class indices, concatenated in loader order. An empty
        loader yields an empty ``torch.long`` tensor instead of an error.
    """
    model.eval()
    batch_predictions = []

    with torch.no_grad():
        for batch in loader:
            # Loaders built on (inputs, labels) datasets yield sequences;
            # only the inputs are needed for inference.
            inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
            logits = model(inputs.to(device))
            batch_predictions.append(torch.argmax(logits, dim=1))

    if not batch_predictions:
        # torch.cat rejects an empty list, so build the empty result explicitly.
        return torch.empty(0, dtype=torch.long, device=device)

    return torch.cat(batch_predictions)

In [6]:
# Dummy model for testing
class DummyModel(nn.Module):
    """Single linear layer used as a stand-in classifier for testing."""

    def __init__(self, input_size: int, num_classes: int) -> None:
        super().__init__()
        # Maps input_size features to num_classes outputs.
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output for ``x``."""
        return self.linear(x)


# Sizes for the synthetic check; 100 samples with batch size 32 leaves a
# smaller final batch.
input_size = 10
num_classes = 5
batch_size = 32
num_samples = 100

model = DummyModel(input_size, num_classes)
# Random inputs and labels wrapped in a dataset, so the loader yields
# (inputs, labels) batches.
data = torch.randn(num_samples, input_size)
labels = torch.randint(0, num_classes, (num_samples,))
dataset = torch.utils.data.TensorDataset(data, labels)
loader = DataLoader(dataset, batch_size=batch_size)
device = torch.device('cpu') 

# Get predictions
predicted_classes = predict(model, loader, device)

# One class index per sample is expected, i.e. num_samples entries.
print('Predicted Classes:', predicted_classes)
print('Predicted Classes Shape:', predicted_classes.shape)
print('Predicted classes length:', len(predicted_classes))

Predicted Classes: tensor([4, 3, 0, 4, 1, 2, 2, 2, 4, 2, 3, 4, 0, 1, 0, 3, 0, 2, 1, 0, 2, 2, 1, 2,
        0, 2, 4, 4, 4, 4, 1, 4, 4, 3, 0, 1, 2, 4, 0, 1, 2, 1, 2, 1, 0, 1, 3, 4,
        2, 4, 3, 3, 4, 4, 2, 4, 3, 0, 4, 2, 1, 4, 3, 3, 3, 1, 2, 2, 4, 4, 2, 0,
        0, 4, 0, 1, 1, 4, 3, 1, 2, 3, 0, 1, 3, 2, 3, 3, 0, 4, 2, 0, 1, 1, 3, 3,
        2, 1, 1, 4])
Predicted Classes Shape: torch.Size([100])
Predicted classes length: 100


### Task 4

Code a function `predict_tta`. It should accept a neural network, a DataLoader, a torch.device, and the number of iterations over the DataLoader. It should have the following signature: `def predict_tta(model: nn.Module, loader: DataLoader, device: torch.device, iterations: int = 2)`  

Inside the function, take the following steps:
-   Start a loop for the number of iterations.
-   Inside the loop, iterate through the DataLoader.
-   Record the model's responses (not classes, but raw neural network outputs) into one large tensor of size \[N, C\], where C is the number of classes, and N is the number of objects in the dataset (that is, we must have a vector of neural network outputs, logits, for each object).
-   Make one huge tensor of size \[N, C, iterations\] from these tensors, average it over the iterations so that its size becomes \[N, C\].
-   Then predict the classes for the objects from this tensor as argmax, return them from the function.
-   Your function should return a tensor with classes. Don't forget to switch the model to evaluation mode (`model.eval()`) and use a decorator to disable gradient calculation (e.g. `@torch.no_grad()`).

In [7]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


@torch.no_grad()
def predict_tta(model: nn.Module, loader: DataLoader, device: torch.device, iterations: int = 2) -> torch.Tensor:
    """Predict classes from logits averaged over several passes of ``loader``.

    The model is switched to evaluation mode and gradient tracking is
    disabled for the whole call. On every pass the raw model outputs of all
    batches are stacked into an [N, C] tensor; the passes are then stacked to
    [N, C, iterations], averaged over the last axis, and reduced with argmax
    over the class axis.

    Each batch may be either a bare input tensor or a list/tuple whose first
    element is the input tensor (for example ``(inputs, labels)``).

    Args:
        model: Network that produces one row of logits per input object.
        loader: Iterable of batches; it is iterated ``iterations`` times, so
            every pass must yield the objects in the same order.
        device: Device the inputs are moved to before the forward pass.
            The model itself is not moved by this function.
        iterations: Number of passes over ``loader``.

    Returns:
        1-D tensor of predicted class indices, one per object.
    """
    model.eval()
    per_pass_logits = []

    for _ in range(iterations):
        batch_logits = []
        for batch in loader:
            # Unpacking a bare tensor with ``inputs, _ = batch`` would split
            # it along dim 0, so pick the inputs explicitly instead.
            inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
            batch_logits.append(model(inputs.to(device)))
        # [N, C] logits for this pass.
        per_pass_logits.append(torch.vstack(batch_logits))

    # [N, C, iterations] -> mean over passes -> [N, C].
    stacked_logits = torch.stack(per_pass_logits, dim=2)
    mean_logits = stacked_logits.mean(dim=2)

    return torch.argmax(mean_logits, dim=1)

In [8]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Dummy Model for testing
class DummyModel(nn.Module):
    """Single linear layer used as a stand-in classifier for testing."""

    def __init__(self, input_size: int, num_classes: int) -> None:
        super().__init__()
        # Maps input_size features to num_classes outputs.
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output for ``x``."""
        return self.linear(x)

# Synthetic data for testing: flattened 28x28 inputs, 10 classes, and three
# passes over the loader.
batch_size = 16
num_samples = 100 
input_size = 28 * 28 
num_classes = 10
iterations = 3

images = torch.randn(num_samples, input_size)
labels = torch.randint(0, num_classes, (num_samples,))

dataset = TensorDataset(images, labels)  # Create a dataset
# No shuffling is requested, so every pass yields the samples in the same order.
loader = DataLoader(dataset, batch_size=batch_size)

device = torch.device('cpu') 
model = DummyModel(input_size, num_classes).to(device)


predicted_classes = predict_tta(model, loader, device, iterations)

# One class index per sample is expected, i.e. num_samples entries.
print(f'Predicted classes shape: {predicted_classes.shape}')
print(f'Predicted classes type: {predicted_classes.dtype}')
print(f'Predicted classes: {predicted_classes}')

Predicted classes shape: torch.Size([100])
Predicted classes type: torch.int64
Predicted classes: tensor([4, 5, 6, 8, 6, 7, 9, 3, 8, 5, 1, 7, 1, 2, 1, 1, 0, 1, 8, 6, 4, 6, 7, 6,
        0, 2, 2, 4, 4, 1, 3, 1, 5, 6, 5, 2, 5, 1, 5, 0, 2, 3, 5, 3, 3, 9, 4, 9,
        0, 9, 6, 0, 7, 8, 1, 5, 7, 6, 9, 7, 8, 9, 4, 1, 3, 8, 4, 4, 0, 5, 8, 0,
        8, 8, 9, 7, 3, 3, 6, 8, 2, 9, 1, 3, 4, 6, 5, 2, 3, 9, 3, 3, 5, 0, 9, 8,
        7, 3, 1, 3])
