# Experimenting with Residual Networks

In [ ]:
import os
user = "raphbrd"
repo = "residual_learning"

if os.path.isdir(repo):
    !rm -rf {repo}

!git clone https://github.com/{user}/{repo}.git

# not pretty code
from pathlib import Path
import shutil
p = Path(".")
for file in p.glob("residual_learning/*.py"):
    shutil.move(file, file.name)

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.optim.lr_scheduler import StepLR

from tqdm.notebook import tqdm

from models import LeNet5, ResNet, xavier_weights, kaiming_weights
from residuals import ConvResBlock, ConvPlainBlock, ConvResBlockPre
from data import _load_torch_data
from training import CallBacks, Trainer, Stream
from viz import plot_training_results

The following parameters are also used as the defaults in the Streamlit app:

In [None]:
# Settings shared by every model trained in this notebook.
batch_size = 128
val_size = 0.10  # presumably the fraction of the training set held out for validation -- confirm in data.py
n_epochs = 65
initialization = "xavier"  # or "kaiming"
path = "./derivatives"

# multi-class (K = 10) classification task
criterion = nn.CrossEntropyLoss()

# Create the output tree. makedirs(..., exist_ok=True) creates `path` itself
# when needed and also repairs a partially created tree (e.g. `path` exists
# but `models/` or `figures/` is missing), which a single existence check on
# `path` would skip.
os.makedirs(path + "/models", exist_ok=True)
os.makedirs(path + "/figures", exist_ok=True)

## Loading the CIFAR-10 dataset from torchvision

In [None]:
# Folder handed to the loader as `data_path`; create it on first use.
data_dir = "sample_data"
if not os.path.exists(data_dir):
    os.mkdir(data_dir)

# Build the CIFAR-10 loaders from the notebook-level batch and validation sizes.
loaders = _load_torch_data(
    data_path=data_dir,
    dataset_name="CIFAR10",
    batch_size=batch_size,
    val_size=val_size,
)
train_loader, validation_loader, test_loader, classes = loaders

# Shape of the first element of one training batch; input_size[1] is later
# passed to the network constructors as their first argument.
first_batch = next(iter(train_loader))
input_size = first_batch[0].shape

def run_model(model_, desc_):
    """Train ``model_`` with the notebook-level configuration.

    Reads the globals ``initialization``, ``criterion``, ``path``,
    ``n_epochs``, ``train_loader`` and ``validation_loader``.

    Args:
        model_: network to train. Its weights are first passed to
            ``xavier_weights`` or ``kaiming_weights`` depending on
            ``initialization``; any other value leaves the weights as they
            were constructed.
        desc_: label forwarded to ``Trainer.fit`` as ``desc``.

    Returns:
        A ``(training_out, trainer)`` tuple: the value returned by
        ``Trainer.fit`` and the ``Trainer`` instance that produced it.
    """
    if initialization == "xavier":
        xavier_weights(model_)
    elif initialization == "kaiming":
        kaiming_weights(model_)

    optimizer = optim.SGD(model_.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
    # Divide the learning rate by 10 after 40 epochs.
    scheduler = StepLR(optimizer, step_size=40, gamma=0.1)

    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # notebook still runs on CPU-only runtimes.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    trainer = Trainer(model_, optimizer, scheduler, criterion, callbacks=CallBacks(Stream(print, tqdm)),
                      device=device, save_path=path + "/models")
    training_out = trainer.fit(train_loader, validation_loader, n_epochs=n_epochs, save_epo_state=False, desc=desc_)

    return training_out, trainer

## Training ResNets and PlainNets

In [None]:
# Networks trained by this run: pre-activated ResNets (ConvResBlockPre), keyed
# by the name used for progress labels and result file names.
#
# Variants explored earlier and currently disabled, all with
# features_shapes=[16, 32, 64] (ResNet ones used block_type=ConvResBlock):
#   ResNet-14 / PlainNet-14: module_list=[2, 2, 2]
#   ResNet-18 / PlainNet-18: module_list=[3, 3, 3]
#   ResNet-32 / PlainNet-32: module_list=[5, 5, 5]
#   ResNet-38 / PlainNet-38: module_list=[6, 6, 6]
# PlainNet is not among the names imported from `models` above, so it must be
# imported before re-enabling those entries.
# NOTE(review): under the usual 6n+2 depth count, 3 blocks per stage would give
# 20 layers rather than 18 -- confirm the naming against models.py.
models = {
    name: ResNet(
        input_size[1],
        len(classes),
        module_list=[blocks_per_stage] * 3,
        features_shapes=[16, 32, 64],
        block_type=ConvResBlockPre,
    )
    for name, blocks_per_stage in (
        ("ResNet-14Pre", 2),
        ("ResNet-18Pre", 3),
        ("ResNet-38Pre", 6),
    )
}

# Train each configured network in turn, evaluate it on the test loader and
# write one CSV of per-class results (plus an overall "all" row) per network.
for key, model in models.items():
    # run_model returns (training output, trainer); `model` is rebound to the
    # first element from here on.
    model, trainer = run_model(model, key)

    # Overall test metrics first, then the per-class breakdown.
    test_loss, accuracy = trainer.run_test(test_loader)
    out_per_class = trainer.run_test_per_class(test_loader)

    # One-row frame holding the aggregate metrics over the whole test set.
    summary = {
        "classes": "all",
        "accuracy": accuracy,
        "loss": test_loss,
        "n_instances": len(test_loader.dataset),
    }
    last_row = pd.DataFrame(summary, index=[0])

    # Append the aggregate row after the per-class rows and renumber the index.
    out_per_class = pd.concat([out_per_class, last_row], ignore_index=True)

    csv_path = path + f"/{key}_results_test_per_class.csv"
    out_per_class.to_csv(csv_path)