### Mahsa Mozafarinia
#### Fine-tuning a pre-trained ResNet-50 on CIFAR-10 and training it from scratch.


In [None]:
import torch
import sys
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import SubsetRandomSampler
from torchvision.models import (
    resnet50,
    ResNet50_Weights,
    vgg11,
    vgg16,
    alexnet,
    VGG11_Weights,
    VGG16_Weights,
    AlexNet_Weights,
)
from sklearn.model_selection import KFold
import time
import math
import os
import copy
import pandas as pd
import numpy as np
from torch.utils.data import sampler
# from utils import progress_bar

In [None]:
# Root directory under which the "Weights/" and "Accuracies/" outputs are written.
# The trailing slash matters: the training code builds file names by plain
# string concatenation (path + "Weights/...").
path = "/Users/mahsa.mozafarinia/Documents/Jupyter/Machine Learning Project/"

In [None]:
import logging

# from EDGE_4_4_1 import EDGE
# from matplotlib.ticker import MaxNLocator
# plt.style.use('ggplot')
# Named logger; Network.trained_enough writes its debug messages to it.
log = logging.getLogger("sampleLogger")

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class Network:
    """Factory wrapper around a handful of torchvision classifiers.

    Supported ``arch`` values are "vgg11", "vgg16", "resnet" (ResNet-50) and
    "alexnet". With ``pretrained=True`` the model is created with the
    corresponding torchvision weights, the matching preprocessing transform is
    stored in ``self.preprocess`` and the model is switched to eval mode;
    otherwise a randomly initialised model is created and ``self.preprocess``
    is left untouched.
    """

    def __init__(self, device, arch, pretrained=False):
        """Store the configuration; no model is built until ``set_model``."""
        self.device = device
        self.arch = arch
        self.pretrained = pretrained
        # Both are populated by set_model().
        self.model = None
        self.preprocess = None

    def set_model(self):
        """Instantiate the configured architecture on ``self.device``.

        Returns:
            The created model (also stored in ``self.model``).

        Exits the interpreter via ``sys.exit`` when ``self.arch`` is not one
        of the supported names.
        """
        # Guard clause first, so an unknown name never reaches the lookup.
        if self.arch not in ("vgg11", "vgg16", "resnet", "alexnet"):
            sys.exit("Wrong architecture")

        # Constructor and pretrained weights for each architecture name.
        builder, weights = {
            "vgg11": (vgg11, VGG11_Weights.IMAGENET1K_V1),
            "vgg16": (vgg16, VGG16_Weights.IMAGENET1K_V1),
            "resnet": (resnet50, ResNet50_Weights.DEFAULT),
            "alexnet": (alexnet, AlexNet_Weights.IMAGENET1K_V1),
        }[self.arch]

        if not self.pretrained:
            self.model = builder().to(self.device)
            return self.model

        self.preprocess = weights.transforms()
        self.model = builder(weights=weights).to(self.device)
        # Pretrained models start in inference mode; callers that fine-tune
        # must switch back with model.train().
        self.model.eval()
        return self.model

    def trained_enough(
        self, accuracy, dataloader, loss_fn, optimizer, epochs, device
    ):
        """Keep training until the reported accuracy is at least 0.20.

        Each round calls ``train`` and takes the first element of its result
        as the new accuracy, logging it at debug level.

        NOTE(review): ``train`` is not defined in the visible source, and the
        original author remarked that this method is probably unused here.
        """
        extra_rounds = 0
        while accuracy < 0.20:
            result = train(
                self.model, dataloader, loss_fn, optimizer, epochs, device
            )
            accuracy, _ = result
            log.debug(
                f"{extra_rounds} epoch extra training, accuracy: {100 * accuracy}"
            )
            extra_rounds += 1

In [None]:
# Mini-batch size.
# NOTE(review): the DataLoader calls visible in this file pass a literal 128
# rather than this variable, so changing it here has no effect on them.
batch_size = 128

In [None]:
# Per-channel mean and standard deviation of the CIFAR-10 training images.
# The training split is loaded with ToTensor() as its only transform, so the
# statistics are computed on the un-normalised image tensors.
cifar_trainset = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Each dataset item is an (image, label) pair; only the images are needed.
# They are stacked along a new leading axis, so axis 1 is the colour channel.
imgs = torch.stack([image for image, _label in cifar_trainset], dim=0).numpy()

# Mean of every colour channel (r, g, b) over all images and pixels.
mean_r, mean_g, mean_b = (imgs[:, channel, :, :].mean() for channel in range(3))
print(mean_r, mean_g, mean_b)

# Standard deviation of every colour channel (r, g, b).
std_r, std_g, std_b = (imgs[:, channel, :, :].std() for channel in range(3))
print(std_r, std_g, std_b)

# Pretrained

In [None]:
# Data pipeline for the pretrained experiment. Inputs are normalised with the
# ImageNet mean/std ([0.485, 0.456, 0.406] / [0.229, 0.224, 0.225]) instead of
# CIFAR-10's own statistics
# ([0.49139968, 0.48215827, 0.44653124] / [0.24703233, 0.24348505, 0.26158768]).
print("==> Preparing data..")

# Training: random crop resized to 224 plus horizontal flip as augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),  # crop randomly, then resize to 224
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Evaluation: deterministic resize only, same normalisation.
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

trainset = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transform_train,
)
# Shuffled mini-batches of 128 for training.
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=transform_test,
)
# Fixed order for evaluation.
testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Human-readable class names.
# NOTE(review): presumably in CIFAR-10 label-index order; confirm before indexing by label.
classes = tuple("plane car bird cat deer dog frog horse ship truck".split())

In [None]:
def train_func(epoch_number, scheduler_fun, itera=0):
    """Fine-tune an ImageNet-pretrained ResNet-50 on CIFAR-10, once per scheduler.

    For every name in ``scheduler_fun`` a fresh pretrained ResNet-50 is built,
    its final layer is replaced by a 10-way linear layer, and the network is
    trained with SGD (lr=0.01, momentum=0.9) on the module-level
    ``trainloader``. After each epoch it is evaluated on ``testloader``, the
    learning-rate scheduler is stepped and a checkpoint is saved.

    Side effects (all below the module-level ``path``):
      * ``Weights/res-CIF-iter{itera}-epoch{epoch}-scheduler{sched}.pth``
        is written after every epoch.
      * ``Accuracies/res_CIF_epoch_accs-iter{itera}-scheduler{sched}.csv``
        is written once per scheduler; row 0 holds the train accuracy and
        row 1 the test accuracy (in percent), one column per epoch.

    Args:
        epoch_number: Number of training epochs per scheduler.
        scheduler_fun: Iterable of scheduler names, each "steplr"
            (StepLR, x0.1 every 10 epochs) or "explr" (ExponentialLR with the
            same x0.1-per-10-epochs decay applied smoothly).
        itera: Run identifier embedded in the output file names. The original
            body read an undefined name ``itera`` and therefore crashed with
            a NameError at the first checkpoint; it is now a parameter.

    Raises:
        ValueError: If ``scheduler_fun`` contains an unsupported name. This is
            checked before any training so a typo does not cost an epoch.
    """
    supported = ("steplr", "explr")
    schedulers = list(scheduler_fun)
    unknown = [name for name in schedulers if name not in supported]
    if unknown:
        raise ValueError(
            "Unsupported scheduler name(s) %r; expected one of %r"
            % (unknown, supported)
        )

    for sched in schedulers:
        # Make sure the output folders exist before spending time on training.
        for subdir in ("Weights/", "Accuracies/"):
            os.makedirs(os.path.dirname(path + subdir), exist_ok=True)

        model = Network(device, "resnet", True).set_model()
        # Swap the classification layer for a 10-class one.
        model.fc = nn.Linear(2048, 10)
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        if sched == "steplr":
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer, step_size=10, gamma=0.1
            )
        else:  # "explr": per-epoch gamma chosen so that 10 epochs give x0.1
            scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer, gamma=math.exp(math.log(0.1) / 10)
            )

        # Row 0: train accuracy, row 1: test accuracy; one column per epoch.
        # Float-initialised so that float results can be stored directly.
        train_test_accuracy_epochs = pd.DataFrame(
            0.0, index=range(2), columns=range(epoch_number)
        )

        for epoch in range(epoch_number):
            # ---- training pass ----
            print("\nEpoch: %d" % epoch)
            model.train()  # set_model() leaves pretrained models in eval mode
            train_loss = 0
            correct = 0
            total = 0
            for inputs, targets in trainloader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

            train_test_accuracy_epochs.iloc[0, epoch] = 100.0 * correct / total

            # ---- evaluation pass ----
            model.eval()
            test_loss = 0
            test_correct = 0
            test_total = 0
            with torch.no_grad():
                for inputs, targets in testloader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)

                    test_loss += loss.item()
                    _, predicted = outputs.max(1)
                    test_total += targets.size(0)
                    test_correct += predicted.eq(targets).sum().item()

            # One scheduler step per epoch; the printed value is the learning
            # rate that the next epoch will use.
            scheduler.step()
            print("\n______________lr____________\n", optimizer.param_groups[0]["lr"])

            train_test_accuracy_epochs.iloc[1, epoch] = (
                100.0 * test_correct / test_total
            )
            torch.save(
                model.state_dict(),
                path
                + "Weights/res-CIF-iter{}-epoch{}-scheduler{}.pth".format(
                    itera, epoch, sched
                ),
            )

            print(
                epoch,
                "test-Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    test_loss / len(testloader),
                    100.0 * test_correct / test_total,
                    test_correct,
                    test_total,
                ),
            )

            print(
                epoch,
                "train-Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    train_loss / len(trainloader),
                    100.0 * correct / total,
                    correct,
                    total,
                ),
            )

        train_test_accuracy_epochs.to_csv(
            path
            + "Accuracies/res_CIF_epoch_accs-iter{}-scheduler{}.csv".format(
                itera, sched
            )
        )

In [None]:
# Fine-tune the pretrained ResNet-50 for 50 epochs with each scheduler in turn.
train_func(50, ["explr", "steplr"])

# From Scratch

In [None]:
# Data pipeline for the from-scratch experiment. Inputs are normalised with
# the CIFAR-10 channel statistics; the ImageNet values
# ([0.485, 0.456, 0.406] / [0.229, 0.224, 0.225]) are not used here.
print("==> Preparing data..")

# Training: random crop resized to 224 plus horizontal flip as augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),  # crop randomly, then resize to 224
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.49139968, 0.48215827, 0.44653124],
        [0.24703233, 0.24348505, 0.26158768],
    ),
])

# Evaluation: deterministic resize only, same normalisation.
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.49139968, 0.48215827, 0.44653124],
        [0.24703233, 0.24348505, 0.26158768],
    ),
])

trainset = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transform_train,
)
# Shuffled mini-batches of 128 for training.
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=transform_test,
)
# Fixed order for evaluation.
testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Human-readable class names.
# NOTE(review): presumably in CIFAR-10 label-index order; confirm before indexing by label.
classes = tuple("plane car bird cat deer dog frog horse ship truck".split())

In [None]:
def train_func(epoch_number, scheduler_fun, step_size=40):
    """Train a randomly initialised ResNet-50 on CIFAR-10, once per scheduler.

    This definition replaces the earlier ``train_func`` of the same name. For
    every name in ``scheduler_fun`` a fresh, non-pretrained ResNet-50 is
    built, its final layer is replaced by a 10-way linear layer, and the
    network is trained with SGD (lr=0.1, momentum=0.9) on the module-level
    ``trainloader``. After each epoch it is evaluated on ``testloader``, the
    learning-rate scheduler is stepped and a checkpoint is saved.

    Side effects (all below the module-level ``path``):
      * ``Weights/res-CIF-iter-epoch{epoch}_scratch-scheduler{sched}-step{step_size}.pth``
        is written after every epoch.
      * ``Accuracies/res_CIF_epoch_accs_scratch_scheduler{sched}-step{step_size}.csv``
        is written once per scheduler; row 0 holds the train accuracy and
        row 1 the test accuracy (in percent), one column per epoch.

    Args:
        epoch_number: Number of training epochs per scheduler.
        scheduler_fun: Iterable of scheduler names, each "steplr"
            (StepLR, x0.1 every ``step_size`` epochs) or "explr"
            (ExponentialLR with the same overall decay applied smoothly).
        step_size: Number of epochs over which the learning rate drops by a
            factor of 10. It drives both the scheduler and the "step" tag in
            the output file names. Previously the scheduler used 40 while the
            file names were hard-coded to "step20", which mislabelled results.

    Raises:
        ValueError: If ``step_size`` is not positive or ``scheduler_fun``
            contains an unsupported name. Both are checked before any
            training so a typo does not cost an epoch.
    """
    if step_size <= 0:
        raise ValueError("step_size must be positive, got %r" % (step_size,))

    supported = ("steplr", "explr")
    schedulers = list(scheduler_fun)
    unknown = [name for name in schedulers if name not in supported]
    if unknown:
        raise ValueError(
            "Unsupported scheduler name(s) %r; expected one of %r"
            % (unknown, supported)
        )

    for sched in schedulers:
        # Make sure the output folders exist before spending time on training.
        for subdir in ("Weights/", "Accuracies/"):
            os.makedirs(os.path.dirname(path + subdir), exist_ok=True)

        model = Network(device, "resnet", False).set_model()
        # Swap the classification layer for a 10-class one.
        model.fc = nn.Linear(2048, 10)
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        if sched == "steplr":
            scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer, step_size=step_size, gamma=0.1
            )
        else:  # "explr": per-epoch gamma chosen so step_size epochs give x0.1
            scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer, gamma=math.exp(math.log(0.1) / step_size)
            )

        # Row 0: train accuracy, row 1: test accuracy; one column per epoch.
        # Float-initialised so that float results can be stored directly.
        train_test_accuracy_epochs = pd.DataFrame(
            0.0, index=range(2), columns=range(epoch_number)
        )

        for epoch in range(epoch_number):
            # ---- training pass ----
            print("\nEpoch: %d" % epoch)
            model.train()
            train_loss = 0
            correct = 0
            total = 0
            for inputs, targets in trainloader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

            train_test_accuracy_epochs.iloc[0, epoch] = 100.0 * correct / total

            # ---- evaluation pass ----
            model.eval()
            test_loss = 0
            test_correct = 0
            test_total = 0
            with torch.no_grad():
                for inputs, targets in testloader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)

                    test_loss += loss.item()
                    _, predicted = outputs.max(1)
                    test_total += targets.size(0)
                    test_correct += predicted.eq(targets).sum().item()

            # One scheduler step per epoch; the printed value is the learning
            # rate that the next epoch will use.
            scheduler.step()
            print("\n______________lr____________\n", optimizer.param_groups[0]["lr"])

            train_test_accuracy_epochs.iloc[1, epoch] = (
                100.0 * test_correct / test_total
            )
            torch.save(
                model.state_dict(),
                path
                + "Weights/res-CIF-iter-epoch{}_scratch-scheduler{}-step{}.pth".format(
                    epoch, sched, step_size
                ),
            )

            print(
                epoch,
                "test Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    test_loss / len(testloader),
                    100.0 * test_correct / test_total,
                    test_correct,
                    test_total,
                ),
            )

            print(
                epoch,
                "train Loss: %.3f | Acc: %.3f%% (%d/%d)"
                % (
                    train_loss / len(trainloader),
                    100.0 * correct / total,
                    correct,
                    total,
                ),
            )

        train_test_accuracy_epochs.to_csv(
            path
            + "Accuracies/res_CIF_epoch_accs_scratch_scheduler{}-step{}.csv".format(
                sched, step_size
            )
        )

### I ran this model with different step sizes. When the step size was less than 40, the learning rate became small very quickly, which resulted in very small steps towards the minimum. Since all our experiments use 50 epochs, these small steps left the model at around 80 percent accuracy on both the training and test sets.

In [None]:
# Train ResNet-50 from scratch for 50 epochs with each scheduler in turn.
train_func(50, ["steplr", "explr"])