In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import glob
import torch
import yaml
from torchvision import datasets

from torch.optim import lr_scheduler
import random
from tqdm import tqdm
import copy
import json
from torchvision import transforms
import torch.nn as nn
from torch import nn
import numpy as np
from PIL import Image

from torchvision.transforms import transforms

import torchvision.models as models
from collections import OrderedDict

import torch.nn.functional as F
from torchvision import transforms as T
import torchvision
from torch.utils.data.dataloader import DataLoader
from torch.utils.tensorboard import SummaryWriter

from shutil import copyfile
import time
# !pip install --ignore-installed PyYAML

In [15]:
class ResNet(torch.nn.Module):
    def __init__(self, network_name):
        super(ResNet, self).__init__()
        if network_name == 'resnet18':
            resnet = models.resnet18(pretrained=False)
        elif network_name == 'resnet50':
            resnet = models.resnet50(pretrained=False)


        self.encoder = torch.nn.Sequential(*list(resnet.children())[:-1])

        self.projection = MLPHead(IN_channels      = resnet.fc.in_features, 
                                  mlp_HIDDEN_size  = projection_head_mlp_hidden_size,
                                  OUT_channels     = projection_head_projection_size)

    def forward(self, x):
        h = self.encoder(x)
        h = h.view(h.shape[0], h.shape[1])
        return self.projection(h)

In [16]:
class MLPHead(nn.Module):
    def __init__(self, IN_channels, mlp_HIDDEN_size, OUT_channels):
        super(MLPHead, self).__init__()

        self.net = nn.Sequential(
                      nn.Linear(IN_channels, mlp_HIDDEN_size),
                      nn.BatchNorm1d(mlp_HIDDEN_size),
                      nn.ReLU(inplace=True),
                      nn.Linear(mlp_HIDDEN_size, OUT_channels))

    def forward(self, x):
        return self.net(x)

In [4]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms_supervised = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(28),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(28),
        transforms.CenterCrop(28),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

results_supervised = {'train_loss': [], 'test_loss': [], 
                      'train_acc': [], 'test_acc': []}                   

results_BYOL = {'train_loss': [], 'test_loss': [], 
                'train_acc': [], 'test_acc': []}   

results = {'train_loss': [], 'test_loss': [], 
           'train_acc': [], 'test_acc': []}   

In [5]:
def save_plot_model_optim_supervised(train_losses, test_losses, model, type_results):
    if type_results == "BYOL":
        print("[INFO] Plotting - saving after supervised training on BYOL pretrained model into files to: {}".format(results_path))
            
        plot_acc_loss(train_losses, test_losses, "Epochs", "Loss", 'Train Loss', 'Test Loss', "Training Losses", os.path.join(results_path, 'Supervised_BYOL_training_losses.png'))
            
        torch.save(model.state_dict(), os.path.join(results_path, "Supervised_BYOL_model.pth"))
        np.savez(os.path.join(results_path, "Supervised_BYOL_lossesfile"), np.array(train_losses))
        np.savetxt(os.path.join(results_path, "Supervised_BYOL_lossesfile.csv"), train_losses, delimiter=",")

    elif type_results == "notpretrained":
        print("[INFO] Plotting - saving after supervised training on NOT-pretrained model into files to: {}".format(results_path))
            
        plot_acc_loss(train_losses, test_losses, "Epochs", "Loss", 'Train Loss', 'Test Loss', "Training Losses", os.path.join(results_path, 'Supervised_NotPret_training_losses.png'))
            
        torch.save(model.state_dict(), os.path.join(results_path, "Supervised_NotPret_model.pth"))
        np.savez(os.path.join(results_path, "Supervised_NotPret_lossesfile"), np.array(train_losses))
        np.savetxt(os.path.join(results_path, "Supervised_NotPret_lossesfile.csv"), train_losses, delimiter=",")

# array of data, name of graph, folder to save
def plot_acc_loss(arr1, arr2, x_axes, y_axes, legend1, legend2, legend_name, fname):
    plt.figure(figsize=(10, 10))
    sns.set_style('darkgrid')
    plt.title(legend_name)
    plt.plot(arr1, label=arr1)
    if arr2:
        plt.plot(arr2, label=arr2)
    plt.xlabel(x_axes)
    plt.ylabel(y_axes)
    plt.legend([legend1, legend2])
    plt.savefig(fname)
    plt.show()
    plt.close()

In [6]:
# MODEL
def get_model(arch, out_dim):
    # defining our deep learning architecture
    if arch == "resnet18":
      resnet = models.resnet18(pretrained=False, num_classes=out_dim)
    elif arch == "resnet34":
      resnet = models.resnet34(pretrained=False, num_classes=out_dim)
    elif arch == "resnet50":
      resnet = models.resnet50(pretrained=False, num_classes=out_dim)
    else:
      print("[ERROR] Define resnet18 or resnet34 or resnet50")

    print("[INFO] Training on architecture: {}".format(arch))

    resnet.to(device)

    return resnet

In [49]:
class Supervised:
    def __init__(self, model, optimizer, scheduler, dataloaders, loss_function):
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.dataloaders = dataloaders
        self.loss_function = loss_function
        #logging.basicConfig(filename=os.path.join(results_path, 'training.log'), level=logging.DEBUG)

    def train_test_model(self, epochs, log_interval):
        since = time.time()

        best_model_wts = copy.deepcopy(self.model.state_dict())
        best_acc, epoch_of_best_acc = 0.0, 0
        
        for epoch in range(1, SUPERVISED_EPOCHS+1):
            since_epoch = time.time()
            print('-' * 10)
            print('Epoch {}/{}'.format(epoch, SUPERVISED_EPOCHS))
            
            # Each epoch has a training and validation phase
            for phase in ['train', 'test']:
                if phase == 'train':
                    self.model.train()  # Set model to training mode
                else:
                    self.model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in self.dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    self.optimizer.zero_grad()

                    # forward - track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = self.model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = self.loss_function(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            self.optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                #if phase == 'train':
                    #scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                if phase == 'train':
                    results_supervised['train_loss'].append(epoch_loss)
                    results_supervised['train_acc'].append(epoch_acc.item())
                if phase == 'test':
                    results_supervised['test_loss'].append(epoch_loss)
                    results_supervised['test_acc'].append(epoch_acc.item())

                time_elapsed_epoch = time.time() - since_epoch
                print("[INFO] {} - Epoch {}/{} - Loss {:.4f} - Acc: {:.4f} - Time of last epoch: {:.0f}m {:.0f}s".format(phase, epoch, SUPERVISED_EPOCHS, epoch_loss, epoch_acc, time_elapsed_epoch // 60, time_elapsed_epoch % 60))
                #logging.info('Epoch {} - Loss: {:.4f} - Acc: {:.4f}'.format(epoch, epoch_loss, epoch_acc))

                # deep copy the model
                if phase == 'test' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(self.model.state_dict())
                    epoch_of_best_acc = epoch

                
        #logging.info('-------------TRAINING FINNISH-------------')
        time_elapsed = time.time() - since
        print('[INFO] Training completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print('[INFO] Best testing Accuracy: {:2} on epoch: {}'.format(best_acc*100, epoch_of_best_acc))

        plot_acc_loss(results_supervised['train_loss'], results_supervised['test_loss'], "EPOCHS", "LOSSES", "Train Loss", "Test Loss", "Losses", os.path.join(results_path, "train_test_losses_supervised.jpg"))
        plot_acc_loss(results_supervised['train_acc'], results_supervised['test_acc'], "EPOCHS", "ACCURACY", "Train Acc", "Test Acc", "Accuracy", os.path.join(results_path, "train_test_acc_supervised.jpg"))

        data_frame = pd.DataFrame(data=results_supervised, index=range(1, epoch + 1))
        data_frame.to_csv(os.path.join(results_path, "DATA_loss_acc_supervised.csv"), index_label='epoch')

        # load best model weights
        self.model.load_state_dict(best_model_wts)
        return self.model, results

In [50]:
arch = "resnet18"
dataset = "MNIST"

SUPERVISED_BATCH = 256
SUPERVISED_EPOCHS = 5
NUM_OUTPUT_CLASS = 10
LOG_INTERVAL = 10

print("[INFO] Preparing Datasets...")

data_dir = '/content/drive/MyDrive/Colab Notebooks/Self-Supervised-Learning/datasets/MNIST_folders'

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms_supervised[x]) for x in ['train', 'test']}
dataloaders =    {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=SUPERVISED_BATCH, shuffle=True, num_workers=4) for x in ['train', 'test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes
print("[INFO] Class names: {}".format(class_names))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("[INFO] Training on: {}".format(device))

results_path = "/content/drive/MyDrive/Colab Notebooks/Supervised-Learning/results/supervised-mlp"     # Path to the results directory where the saved model and evaluation graphs would be stored.
specific_folder = 'dataset-{}_arch-{}_epochs-{}_batch-{}/'.format(dataset, arch, SUPERVISED_EPOCHS, SUPERVISED_BATCH)
results_path = os.path.join(results_path, specific_folder)
Path(results_path).mkdir(parents=True, exist_ok=True)
print("[INFO] Results path: {}".format(results_path))

[INFO] Preparing Datasets...
[INFO] Class names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
[INFO] Training on: cuda:0
[INFO] Results path: /content/drive/MyDrive/Colab Notebooks/Supervised-Learning/results/supervised-mlp/dataset-MNIST_arch-resnet18_epochs-5_batch-256/


In [None]:
model_supervised = get_model(arch, NUM_OUTPUT_CLASS)

learning_rate = 0.01
momentum = 0.9

loss_function = nn.CrossEntropyLoss()
optimizer_ft = torch.optim.SGD(model_supervised.parameters(), lr=learning_rate, momentum=momentum)              # Observe that all parameters are being optimized
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)      # Decay LR by a factor of 0.1 every 7 epochs

supervised_model = Supervised(model_supervised, optimizer_ft, None, dataloaders, loss_function)
print("[INFO] Start training on MNIST dataset for {} epochs, with {} batch_size...".format(SUPERVISED_EPOCHS, SUPERVISED_BATCH))
supervised_model_trained, results_supervised = supervised_model.train_test_model(SUPERVISED_EPOCHS, LOG_INTERVAL)
save_plot_model_optim_supervised(results_supervised['train_loss'], results_supervised['test_loss'], supervised_model_trained, type_results="notpretrained")   

In [22]:
model_supervised

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [52]:
# configurations for the projection and prediction heads
projection_head_mlp_hidden_size = 512     # Original implementation uses 4096
projection_head_projection_size = 128     # Original implementation uses 256

# LOAD PRETRAINED MODEL on BYOL
BYOL_pretrained = ResNet(arch).to(device)

# predictor network
predictor = MLPHead(IN_channels = BYOL_pretrained.projection.net[-1].out_features, 
                    mlp_HIDDEN_size = projection_head_mlp_hidden_size, 
                    OUT_channels = projection_head_projection_size).to(device)

results_path_load_BYOL = "/content/drive/MyDrive/Colab Notebooks/Self-Supervised-Learning/BYOL/sthalles/results/BYOL/dataset-MNIST_arch-resnet18_epochs-50_batch-200/"

print("[INFO] Loading pretrained model...")
load_params = torch.load(os.path.join(results_path_load_BYOL, 'BYOL_model.pth'), map_location = torch.device(torch.device(device)))
if 'Online_Net_state_dict' in load_params:
    BYOL_pretrained.load_state_dict(load_params['Online_Net_state_dict'])
    print("Parameters successfully loaded.")


[INFO] Loading pretrained model...
Parameters successfully loaded.


In [53]:
BYOL_pretrained = torch.nn.Sequential(*list(BYOL_pretrained.children())[:-1],   # remove the projection head
                                      nn.Linear(512, 10))   
BYOL_pretrained

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [54]:
BYOL_pretrained.to(device)

learning_rate = 0.01
momentum = 0.9

loss_function = nn.CrossEntropyLoss()
optimizer_ft = torch.optim.SGD(BYOL_pretrained.parameters(), lr=learning_rate, momentum=momentum)              # Observe that all parameters are being optimized
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)      # Decay LR by a factor of 0.1 every 7 epochs

BYOL_pretrained = Supervised(BYOL_pretrained, optimizer_ft, None, dataloaders, loss_function)
print("[INFO] Start training on MNIST dataset for {} epochs, with {} batch_size...".format(SUPERVISED_EPOCHS, SUPERVISED_BATCH))
BYOL_pretrained_trained, results_BYOL = BYOL_pretrained.train_test_model(SUPERVISED_EPOCHS, LOG_INTERVAL)
save_plot_model_optim_supervised(results_BYOL['train_loss'], results_BYOL['test_loss'], BYOL_pretrained_trained, type_results="BYOL")   

[INFO] Start training on MNIST dataset for 5 epochs, with 256 batch_size...
----------
Epoch 1/5
tensor([[[[-2.1179, -2.1179, -2.1179,  ..., -1.1760, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -1.4672, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -1.9638, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],

         [[-2.0357, -2.0357, -2.0357,  ..., -1.0728, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -1.3704, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -1.8782, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

     

RuntimeError: ignored