# import

In [None]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ["MKL_NUM_THREADS"] = "2"
os.environ["NUMEXPR_NU M_THREADS"] = "2"
os.environ["OMP_NUM_THREADS"] = "2"
import time

import numpy
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.cuda.amp import autocast

from torch.optim.lr_scheduler import LambdaLR
from torch.utils.tensorboard import SummaryWriter

In [3]:
from ubs8k.datasetManager import DatasetManager
from ubs8k.datasets import Dataset

from DCT.util.utils import reset_seed, get_datetime, get_model_from_name, load_dataset
from DCT.util.checkpoint import CheckPoint
from metric_utils.metrics import CategoricalAccuracy, FScore, ContinueAverage

# Arguments

In [4]:
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--dataset_root", default="../datasets", type=str)
parser.add_argument("-D", "--dataset", default="cifar10", type=str, help="available [ubs8k | cifar10]")

parser.add_argument("--supervised_ratio", default=1.0, type=float)
parser.add_argument("-t", "--train_folds", nargs="+", default=[1, 2, 3, 4, 5, 6, 7, 8, 9], type=int)
parser.add_argument("-v", "--val_folds", nargs="+", default=[10], type=int)

parser.add_argument("--model", default="wideresnet28_2", type=str)
parser.add_argument("--batch_size", default=128, type=int)
parser.add_argument("--nb_epoch", default=300, type=int)
parser.add_argument("--learning_rate", default=0.003, type=int)

parser.add_argument("--checkpoint_path", default="../model_save/ubs8k/full_supervised", type=str)
parser.add_argument("--resume", action="store_true", default=False)
parser.add_argument("--tensorboard_path", default="../tensorboard/ubs8k/full_supervised", type=str)
parser.add_argument("--tensorboard_sufix", default="", type=str)

args = parser.parse_args("")

In [5]:
# modify checkpoint and tensorboard path to fit the dataset
checkpoint_path_ = args.checkpoint_path.split("/")
tensorboard_path_ = args.tensorboard_path.split("/")

checkpoint_path_[2] = args.dataset
tensorboard_path_[2] = args.dataset

args.checkpoint_path = "/".join(checkpoint_path_)
args.tensorboard_path = "/".join(tensorboard_path_)
args

Namespace(batch_size=128, checkpoint_path='../model_save/cifar10/full_supervised', dataset='cifar10', dataset_root='../datasets', learning_rate=0.003, model='wideresnet28_2', nb_epoch=300, resume=False, supervised_ratio=1.0, tensorboard_path='../tensorboard/cifar10/full_supervised', tensorboard_sufix='', train_folds=[1, 2, 3, 4, 5, 6, 7, 8, 9], val_folds=[10])

# initialisation

In [6]:
reset_seed(1234)


# Prepare the dataset

In [7]:
"""
we pre-processed the images using ZCA and augmented the dataset using horizontal flips and random translations. The translations
were drawn from [−2, 2] pixels,
"""
extra_train_transforms = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
]

manager, train_loader, val_loader = load_dataset(
    args.dataset,
    "supervised",
    
    extra_train_transform = extra_train_transforms,
    
    dataset_root = args.dataset_root,
    supervised_ratio = args.supervised_ratio,
    batch_size = args.batch_size,
    train_folds = args.train_folds,
    val_folds = args.val_folds,
    verbose = 2
)

Files already downloaded and verified
Files already downloaded and verified


# Prep model

In [8]:
torch.cuda.empty_cache()

from torchvision.models import ResNet
import torchvision.models as tm

# =============================================================================
#    WIDE RES NET
# =============================================================================
def wideresnet50_2(**kwargs):
    model = ResNet(tm.resnet.Bottleneck, [3, 4, 6, 3], num_classes=10, **kwargs)
    return model

def wideresnet28_2(**kwargs):
    model = ResNet(tm.resnet.Bottleneck, [2, 2, 2, 2], num_classes=10, **kwargs)
    return model


model_func = get_model_from_name(args.model)
# model = ResNet(tm.resnet.Bottleneck, [2, 2, 2, 2], num_classes=10)
model = model_func()
model = model.cuda()


In [9]:
from torchsummaryX import summary
input_tensor = torch.zeros((64, 3, 32, 32), dtype=torch.float)
input_tensor = input_tensor.cuda()

s = summary(model, input_tensor)


                                            Kernel Shape      Output Shape  \
Layer                                                                        
0_conv1                                    [3, 64, 7, 7]  [64, 64, 16, 16]   
1_bn1                                               [64]  [64, 64, 16, 16]   
2_relu                                                 -  [64, 64, 16, 16]   
3_maxpool                                              -    [64, 64, 8, 8]   
4_layer1.0.Conv2d_conv1                   [64, 64, 1, 1]    [64, 64, 8, 8]   
5_layer1.0.BatchNorm2d_bn1                          [64]    [64, 64, 8, 8]   
6_layer1.0.ReLU_relu                                   -    [64, 64, 8, 8]   
7_layer1.0.Conv2d_conv2                   [64, 64, 3, 3]    [64, 64, 8, 8]   
8_layer1.0.BatchNorm2d_bn2                          [64]    [64, 64, 8, 8]   
9_layer1.0.ReLU_relu                                   -    [64, 64, 8, 8]   
10_layer1.0.Conv2d_conv3                 [64, 256, 1, 1]   [64, 

## Prep training

In [10]:
nb_conv = 0

for layer in s.index.values:
    if "Conv" in layer:
        nb_conv += 1
print(nb_conv)

28


In [11]:
# create model
torch.cuda.empty_cache()

model = model_func()
model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# training parameters

In [12]:
# tensorboard
tensorboard_title = "%s_%s_%.1fS" % (get_datetime(), model_func.__name__, args.supervised_ratio)
checkpoint_title = "%s_%.1fS" % (model_func.__name__, args.supervised_ratio)
tensorboard = SummaryWriter(log_dir="%s/%s" % (args.tensorboard_path, tensorboard_title), comment=model_func.__name__)
print(os.path.join(args.tensorboard_path, tensorboard_title))

# losses
loss_ce = nn.CrossEntropyLoss(reduction="mean")

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# callbacks
# https://arxiv.org/pdf/1610.02242.pdf page 11
# TODO FIND  A WAY TO DIFFERENTIATE LR SCHEDULER FOR CIFAR10 AND UBS8K
# def lr_lambda(e):
#     if e < 80:
#         phase = 1 - e / 80
#         return numpy.exp(-5 * phase**2)
    
#     elif 80 <= e < args.nb_epoch - 50:
#         return 1
#     else:
#         phase = (args.nb_epoch -e) / 50
#         return numpy.exp(-5 * (1 - phase)**2)
    
lr_lambda = lambda epoch: (1.0 + numpy.cos((epoch-1)*numpy.pi/args.nb_epoch)) * 0.5
lr_scheduler = LambdaLR(optimizer, lr_lambda)

# Checkpoint
checkpoint = CheckPoint(model, optimizer, mode="max", name="%s/%s.torch" % (args.checkpoint_path, checkpoint_title))

# Metrics
fscore_fn = FScore()
acc_fn = CategoricalAccuracy()
avg = ContinueAverage()

reset_metrics = lambda : [m.reset() for m in [fscore_fn, acc_fn, avg]]

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

../tensorboard/cifar10/full_supervised/2020-08-24_14:27:38_wideresnet28_2_1.0S


In [13]:
# import numpy as np
# import matplotlib.pyplot as plt

# def lr_lambda(e):
#     if e < 80:
#         phase = 1 - e / 80
#         return np.exp(-5 * phase**2)
    
#     elif 80 <= e < args.nb_epoch - 50:
#         return 1
#     else:
#         phase = (args.nb_epoch -e) / 50
#         return np.exp(-5 * (1 - phase)**2)

# x = np.linspace(0, 300, 300)
# y = [args.learning_rate * lr_lambda(x_) for x_ in x]

# plt.plot(x, y)
# plt.show()

In [14]:
def maximum():
    def func(key, value):
        if key not in func.max:
            func.max[key] = value
        else:
            if func.max[key] < value:
                func.max[key] = value
        return func.max[key]

    func.max = dict()
    return func
maximum_fn = maximum()

## Can resume previous training

In [15]:
if args.resume:
    checkpoint.load_last()

## training function

In [16]:
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"


header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6} - {:<9.9} {:<12.12}| {:<9.9}- {:<6.6}"
value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.4f} - {:<9.9} {:<10.4f}| {:<9.4f}- {:<6.4f}"

header = header_form.format(
    "", "Epoch", "%", "Losses:", "ce", "metrics: ", "acc", "F1 ","Time"
)


train_form = value_form
val_form = UNDERLINE_SEQ + value_form + RESET_SEQ

print(header)

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  


In [17]:
def train(epoch):
    start_time = time.time()
    print("")

    reset_metrics()
    model.train()

    for i, (X, y) in enumerate(train_loader):        
        X = X.cuda()
        y = y.cuda()

        logits = model(X)        
        loss = loss_ce(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.set_grad_enabled(False):
            pred = torch.softmax(logits, dim=1)
            pred_arg = torch.argmax(logits, dim=1)
            y_one_hot = F.one_hot(y, num_classes=10)

            acc = acc_fn(pred_arg, y).mean
            fscore = fscore_fn(pred, y_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # logs
            print(train_form.format(
                "Training: ",
                epoch + 1,
                int(100 * (i + 1) / len(train_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - start_time
            ), end="\r")

    tensorboard.add_scalar("train/Lce", avg_ce, epoch)
    tensorboard.add_scalar("train/f1", fscore, epoch)
    tensorboard.add_scalar("train/acc", acc, epoch)

In [18]:
def val(epoch):
    start_time = time.time()
    print("")
    reset_metrics()
    model.eval()

    with torch.set_grad_enabled(False):
        for i, (X, y) in enumerate(val_loader):
            X = X.cuda()
            y = y.cuda()

            logits = model(X)
            loss = loss_ce(logits, y)

            # metrics
            pred = torch.softmax(logits, dim=1)
            pred_arg = torch.argmax(logits, dim=1)
            y_one_hot = F.one_hot(y, num_classes=10)

            acc = acc_fn(pred_arg, y).mean
            fscore = fscore_fn(pred, y_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # logs
            print(val_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / len(val_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - start_time
            ), end="\r")

    tensorboard.add_scalar("val/Lce", avg_ce, epoch)
    tensorboard.add_scalar("val/f1", fscore, epoch)
    tensorboard.add_scalar("val/acc", acc, epoch)
    
    tensorboard.add_scalar("hyperparameters/learning_rate", get_lr(optimizer), epoch)
    
    tensorboard.add_scalar("max/acc", maximum_fn("acc", acc), epoch )
    tensorboard.add_scalar("max/f1", maximum_fn("f1", fscore), epoch )

    checkpoint.step(acc)
    lr_scheduler.step()

In [None]:
print(header)

start_epoch = checkpoint.epoch_counter
end_epoch = args.nb_epoch

for e in range(start_epoch, args.nb_epoch):
    train(e)
    val(e)

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  

Training 1      - 100    -          2.0092 -           0.3321    | 0.1452   - 22.1454
[1;4mValidati 1      - 100    -          1.6407 -           0.4442    | 0.3153   - 1.4426[0m
 better performance: saving ...

Training 2      - 100    -          1.6733 -           0.4140    | 0.2728   - 22.2024
[1;4mValidati 2      - 100    -          1.6153 -           0.4394    | 0.3352   - 1.4957[0m
Training 3      - 100    -          1.4844 -           0.4763    | 0.3812   - 22.2677
[1;4mValidati 3      - 100    -          1.4018 -           0.4873    | 0.3983   - 1.4271[0m
 better performance: saving ...

Training 4      - 100    -          1.3505 -           0.5194    | 0.4494   - 21.7189
[1;4mValidati 4      - 100    -          1.2351 -           0.5635    | 0.5235   - 1.4386[0m
 better performance: saving ...

Training 5      - 100    -          1.2013 -           0.5721    | 0.5265   - 21.1698
[1

In [None]:
tensorboard.flush()
tensorboard.close()

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪