# import

In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
import os
# Cap the number of threads used by the numerical back-ends. These variables
# are set before numpy / torch are imported further down in this cell.
os.environ["MKL_NUM_THREADS"] = "2"
# Fixed: the key previously contained a stray space ("NUMEXPR_NU M_THREADS"),
# so the intended variable was never set.
os.environ["NUMEXPR_NUM_THREADS"] = "2"
os.environ["OMP_NUM_THREADS"] = "2"
import time

import numpy
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.cuda.amp import autocast

from torch.optim.lr_scheduler import LambdaLR
from torch.utils.tensorboard import SummaryWriter

In [6]:
from ubs8k.datasetManager import DatasetManager
from ubs8k.datasets import Dataset

from DCT.util.utils import reset_seed, get_datetime, get_model_from_name, load_dataset
from DCT.util.checkpoint import CheckPoint
from metric_utils.metrics import CategoricalAccuracy, FScore, ContinueAverage

# Arguments

In [7]:
import argparse

# Command-line style configuration for the notebook. The parser is fed an
# empty argument list below, so every option takes its default value unless
# the defaults are edited here.
parser = argparse.ArgumentParser()

# Dataset location and selection.
parser.add_argument("-d", "--dataset_root", default="../datasets", type=str)
parser.add_argument("-D", "--dataset", default="cifar10", type=str, help="available [ubs8k | cifar10]")

# Data split options.
parser.add_argument("--supervised_ratio", default=1.0, type=float)
parser.add_argument("-t", "--train_folds", nargs="+", default=[1, 2, 3, 4, 5, 6, 7, 8, 9], type=int)
parser.add_argument("-v", "--val_folds", nargs="+", default=[10], type=int)

# Model and optimisation options.
parser.add_argument("--model", default="Pmodel", type=str)
parser.add_argument("--batch_size", default=100, type=int)
parser.add_argument("--nb_epoch", default=100, type=int)
# Fixed: the learning rate is a float. With type=int, any explicit value such
# as "--learning_rate 0.01" was rejected by argparse.
parser.add_argument("--learning_rate", default=0.003, type=float)

# Output locations. The third path component ("ubs8k") is replaced by the
# selected dataset name in the next cell.
parser.add_argument("--checkpoint_path", default="../model_save/ubs8k/full_supervised", type=str)
parser.add_argument("--resume", action="store_true", default=False)
parser.add_argument("--tensorboard_path", default="../tensorboard/ubs8k/full_supervised", type=str)
parser.add_argument("--tensorboard_sufix", default="", type=str)

# Parse an explicit empty argument list so the kernel's own argv is ignored.
args = parser.parse_args([])

In [8]:
# Point the checkpoint and TensorBoard directories at the selected dataset by
# overwriting the third "/"-separated component of each path.
# NOTE(review): the fixed index 2 matches the default "../<kind>/<dataset>/..."
# layout; a custom path with a different depth would be rewritten at the
# wrong position (or raise IndexError) — confirm before changing the defaults.
checkpoint_path_ = args.checkpoint_path.split("/")
checkpoint_path_[2] = args.dataset
args.checkpoint_path = "/".join(checkpoint_path_)

tensorboard_path_ = args.tensorboard_path.split("/")
tensorboard_path_[2] = args.dataset
args.tensorboard_path = "/".join(tensorboard_path_)

# Display the resulting configuration as the cell output.
args

Namespace(batch_size=100, checkpoint_path='../model_save/cifar10/full_supervised', dataset='cifar10', dataset_root='../datasets', learning_rate=0.003, model='Pmodel', nb_epoch=100, resume=False, supervised_ratio=1.0, tensorboard_path='../tensorboard/cifar10/full_supervised', tensorboard_sufix='', train_folds=[1, 2, 3, 4, 5, 6, 7, 8, 9], val_folds=[10])

# initialisation

In [9]:
# Seed the run with a fixed value through the project helper imported from
# DCT.util.utils.
# NOTE(review): which random generators reset_seed covers (python / numpy /
# torch / CUDA) is not visible from this notebook — confirm in the helper.
reset_seed(1234)


# Prepare the dataset

In [10]:
# Augmentations intended for the training images: random 32x32 crop after
# 4 pixels of padding, and random horizontal flip.
# NOTE(review): this list is never passed to load_dataset below, so as written
# it has no effect on the loaders — confirm whether load_dataset accepts a
# transform argument and wire it in, or drop the list.
extra_train_transforms = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
]

# Build the dataset manager and the train / validation loaders for the
# selected dataset in "supervised" mode, using the parsed arguments.
manager, train_loader, val_loader = load_dataset(
    args.dataset,
    "supervised",
    dataset_root = args.dataset_root,
    supervised_ratio = args.supervised_ratio,
    batch_size = args.batch_size,
    train_folds = args.train_folds,
    val_folds = args.val_folds,
    verbose = 2
)

Files already downloaded and verified
Files already downloaded and verified


# Prep model

In [11]:
# Release cached GPU memory held by the CUDA allocator before building.
torch.cuda.empty_cache()

# Resolve the model constructor from its name, instantiate it and move the
# instance to the GPU in one expression.
model_func = get_model_from_name(args.model)
model = model_func().cuda()


In [12]:
from torchsummaryX import summary

# Dummy all-zero batch (64 images, 3 channels, 32x32) on the GPU, used only to
# trace the model and print the per-layer summary.
input_tensor = torch.zeros((64, 3, 32, 32), dtype=torch.float).cuda()

s = summary(model, input_tensor)


                                      Kernel Shape       Output Shape  \
Layer                                                                   
0_features.Conv2d_0                 [3, 128, 3, 3]  [64, 128, 32, 32]   
1_features.BatchNorm2d_1                     [128]  [64, 128, 32, 32]   
2_features.LeakyReLU_2                           -  [64, 128, 32, 32]   
3_features.Conv2d_3               [128, 128, 3, 3]  [64, 128, 32, 32]   
4_features.BatchNorm2d_4                     [128]  [64, 128, 32, 32]   
5_features.LeakyReLU_5                           -  [64, 128, 32, 32]   
6_features.Conv2d_6               [128, 128, 3, 3]  [64, 128, 32, 32]   
7_features.BatchNorm2d_7                     [128]  [64, 128, 32, 32]   
8_features.LeakyReLU_8                           -  [64, 128, 32, 32]   
9_features.MaxPool2d_9                           -  [64, 128, 16, 16]   
10_features.Dropout2d_10                         -  [64, 128, 16, 16]   
11_features.Conv2d_11             [128, 256, 3, 3] 

## Prep training

In [13]:
# create model
# A new instance replaces the one built (and summarised) in the earlier cells,
# so training starts from this freshly constructed model.
torch.cuda.empty_cache()

model = model_func()
# Moves the model to the GPU; as the last expression of the cell, its return
# value is also what the notebook displays as the cell output.
model.cuda()

Pmodel(
  (features): Sequential(
    (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.999, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1)
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.999, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.1)
    (6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.999, affine=True, track_running_stats=True)
    (8): LeakyReLU(negative_slope=0.1)
    (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (10): Dropout2d(p=0.5, inplace=False)
    (11): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.999, affine=True, track_running_stats=True)
    (13): LeakyReLU(negative_slope=0.

# training parameters

In [14]:
# Run identifiers: both embed the model function name and the supervised
# ratio; the TensorBoard one is additionally prefixed with the datetime.
tensorboard_title = "%s_%s_%.1fS" % (get_datetime(), model_func.__name__, args.supervised_ratio)
checkpoint_title = "%s_%.1fS" % (model_func.__name__, args.supervised_ratio)

# TensorBoard writer; the log directory is echoed for convenience.
tensorboard = SummaryWriter(
    log_dir="%s/%s" % (args.tensorboard_path, tensorboard_title),
    comment=model_func.__name__,
)
print(os.path.join(args.tensorboard_path, tensorboard_title))

# Loss and running metrics.
loss_ce = nn.CrossEntropyLoss(reduction="mean")
fscore_fn = FScore()
acc_fn = CategoricalAccuracy()
avg = ContinueAverage()


def reset_metrics():
    """Reset every running metric and return the list of ``reset()`` results."""
    return [metric.reset() for metric in (fscore_fn, acc_fn, avg)]


# Optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)


def lr_lambda(epoch):
    """Cosine-shaped multiplicative factor applied to the base learning rate.

    NOTE(review): the ``epoch - 1`` offset makes the factor peak (1.0) at
    epoch 1 rather than epoch 0 — confirm this shift is intended.
    """
    return (1.0 + numpy.cos((epoch - 1) * numpy.pi / args.nb_epoch)) * 0.5


# Learning-rate schedule driven by the factor above.
lr_scheduler = LambdaLR(optimizer, lr_lambda)

# Checkpoint handler writing to "<checkpoint_path>/<checkpoint_title>.torch".
# It is stepped with the validation accuracy; mode="max" presumably keeps the
# highest value — verify against the CheckPoint implementation.
checkpoint = CheckPoint(model, optimizer, mode="max", name="%s/%s.torch" % (args.checkpoint_path, checkpoint_title))

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter group.
    """
    first_group = next(iter(optimizer.param_groups), None)
    return None if first_group is None else first_group['lr']

../tensorboard/cifar10/full_supervised/2020-08-22_16:26:11_Pmodel_1.0S


In [15]:
def maximum():
    """Build a tracker that remembers the largest value seen for each key.

    The returned callable takes ``(key, value)``, updates the stored maximum
    for ``key`` when ``value`` is larger (or when the key is new), and returns
    the current maximum. The underlying dict is exposed as ``func.max``.
    """
    best = dict()

    def func(key, value):
        # Store on first sight of the key, or when strictly improved.
        if key not in best or best[key] < value:
            best[key] = value
        return best[key]

    func.max = best
    return func


maximum_fn = maximum()

## Can resume previous training

In [16]:
# When --resume is set, ask the checkpoint handler to load its last saved
# state before training starts.
# NOTE(review): lr_scheduler is not passed to CheckPoint in this notebook, so
# the schedule position is presumably not restored by this call — confirm.
if args.resume:
    checkpoint.load_last()

## training function

In [17]:
# ANSI escape sequences: bold + underline, and reset to default rendering.
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"

# Column layout shared by the header and the per-batch progress rows.
# Fixes two layout defects:
#   * the label column was 8 characters wide, truncating "Validation: " to
#     "Validati"; it is now 12 wide so both labels fit;
#   * the header "acc" column was 12 wide while the value column was 10 wide,
#     so the "|" separator did not line up; both are now 10 wide.
header_form = "{:<12.12} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6} - {:<9.9} {:<10.10}| {:<9.9}- {:<6.6}"
value_form  = "{:<12.12} {:<6} - {:<6} - {:<8.8} {:<6.4f} - {:<9.9} {:<10.4f}| {:<9.4f}- {:<6.4f}"

header = header_form.format(
    "", "Epoch", "%", "Losses:", "ce", "metrics: ", "acc", "F1 ", "Time"
)

# Training rows are printed plain; validation rows are bold + underlined.
train_form = value_form
val_form = UNDERLINE_SEQ + value_form + RESET_SEQ

print(header)

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  


In [18]:
def train(epoch):
    """Train the model for one epoch over ``train_loader``.

    For every batch: runs the forward pass and cross-entropy loss under
    autocast, back-propagates, steps the optimizer, updates the running
    accuracy / F-score / mean loss and prints a one-line progress row. After
    the last batch the running values are written to TensorBoard under
    ``train/*`` at step ``epoch``.

    NOTE(review): autocast is used here without a gradient scaler; PyTorch's
    AMP documentation pairs the two to avoid gradient underflow in half
    precision — confirm whether a GradScaler should be added.
    """
    began_at = time.time()
    print("")

    reset_metrics()
    model.train()

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        inputs, targets = inputs.cuda(), targets.cuda()

        # Mixed-precision forward pass and loss.
        with autocast():
            logits = model(inputs)
            loss = loss_ce(logits, targets)

        loss.backward()
        optimizer.step()

        # Metric bookkeeping does not need gradients.
        with torch.no_grad():
            probabilities = torch.softmax(logits, dim=1)
            predicted_class = torch.argmax(logits, dim=1)
            targets_one_hot = F.one_hot(targets, num_classes=10)

            acc = acc_fn(predicted_class, targets).mean
            fscore = fscore_fn(probabilities, targets_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # Progress row, rewritten in place thanks to the carriage return.
            progress_row = train_form.format(
                "Training: ",
                epoch + 1,
                int(100 * (batch_idx + 1) / len(train_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - began_at
            )
            print(progress_row, end="\r")

    # Log the running values reached at the end of the epoch.
    tensorboard.add_scalar("train/Lce", avg_ce, epoch)
    tensorboard.add_scalar("train/f1", fscore, epoch)
    tensorboard.add_scalar("train/acc", acc, epoch)

In [19]:
def val(epoch):
    """Evaluate the model on ``val_loader`` and run the end-of-epoch callbacks.

    For every batch: runs the forward pass and cross-entropy loss under
    autocast, updates the running accuracy / F-score / mean loss and prints a
    one-line progress row. After the last batch it logs the running values
    (``val/*``), the current learning rate and the best accuracy / F-score so
    far (``max/*``) to TensorBoard at step ``epoch``, then steps the
    checkpoint handler with the accuracy and advances the LR scheduler.
    """
    start_time = time.time()
    print("")
    reset_metrics()
    model.eval()

    # No parameter is updated during validation, so the whole loop runs with
    # gradient tracking disabled. Previously the forward pass ran with
    # gradients enabled and built an autograd graph that was never used.
    with torch.no_grad():
        for i, (X, y) in enumerate(val_loader):
            X = X.cuda()
            y = y.cuda()

            # Mixed-precision forward pass and loss.
            with autocast():
                logits = model(X)
                loss = loss_ce(logits, y)

            pred = torch.softmax(logits, dim=1)
            pred_arg = torch.argmax(logits, dim=1)
            y_one_hot = F.one_hot(y, num_classes=10)

            acc = acc_fn(pred_arg, y).mean
            fscore = fscore_fn(pred, y_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # logs (row rewritten in place thanks to the carriage return)
            print(val_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / len(val_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - start_time
            ), end="\r")

    # Running values reached at the end of the validation pass.
    tensorboard.add_scalar("val/Lce", avg_ce, epoch)
    tensorboard.add_scalar("val/f1", fscore, epoch)
    tensorboard.add_scalar("val/acc", acc, epoch)

    tensorboard.add_scalar("hyperparameters/learning_rate", get_lr(optimizer), epoch)

    # Best values observed so far across epochs.
    tensorboard.add_scalar("max/acc", maximum_fn("acc", acc), epoch )
    tensorboard.add_scalar("max/f1", maximum_fn("f1", fscore), epoch )

    # End-of-epoch callbacks: checkpointing on accuracy, then the LR schedule.
    checkpoint.step(acc)
    lr_scheduler.step()

In [None]:
print(header)

# Epoch range: start from the checkpoint's epoch counter and stop at the
# configured number of epochs.
start_epoch = checkpoint.epoch_counter
end_epoch = args.nb_epoch

# One training pass followed by one validation pass per epoch.
for e in range(start_epoch, end_epoch):
    train(e)
    val(e)

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  

Training 1      - 100    -          1.8367 -           0.3056    | 0.0810   - 35.8473
[1;4mValidati 1      - 100    -          1.6386 -           0.3741    | 0.1728   - 2.7486[0m
 better performance: saving ...

Training 2      - 100    -          1.6371 -           0.3895    | 0.2214   - 36.3308
[1;4mValidati 2      - 100    -          1.4531 -           0.4603    | 0.3432   - 2.8077[0m
 better performance: saving ...

Training 3      - 100    -          1.4876 -           0.4535    | 0.3431   - 36.4884
[1;4mValidati 3      - 100    -          1.3536 -           0.5094    | 0.4255   - 2.8258[0m
 better performance: saving ...

Training 4      - 100    -          1.3753 -           0.5008    | 0.4172   - 36.5961
[1;4mValidati 4      - 100    -          1.2637 -           0.5427    | 0.4872   - 2.7525[0m
 better performance: saving ...

Training 5      - 100    -          1.2973 -           0

In [None]:
# Write any pending TensorBoard events to disk, then close the writer.
tensorboard.flush()
tensorboard.close()

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪