# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Cap the thread pools of the numeric backends. These are set before numpy is
# imported below; presumably the libraries read them at import time.
os.environ["MKL_NUM_THREADS"] = "2"
os.environ["NUMEXPR_NUM_THREADS"] = "2"  # was "NUMEXPR_NU M_THREADS": a name nothing reads
os.environ["OMP_NUM_THREADS"] = "2"
import numpy as np

import time
import math
import pickle
import argparse
import random
from random import shuffle
from tqdm import tqdm_notebook as tqdm
from tqdm import trange

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
import torch.utils.data as data
from torch.utils.tensorboard import SummaryWriter
from advertorch.attacks import GradientSignAttack
from torch.nn.utils import weight_norm

In [3]:
import sys
sys.path.append("..")

# dataset manager
from dcase2020.datasetManager import DESEDManager
from dcase2020.datasets import DESEDDataset


import augmentation_utils.signal_augmentations as sa
from metric_utils.metrics import CategoricalAccuracy, BinaryRatio, ContinueAverage, FScore

from dcase2020_task4.util.utils import get_datetime, reset_seed, ZipCycle
from dcase2020_task4.util.checkpoint import CheckPoint

from dcase2020_task4.CoTraining.models import dcase2019_model
from dcase2020_task4.CoTraining.losses import loss_cot, loss_diff, loss_diff, p_loss_diff, weak_synth_loss
from dcase2020_task4.CoTraining.ramps import Warmup, sigmoid_rampup
from dcase2020_task4.CoTraining.samplers import CoTrainingSampler

['/home/lcances/sync/Documents_sync/Projet/dcase2020_task4/dcase2020_task4/CoTraining/notebooks', '/home/lcances/.miniconda3/envs/dcase2020/lib/python37.zip', '/home/lcances/.miniconda3/envs/dcase2020/lib/python3.7', '/home/lcances/.miniconda3/envs/dcase2020/lib/python3.7/lib-dynload', '', '/home/lcances/.miniconda3/envs/dcase2020/lib/python3.7/site-packages', '/home/lcances/sync/Documents_sync/Projet/augmentation_utils', '/home/lcances/sync/Documents_sync/Projet/dcase2020_task4', '/home/lcances/sync/Documents_sync/Projet/Datasets/dcase2020', '/home/lcances/.miniconda3/envs/dcase2020/lib/python3.7/site-packages/IPython/extensions', '/home/lcances/.ipython', '..', '/home/lcances/sync/Documents_sync/Projet/dcase2020_task4/dcase2020_task4']


# Arguments (for compatibility with script)

In [4]:
import argparse

# Command-line interface mirrored from the script version of this experiment.
# The parser is created once (the original built a throw-away parser first).
parser = argparse.ArgumentParser(description='Deep Co-Training for Semi-Supervised Image Recognition')
parser.add_argument("--model", default="cnn", type=str, help="Model to load, see list of model in models.py")

# Data sampling options
parser.add_argument("--nb_view", default=2, type=int, help="Number of supervised view")
parser.add_argument("--ratio", default=0.1, type=float)
parser.add_argument("--parser_ratio", default=None, type=float, help="ratio to apply for sampling the S and U data")
parser.add_argument("--subsampling", default=1.0, type=float, help="subsampling ratio")
parser.add_argument("--subsampling_method", default="balance", type=str, help="method to perform subsampling [random | balance]")

# Training length
parser.add_argument('--batchsize', '-b', default=100, type=int)
parser.add_argument('--epochs', default=150, type=int)

# Loss weighting and optimizer hyperparameters
parser.add_argument('--lambda_cot_max', default=10, type=int)
parser.add_argument('--lambda_diff_max', default=0.5, type=float)
parser.add_argument('--warm_up', default=80.0, type=float)
parser.add_argument('--momentum', default=0.0, type=float)
parser.add_argument('--decay', default=1e-3, type=float)
parser.add_argument('--epsilon', default=0.02, type=float)

# Bookkeeping
parser.add_argument('--seed', default=1234, type=int)
parser.add_argument('--num_class', default=10, type=int)
parser.add_argument("-T", '--tensorboard_dir', default='tensorboard/', type=str)
parser.add_argument('--checkpoint_dir', default='checkpoint', type=str)
parser.add_argument('--base_lr', default=0.05, type=float)
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--dataset', default='cifar10', type=str, help='choose svhn or cifar10, svhn is not implemented yet')
parser.add_argument("--job_name", default="default", type=str)
parser.add_argument("-a","--augments", action="append", help="Augmentation. use as if python script")
parser.add_argument("--augment_S", action="store_true", help="Apply augmentation on Supervised part")
parser.add_argument("--augment_U", action="store_true", help="Apply augmentation on Unsupervised part")
parser.add_argument("--num_workers", default=0, type=int, help="Choose number of worker to train the model")
parser.add_argument("--log", default="warning", help="Log level")

# Parse an explicit empty argument list so that every option takes its default
# inside the notebook, regardless of the kernel's own sys.argv.
args = parser.parse_args([])

In [5]:
# Seed from the parsed arguments (default 1234) instead of a duplicated literal,
# so that changing --seed actually changes the run.
reset_seed(args.seed)

# Prepare the data

In [6]:
# ==== load the dataset ====
# Locations of the DESED metadata and audio, relative to this notebook.
desed_metadata_root = "../../../dataset/DESED/dataset/metadata"
desed_audio_root = "../../../dataset/DESED/dataset/audio"

# Keyword arguments shared by the two dataset managers.
manager_parameters = {
    "metadata_root": desed_metadata_root,
    "audio_root": desed_audio_root,
    "sampling_rate": 22050,
    "from_disk": False,
    "subsampling": 1.0,
    "subsampling_method": "inverse_distribution",
    "nb_vector_bin": 431,
    "verbose": 1,
}

# One manager for the supervised subsets, one for the unsupervised subset.
S_manager = DESEDManager(**manager_parameters)
U_manager = DESEDManager(**manager_parameters)

[1;34mDEBUG --- datasetManager.__init__ >>> ../../../dataset/DESED/dataset/audio/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/metadata/train/synthetic20.tsv[0m
[1;34mDEBUG --- datasetManager.__init__ >>> ../../../dataset/DESED/dataset/audio/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../../../dataset/DESED/dataset/met

## Add all training subsets and validation subset

In [None]:
# Subsets handled by the supervised manager, added in this order.
for subset_name in ("weak", "synthetic20", "validation"):
    S_manager.add_subset(subset_name)

# The unlabelled subset goes to its own manager.
U_manager.add_subset("unlabel_in_domain")

[1;37mINFO --- datasetManager._add_train_metadata >>> Loading metadata for: weak[0m
[1;37mINFO --- datasetManager._add_train_subset >>> Loading dataset: train, subset: weak[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/weak[0m
[1;37mINFO --- datasetManager._add_train_metadata >>> Loading metadata for: synthetic20[0m


100%|██████████| 7582/7582 [00:14<00:00, 506.80it/s]

[1;37mINFO --- datasetManager._add_train_subset >>> Loading dataset: train, subset: synthetic20[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/synthetic20[0m



100%|██████████| 4251/4251 [00:08<00:00, 483.57it/s]

[1;37mINFO --- datasetManager._add_val_subset >>> Loading dataset: validation[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/validation[0m





[1;37mINFO --- datasetManager._add_train_metadata >>> Loading metadata for: unlabel_in_domain[0m
[1;37mINFO --- datasetManager._add_train_subset >>> Loading dataset: train, subset: unlabel_in_domain[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/unlabel_in_domain[0m


## Prepare the augmentations

In [None]:
# Augmentation lists for the supervised (S) and unsupervised (U) datasets.
# Both are currently empty.
S_augments = []
U_augments = []

## Prepare the datasets

In [None]:
# Supervised training set: weak and strong targets, read through S_manager.
train_S_dataset = DESEDDataset(
    S_manager,
    train=True, val=False,
    weak=True, strong=True,
    augments=S_augments, cached=False,
)

# Unsupervised training set: no targets requested, read through U_manager.
train_U_dataset = DESEDDataset(
    U_manager,
    train=True, val=False,
    weak=False, strong=False,
    augments=U_augments, cached=False,
)

# Validation set: never augmented, and cached.
val_dataset = DESEDDataset(
    S_manager,
    train=False, val=True,
    weak=True, strong=True,
    augments=[], cached=True,
)


In [None]:
# Display the number of items in the supervised, unsupervised and validation datasets.
len(train_S_dataset), len(train_U_dataset), len(val_dataset)

## Prepare the dataloader
Batch_size must be divided into a supervised minibatch and an unsupervised one.

In [None]:
nb_S_files = len(train_S_dataset)
nb_U_files = len(train_U_dataset)
nb_files = nb_S_files + nb_U_files

# Share of the training files that are supervised. The original divided by
# nb_U_files, which is not a fraction of the whole: it exceeds 1 (negative
# unsupervised batch size) as soon as S outnumbers U, and it makes the two
# loaders run out at very different batch counts than `nb_batch` below.
ratio = nb_S_files / nb_files

batch_size = args.batchsize
# Split the minibatch proportionally; keep at least one sample on each side so
# that neither DataLoader is given a batch size of zero.
S_batch_size = int(np.floor(batch_size * ratio))
S_batch_size = min(max(S_batch_size, 1), max(batch_size - 1, 1))
U_batch_size = max(batch_size - S_batch_size, 1)

# Number of combined minibatches needed to see every file once (a float; it is
# only used to display the progress percentage).
nb_batch = nb_files / batch_size

print("S_batch_size: ", S_batch_size)
print("U_batch_size: ", U_batch_size)
print(nb_batch)

In [None]:
# Options common to the three training loaders.
train_loader_options = dict(shuffle=True, num_workers=10)

# Two shuffled loaders over the same supervised dataset, one per model.
train_S1_loader = data.DataLoader(train_S_dataset, batch_size=S_batch_size, **train_loader_options)
train_S2_loader = data.DataLoader(train_S_dataset, batch_size=S_batch_size, **train_loader_options)
train_U_loader = data.DataLoader(train_U_dataset, batch_size=U_batch_size, **train_loader_options)

# Each training step receives one batch from every loader: (S1, S2, U).
train_loader = ZipCycle([train_S1_loader, train_S2_loader, train_U_loader])

# Validation loader keeps the dataset order (no shuffling).
val_loader = data.DataLoader(val_dataset, batch_size=32, num_workers=4)

# Models

In [None]:
model_func = dcase2019_model

# Instantiate both co-trained models first, then move each one to the GPU.
m1, m2 = [model_func() for _ in range(2)]
m1, m2 = m1.cuda(), m2.cuda()


# Advertorch can't work with multiple output models.
def weaker(model):
    """Return a callable that exposes only the first output of ``model``.

    ``model`` must return exactly two values when called. The returned callable
    forwards all positional and keyword arguments to ``model`` and gives back
    the first value, discarding the second.
    """
    def first_output_only(*call_args, **call_kwargs):
        outputs = model(*call_args, **call_kwargs)
        first, _second = outputs
        return first

    return first_output_only
    
    
    
# First-output-only views of the two models; these are the callables handed to
# the adversarial generators.
weak_m1 = weaker(m1)
weak_m2 = weaker(m2)


# Prep training

## Adversarial generation
Multilabel adversarial generation paper from 2019: https://arxiv.org/pdf/1901.00546.pdf

In [None]:
# adversarial generation
def _make_gradient_sign_attack(predict):
    """Build the untargeted GradientSignAttack used for ``predict``.

    Each attack gets its own summed BCE-with-logits loss, a step of
    ``args.epsilon`` and no clipping of the perturbed input.
    """
    return GradientSignAttack(
        predict,
        loss_fn=nn.BCEWithLogitsLoss(reduction="sum"),
        eps=args.epsilon,
        clip_min=-math.inf,
        clip_max=math.inf,
        targeted=False,
    )


# One generator per model, each attacking that model's first (weak) output.
adv_generator_1 = _make_gradient_sign_attack(weak_m1)
adv_generator_2 = _make_gradient_sign_attack(weak_m2)

## optimizers, Checkpoint, Warmup & callbacks 

In [None]:
# A single SGD optimizer updates the parameters of both models.
params = [*m1.parameters(), *m2.parameters()]
optimizer = optim.SGD(
    params,
    lr=args.base_lr,
    momentum=args.momentum,
    weight_decay=args.decay,
)

# One checkpoint per model, both sharing the optimizer and using "max" mode.
checkpoint_m1 = CheckPoint(m1, optimizer, mode="max", name="../models/best_dct_m1.torch")
checkpoint_m2 = CheckPoint(m2, optimizer, mode="max", name="../models/best_dct_m2.torch")


def lr_lambda(epoch):
    """Cosine-shaped multiplier that LambdaLR applies to the base learning rate."""
    return 1.0 + math.cos((epoch - 1) * math.pi / args.epochs)


lr_scheduler = LambdaLR(optimizer, lr_lambda)

# define the warmups
lambda_cot = Warmup(args.lambda_cot_max, args.warm_up, sigmoid_rampup)
lambda_diff = Warmup(args.lambda_diff_max, args.warm_up, sigmoid_rampup)

# Everything in this list gets its .step() called once per epoch.
callbacks = [lr_scheduler, lambda_cot, lambda_diff]

## Metrics and hyperparameters

In [None]:
# define the metrics
ratioS = [BinaryRatio(), BinaryRatio()]
weak_fscores = [FScore(), FScore()]
strong_fscores = [FScore(), FScore()]
avg_losses = {
    "l_sup": ContinueAverage(),
    "l_cot": ContinueAverage(),
    "l_diff": ContinueAverage(),
    "total": ContinueAverage()   
}


def reset_all_metrics():
    """Call ``reset()`` on every ratio, F-score and running-loss metric."""
    metric_groups = (ratioS, weak_fscores, strong_fscores, avg_losses.values())
    for group in metric_groups:
        for metric in group:
            metric.reset()
        
def get_lr(optimizer):
    """Return the ``'lr'`` entry of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter group.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']
    
def partial_eval(model):
    """Switch every direct child of ``model`` to eval mode, except ``nn.GRU`` children.

    Only the immediate children are visited; ``model`` itself keeps its own
    training flag.
    """
    non_recurrent = [child for child in model.children() if not isinstance(child, nn.GRU)]
    for child in non_recurrent:
        child.train(mode=False)
            
def binarize(tensor, threshold: float = 0.5, apply_sigmoid: bool = False):
    """Threshold ``tensor`` into a tensor of zeros and ones.

    Args:
        tensor: values to threshold.
        threshold: entries ``>= threshold`` become 1, all others become 0.
        apply_sigmoid: when True, ``torch.sigmoid`` is applied before
            thresholding (use it when ``tensor`` holds logits).

    Returns:
        A new tensor with the same shape and dtype as the thresholded values.
        The input is never modified; the previous implementation wrote the
        result into the caller's tensor whenever ``apply_sigmoid`` was False.
        NaN entries compare False and therefore map to 0.
    """
    values = torch.sigmoid(tensor) if apply_sigmoid else tensor

    # The comparison allocates a fresh boolean tensor; cast it back to the
    # dtype of the values so callers keep getting 0./1. entries.
    return (values >= threshold).to(values.dtype)

# Run identifier: get_datetime() output, job name, then the cot / diff maxima
# and the warm-up length.
title_fields = (
    get_datetime(),
    args.job_name,
    args.lambda_cot_max,
    args.lambda_diff_max,
    args.warm_up,
)
title = "%s_%s_%slcm_%sldm_%swl" % title_fields

# Each run writes into its own sub-directory of the tensorboard directory.
tensorboard_log_dir = "%s/%s" % (args.tensorboard_dir, title)
tensorboard = SummaryWriter(tensorboard_log_dir)

## Weak / Strong DCT loss

# Training

In [None]:
# ANSI escape sequences: bold + underline on, and reset.
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"

# Column layout of the header row (text only) and of the value rows (numbers).
header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6}| {:<6.6}| {:<8.8}| {:<6.6} - {:<9.9}  {:<9.9}| {:<11.11}- {:<6.6}"

value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.3f}| {:<6.3f}| {:<8.3f}| {:<6.3f} - {:<9.9} {:<9.3f}| {:<11.3f}- {:<6.3f}"

# Training rows are printed plain, validation rows underlined.
train_form = value_form
val_form = "".join((UNDERLINE_SEQ, value_form, RESET_SEQ))

header_columns = (
    "", "Epoch", "%",
    "Losses:", "sup", "cot ", "diff", "Total ",
    "metrics:", "Weak F1 ", "Strong F1",
    "Time",
)
header = header_form.format(*header_columns)

print(header)

In [None]:
def train(epoch):
    """Run one co-training epoch over ``train_loader`` and log it to tensorboard.

    For every (S1, S2, U) batch the two models are updated with
    ``l_sup + lambda_cot() * l_cot``. The difference loss ``l_diff`` is still
    computed and logged, but its term is currently disabled in the total loss.

    Args:
        epoch: index of the current epoch, used for display and as the
            tensorboard step.

    Returns:
        The total loss of the last batch as a Python float, so the caller can
        check it for NaN.
    """
    m1.train()
    m2.train()

    reset_all_metrics()

    start_time = time.time()
    print("")

    for batch, (S1, S2, U) in enumerate(train_loader):
        # Separate the diff component of the minibatch
        X_S1, X_S2, X_U = S1[0], S2[0], U[0]
        weak_y_S1, strong_y_S1 = S1[1][0], S1[1][1]
        weak_y_S2, strong_y_S2 = S2[1][0], S2[1][1]

        X_S1, X_S2, X_U = X_S1.cuda().float(), X_S2.cuda().float(), X_U.cuda().float()
        weak_y_S1, strong_y_S1 = weak_y_S1.cuda().float(), strong_y_S1.cuda().float()
        weak_y_S2, strong_y_S2 = weak_y_S2.cuda().float(), strong_y_S2.cuda().float()

        # Predict all minibatch component separately
        weak_logits_S1, strong_logits_S1 = m1(X_S1)
        weak_logits_S2, strong_logits_S2 = m2(X_S2)
        weak_logits_U1, strong_logits_U1 = m1(X_U)
        weak_logits_U2, strong_logits_U2 = m2(X_U)

        # pseudo labels of U: multi-hot vectors with the same shape as the weak
        # logits, because the attacks use BCEWithLogitsLoss. The previous code
        # overwrote them with torch.max class indices (a single-label leftover),
        # which do not match that loss. They are computed on detached logits so
        # the pseudo labels stay out of the autograd graph.
        weak_pred_U1 = binarize(weak_logits_U1.detach(), apply_sigmoid=True)
        weak_pred_U2 = binarize(weak_logits_U2.detach(), apply_sigmoid=True)

        # ======== Generate adversarial examples ========
        # fix batchnorm ---- (every direct child except GRU goes to eval mode)
        partial_eval(m1)
        partial_eval(m2)

        # generate adversarial examples ----
        weak_adv_data_S1 = adv_generator_1.perturb(X_S1, weak_y_S1)
        weak_adv_data_U1 = adv_generator_1.perturb(X_U, weak_pred_U1)
        weak_adv_data_S2 = adv_generator_2.perturb(X_S2, weak_y_S2)
        weak_adv_data_U2 = adv_generator_2.perturb(X_U, weak_pred_U2)

        m1.train()
        m2.train()

        # predict adversarial examples ----
        # Each model is evaluated on the examples crafted against the other one.
        weak_adv_logits_S1, _ = m1(weak_adv_data_S2)
        weak_adv_logits_S2, _ = m2(weak_adv_data_S1)

        weak_adv_logits_U1, _ = m1(weak_adv_data_U2)
        weak_adv_logits_U2, _ = m2(weak_adv_data_U1)

        # ======== calculate the differents loss ========
        # zero the parameter gradients ----
        # Done after the adversarial generation, so whatever the attacks left in
        # the parameter gradients is cleared before the real backward pass.
        optimizer.zero_grad()
        m1.zero_grad()
        m2.zero_grad()

        # losses ----
        # L_sup. weak_synth_loss already takes care of applying the mask on the synth component
        weak_l_sup_S1, strong_l_sup_S1, total_l_sup_S1 = weak_synth_loss(weak_logits_S1, strong_logits_S1, weak_y_S1, strong_y_S1)
        weak_l_sup_S2, strong_l_sup_S2, total_l_sup_S2 = weak_synth_loss(weak_logits_S2, strong_logits_S2, weak_y_S2, strong_y_S2)
        l_sup = total_l_sup_S1 + total_l_sup_S2

        # L_cot. Apply on both weak and strong prediction.
        # Mask is not needed since there is no label used
        weak_l_cot = loss_cot(weak_logits_U1, weak_logits_U2)
        strong_l_cot = loss_cot(strong_logits_U1, strong_logits_U2)
        l_cot = weak_l_cot + strong_l_cot

        # L_diff. Since the adversarial samples are generated using the weak prediction,
        # L_diff is computed only from the weak annotation
        pld_S, pld_U, l_diff = p_loss_diff(
            weak_logits_S1, weak_logits_S2, weak_adv_logits_S1, weak_adv_logits_S2,
            weak_logits_U1, weak_logits_U2, weak_adv_logits_U1, weak_adv_logits_U2
        )

        # The l_diff term is deliberately left out of the optimized loss for now.
        total_loss = l_sup + lambda_cot() * l_cot # + lambda_diff() * l_diff
        total_loss.backward()
        optimizer.step()

        # ======== Calc the metrics ========
        with torch.no_grad():
            # accuracies ----
            weak_f1_S1 = weak_fscores[0](weak_logits_S1, weak_y_S1)
            weak_f1_S2 = weak_fscores[1](weak_logits_S2, weak_y_S2)
            strong_f1_S1 = strong_fscores[0](strong_logits_S1, strong_y_S1)
            strong_f1_S2 = strong_fscores[1](strong_logits_S2, strong_y_S2)

            # ratios  ----
            weak_adv_pred_S1 = binarize(weak_adv_logits_S1, apply_sigmoid=True)
            weak_adv_pred_S2 = binarize(weak_adv_logits_S2, apply_sigmoid=True)

            # NOTE(review): weak_adv_logits_S1 is m1 applied to adversarial examples
            # built from X_S2, yet it is compared with weak_y_S1 (and symmetrically
            # for S2) — confirm this pairing is the intended one.
            ratio_S1 = ratioS[0](weak_adv_pred_S1, weak_y_S1)
            ratio_S2 = ratioS[1](weak_adv_pred_S2, weak_y_S2)
            # ========

            avg_total_loss = avg_losses["total"](total_loss.item())
            avg_ls = avg_losses["l_sup"](l_sup.item())
            avg_lc = avg_losses["l_cot"](l_cot.item())
            avg_ld = avg_losses["l_diff"](l_diff.item())

            # print statistics (the line is rewritten in place thanks to end="\r")
            print(train_form.format(
                "Training: ",
                epoch,
                "%d %%" % int((batch / nb_batch) * 100),
                "", avg_ls, avg_lc, avg_ld, avg_total_loss,
                "", weak_f1_S1, strong_f1_S1,
                time.time() - start_time,
            ), end="\r")

    # using tensorboard to monitor loss and acc
    # All values logged below come from the last batch of the epoch.
    tensorboard.add_scalar('train/total_loss', total_loss.item(), epoch)
    tensorboard.add_scalar('train/Lsup', l_sup.item(), epoch )
    tensorboard.add_scalar('train/Lcot', l_cot.item(), epoch )
    tensorboard.add_scalar('train/Ldiff', l_diff.item(), epoch )
    tensorboard.add_scalar("train/weak_f1_S1", weak_f1_S1, epoch )
    tensorboard.add_scalar("train/weak_f1_S2", weak_f1_S2, epoch )

    tensorboard.add_scalar("detail_loss/Lsup_S1", total_l_sup_S1.item(), epoch)
    tensorboard.add_scalar("detail_loss/Lsup_S2", total_l_sup_S2.item(), epoch)
    tensorboard.add_scalar("detail_loss/Ldiff_S", pld_S.item(), epoch)
    tensorboard.add_scalar("detail_loss/Ldiff_U", pld_U.item(), epoch)

    tensorboard.add_scalar("detail_acc/weak_f1_S1", weak_f1_S1, epoch)
    tensorboard.add_scalar("detail_acc/weak_f1_S2", weak_f1_S2, epoch)

    tensorboard.add_scalar("detail_ratio/ratio_S1", ratio_S1, epoch)
    tensorboard.add_scalar("detail_ratio/ratio_S2", ratio_S2, epoch)

    # Return the total loss to check for NaN
    return total_loss.item()

In [None]:
def test(epoch):
    """Evaluate both models on ``val_loader``, log the scores, then step the callbacks.

    Weak and strong F-scores of m1 and m2 are accumulated over the validation
    batches and written to tensorboard together with the current values of
    ``lambda_cot``, ``lambda_diff`` and the learning rate. Finally every object
    in ``callbacks`` has its ``step()`` called.

    Args:
        epoch: index of the current epoch, used for display and as the
            tensorboard step.
    """
    m1.eval()
    m2.eval()

    reset_all_metrics()
    print("")
    start_time = time.time()

    with torch.no_grad():
        for batch, (X, y) in enumerate(val_loader):
            X = X.cuda().float()
            weak_y = y[0].cuda().float()
            strong_y = y[1].cuda().float()

            weak_logits_1, strong_logits_1 = m1(X)
            weak_logits_2, strong_logits_2 = m2(X)

            weak_f1_S1 = weak_fscores[0](weak_logits_1, weak_y)
            weak_f1_S2 = weak_fscores[1](weak_logits_2, weak_y)

            strong_f1_S1 = strong_fscores[0](strong_logits_1, strong_y)
            strong_f1_S2 = strong_fscores[1](strong_logits_2, strong_y)

            # print statistics; the progress column uses the same "N %" text as
            # the training rows. No loss is computed here, hence the zeros.
            print(val_form.format(
                "Validation: ",
                epoch,
                "%d %%" % int((batch / len(val_loader)) * 100),
                "", 0.0, 0.0, 0.0, 0.0,
                "", weak_f1_S1, strong_f1_S1,
                time.time() - start_time,
            ), end="\r")

    # The scores logged are those returned by the metrics on the last batch.
    tensorboard.add_scalar("val/weak_f1_S1", weak_f1_S1, epoch)
    tensorboard.add_scalar("val/weak_f1_S2", weak_f1_S2, epoch)
    tensorboard.add_scalar("val/strong_f1_S1", strong_f1_S1, epoch)
    tensorboard.add_scalar("val/strong_f1_S2", strong_f1_S2, epoch)

    tensorboard.add_scalar("detail_hyperparameters/lambda_cot", lambda_cot(), epoch)
    tensorboard.add_scalar("detail_hyperparameters/lambda_diff", lambda_diff(), epoch)
    tensorboard.add_scalar("detail_hyperparameters/learning_rate", get_lr(optimizer), epoch)

    # Apply callbacks
    for c in callbacks:
        c.step()


In [None]:
print(header)
try:
    for epoch in range(args.epochs):
        total_loss = train(epoch)

        # train() returns the last batch's total loss; stop as soon as it diverges.
        if np.isnan(total_loss):
            print("Losses are NaN, stoping the training here")
            break

        test(epoch)
finally:
    # Write pending tensorboard events to disk even when training stops early or
    # the cell is interrupted. flush() keeps the writer usable if the cell is re-run.
    tensorboard.flush()


# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪