# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ["MKL_NUM_THREADS"] = "2"
os.environ["NUMEXPR_NU M_THREADS"] = "2"
os.environ["OMP_NUM_THREADS"] = "2"
import numpy as np

import time
import math
import pickle
import argparse
import random
from random import shuffle
from tqdm import tqdm_notebook as tqdm

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
import torch.utils.data as data
from torch.utils.tensorboard import SummaryWriter
from advertorch.attacks import GradientSignAttack
from torch.nn.utils import weight_norm

In [3]:
import sys
sys.path.append("../src/")

from datasetManager import DatasetManager
from generators import Dataset, CoTrainingDataset
from samplers import CoTrainingSampler
import signal_augmentations as sa 

from models import cnn
from losses import loss_cot, loss_diff, loss_diff, p_loss_diff, p_loss_sup
from metrics import CategoricalAccuracy, Ratio
from ramps import Warmup, sigmoid_rampup

# Utils

## Arguments

In [4]:
class Args:
    def __init__(self):
        self.sess = "default"
        self.nb_view = 2
        self.batchsize = 100
        self.lambda_cot_max = 10
        self.lambda_diff_max = 0.5
        self.ratio = 0.1
        self.seed = 1234
        self.epochs = 600
        self.warm_up = 80
        self.momentum = 0.0
        self.decay = 1e-3
        self.epsilon = 0.02
        self.num_class = 10
        self.cifar10_dir = "/corpus/corpus/UrbanSound8K"
        self.tensorboard_dir = "tensoboard_cotraining"
        self.checkpoint_dir = "checkpoint"
        self.base_lr = 0.05
        self.resume = False
        self.job_name = "default"
        self.multi_gpu = False
args = Args()

## Reproducibility

In [5]:
def reset_seed(seed):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic=True
    torch.backends.cudnn.benchmark = False
reset_seed(args.seed)

In [6]:
import datetime

def get_datetime():
    now = datetime.datetime.now()
    return str(now)[:10] + "_" + str(now)[11:-7]

# Prep Co-Training

## dataset

In [7]:
# load the data
audio_root = "../dataset/audio"
metadata_root = "../dataset/metadata"
manager = DatasetManager(metadata_root, audio_root, subsampling=1, subsampling_method="balance", verbose=2)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))


nb file loaded: 7895


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


nb file loaded: 837


In [16]:
# prepare the sampler with the specified number of supervised file
train_dataset = CoTrainingDataset(manager, args.ratio, train=True, val=False, cached=False)
val_dataset = CoTrainingDataset(manager, 1.0, train=False, val=True, cached=False)

train_sampler = CoTrainingSampler(train_dataset, args.batchsize, nb_class=10, nb_view=args.nb_view, ratio=None, method="duplicate") # ratio is manually set here

## Models

In [17]:
model_func = cnn

m1, m2 = model_func(), model_func()

m1 = m1.cuda()
m2 = m2.cuda()

## Loaders & adversarial generators

In [18]:
train_loader = data.DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=10)
val_loader = data.DataLoader(val_dataset, batch_size=128, num_workers=4)

# adversarial generation
adv_generator_1 = GradientSignAttack(
    m1, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
    eps=args.epsilon, clip_min=-math.inf, clip_max=math.inf, targeted=False
)

adv_generator_2 = GradientSignAttack(
    m2, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
    eps=args.epsilon, clip_min=-math.inf, clip_max=math.inf, targeted=False
)

## optimizers, Warmup & callbacks 

In [19]:
params = list(m1.parameters()) + list(m2.parameters())
optimizer = optim.SGD(params, lr=args.base_lr, momentum=args.momentum, weight_decay=args.decay)

lr_lambda = lambda epoch: (1.0 + math.cos((epoch-1)*math.pi/args.epochs))
lr_scheduler = LambdaLR(optimizer, lr_lambda)

# define the warmups
lambda_cot = Warmup(args.lambda_cot_max, args.warm_up, sigmoid_rampup)
lambda_diff = Warmup(args.lambda_diff_max, args.warm_up, sigmoid_rampup)

callbacks = [lr_scheduler, lambda_cot, lambda_diff]

## Metrics and hyperparameters

In [20]:
# define the metrics
ratioS = [Ratio(), Ratio()]
ratioU = [Ratio(), Ratio()]
ratioSU = [Ratio(), Ratio()]
accS = [CategoricalAccuracy(), CategoricalAccuracy()]
accU = [CategoricalAccuracy(), CategoricalAccuracy()]
accSU = [CategoricalAccuracy(), CategoricalAccuracy()]

def reset_all_metrics():
    all_metrics = [*ratioS, *ratioU, *ratioSU, *accS, *accU, *accSU]
    for m in all_metrics:
        m.reset()
        
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

title = "%s_%s_%slcm_%sldm_%swl" % (
    get_datetime(),
    args.job_name,
    args.lambda_cot_max,
    args.lambda_diff_max,
    args.warm_up,
)
tensorboard = SummaryWriter("%s/%s" % (args.tensorboard_dir, title))

# Training

In [21]:
def train(epoch):
    m1.train()
    m2.train()

    running_loss = 0.0
    ls = 0.0
    lc = 0.0 
    ld = 0.0
    
    start_time = time.time()
    print("")
    
    for batch, (X, y) in enumerate(train_loader):
        X = [x.squeeze() for x in X]
        y = [y_.squeeze() for y_ in y]
    
        # separate Supervised (S) and Unsupervised (U) parts
        X_S, X_U = X[:-1], X[-1]
        y_S, y_U = y[:-1], y[-1]
        
        for i in range(len(X_S)):
            X_S[i] = X_S[i].cuda()
            y_S[i] = y_S[i].cuda()
        X_U, y_U = X_U.cuda(), y_U.cuda()

        logits_S1 = m1(X_S[0])
        logits_S2 = m2(X_S[1])
        logits_U1 = m1(X_U)
        logits_U2 = m2(X_U)

        _, pred_S1 = torch.max(logits_S1, 1)
        _, pred_S2 = torch.max(logits_S2, 1)

        # pseudo labels of U 
        _, pred_U1 = torch.max(logits_U1, 1)
        _, pred_U2 = torch.max(logits_U2, 1)

        # ======== Generate adversarial examples ========
        # fix batchnorm ----
        m1.eval()
        m2.eval()

        #generate adversarial examples ----
        adv_data_S1 = adv_generator_1.perturb(X_S[0], y_S[0])
        adv_data_U1 = adv_generator_1.perturb(X_U, pred_U1)

        adv_data_S2 = adv_generator_2.perturb(X_S[1], y_S[1])
        adv_data_U2 = adv_generator_2.perturb(X_U, pred_U2)

        m1.train()
        m2.train()

        # predict adversarial examples ----
        adv_logits_S1 = m1(adv_data_S2)
        adv_logits_S2 = m2(adv_data_S1)

        adv_logits_U1 = m1(adv_data_U2)
        adv_logits_U2 = m2(adv_data_U1)

        # ======== calculate the differents loss ========
        # zero the parameter gradients ----
        optimizer.zero_grad()
        m1.zero_grad()
        m2.zero_grad()

        # losses ----
        Loss_sup_S1, Loss_sup_S2, Loss_sup = p_loss_sup(logits_S1, logits_S2, y_S[0], y_S[1])
        Loss_cot = loss_cot(logits_U1, logits_U2)
        pld_S, pld_U, Loss_diff = p_loss_diff(logits_S1, logits_S2, adv_logits_S1, adv_logits_S2, logits_U1, logits_U2, adv_logits_U1, adv_logits_U2)
        
        total_loss = Loss_sup + lambda_cot() * Loss_cot + lambda_diff() * Loss_diff
        total_loss.backward()
        optimizer.step()

        # ======== Calc the metrics ========
        # accuracies ----
        pred_SU1 = torch.cat((pred_S1, pred_U1), 0)
        pred_SU2 = torch.cat((pred_S2, pred_U2), 0)
        y_SU1 = torch.cat((y_S[0], y_U), 0)
        y_SU2 = torch.cat((y_S[1], y_U), 0)

        acc_S1 = accS[0](pred_S1, y_S[0])
        acc_S2 = accS[1](pred_S2, y_S[1])
        acc_U1 = accU[0](pred_U1, y_U)
        acc_U2 = accU[1](pred_U2, y_U)
        acc_SU1 = accSU[0](pred_SU1, y_SU1)
        acc_SU2 = accSU[1](pred_SU2, y_SU2)
        
        # ratios  ----
        _, adv_pred_S1 = torch.max(adv_logits_S1, 1)
        _, adv_pred_S2 = torch.max(adv_logits_S2, 1)
        _, adv_pred_U1 = torch.max(adv_logits_U1, 1)
        _, adv_pred_U2 = torch.max(adv_logits_U2, 1)

        adv_pred_SU1 = torch.cat((adv_pred_S1, adv_pred_U1), 0)
        adv_pred_SU2 = torch.cat((adv_pred_S2, adv_pred_U2), 0)
        adv_y_SU1 = torch.cat((y_S[0], pred_U1), 0)
        adv_y_SU2 = torch.cat((y_S[1], pred_U2), 0)

        ratio_S1 = ratioS[0](adv_pred_S1, y_S[0])
        ratio_S2 = ratioS[1](adv_pred_S2, y_S[1])
        ratio_U1 = ratioU[0](adv_pred_U1, pred_U1)
        ratio_U2 = ratioU[1](adv_pred_U2, pred_U2)
        ratio_SU1 = ratioSU[0](adv_pred_SU1, adv_y_SU1)
        ratio_SU2 = ratioSU[1](adv_pred_SU2, adv_y_SU2)
        # ========
        
        running_loss += total_loss.item()
        ls += Loss_sup.item()
        lc += Loss_cot.item()
        ld += Loss_diff.item()
        
        # print statistics
        msg = "Epoch %s: %.2f%% : train acc: %.3f %.3f - Loss: %.3f %.3f %.3f %.3f - time: %.2f" % (
            epoch, (batch / len(train_sampler)) * 100,
            acc_SU1, acc_SU2,
            running_loss/(batch+1), ls/(batch+1), lc/(batch+1), ld/(batch+1),
            time.time() - start_time,
        )
        print(msg, end="\r")

    # using tensorboard to monitor loss and acc\n",
    tensorboard.add_scalar('train/total_loss', total_loss.item(), epoch)
    tensorboard.add_scalar('train/Lsup', Loss_sup.item(), epoch )
    tensorboard.add_scalar('train/Lcot', Loss_cot.item(), epoch )
    tensorboard.add_scalar('train/Ldiff', Loss_diff.item(), epoch )
    tensorboard.add_scalar("train/acc_1", acc_SU1, epoch )
    tensorboard.add_scalar("train/acc_2", acc_SU2, epoch )

    tensorboard.add_scalar("detail_loss/Lsus S1", Loss_sup_S1.item(), epoch)
    tensorboard.add_scalar("detail_loss/Lsus S2", Loss_sup_S2.item(), epoch)
    tensorboard.add_scalar("detail_loss/Ldiff S", pld_S.item(), epoch)
    tensorboard.add_scalar("detail_loss/Ldiff U", pld_U.item(), epoch)

    tensorboard.add_scalar("detail_acc/acc S1", acc_S1, epoch)
    tensorboard.add_scalar("detail_acc/acc S2", acc_S2, epoch)
    tensorboard.add_scalar("detail_acc/acc U1", acc_U1, epoch)
    tensorboard.add_scalar("detail_acc/acc U2", acc_U2, epoch)

    tensorboard.add_scalar("detail_ratio/ratio S1", ratio_S1, epoch)
    tensorboard.add_scalar("detail_ratio/ratio S2", ratio_S2, epoch)
    tensorboard.add_scalar("detail_ratio/ratio U1", ratio_U1, epoch)
    tensorboard.add_scalar("detail_ratio/ratio U2", ratio_U2, epoch)
    tensorboard.add_scalar("detail_ratio/ratio SU1", ratio_SU1, epoch)
    tensorboard.add_scalar("detail_ratio/ratio SU2", ratio_SU2, epoch)
    
    # Return the total loss to check for NaN
    return total_loss.item(), msg

In [22]:
def test(epoch, msg = ""):
    global best_acc
    
    m1.eval()
    m2.eval()
    
    correct1 = 0
    correct2 = 0
    total1 = 0
    total2 = 0
    
    with torch.no_grad():
        for batch_idx, (X, y) in enumerate(val_loader):
            X = X.squeeze()
            y = y.squeeze()

            # separate Supervised (S) and Unsupervised (U) parts
            X = X.cuda()
            y = y.cuda()

            outputs1 = m1(X)
            predicted1 = outputs1.max(1)
            total1 += y.size(0)
            correct1 += predicted1[1].eq(y).sum().item()

            outputs2 = m2(X)
            predicted2 = outputs2.max(1)
            total2 += y.size(0)
            correct2 += predicted2[1].eq(y).sum().item()

    msg += '\tnet1 test acc: %.3f%% (%d/%d) | net2 test acc: %.3f%% (%d/%d)' % (
        100.*correct1/total1, correct1, total1, 100.*correct2/total2, correct2, total2)
    print(msg, end="")
    
    tensorboard.add_scalar("val/acc 1", correct1 / total1, epoch)
    tensorboard.add_scalar("val/acc 2", correct2 / total2, epoch)
    
    tensorboard.add_scalar("detail_hyperparameters/lambda_cot", lambda_cot(), epoch)
    tensorboard.add_scalar("detail_hyperparameters/lambda_diff", lambda_diff(), epoch)
    tensorboard.add_scalar("detail_hyperparameters/learning_rate", get_lr(optimizer), epoch)

    # Apply callbacks
    for c in callbacks:
        c.step()


In [None]:
for epoch in range(0, args.epochs):
    total_loss, msg = train(epoch)
    
    if np.isnan(total_loss):
        print("Losses are NaN, stoping the training here")
        break
        
    test(epoch, msg)

# tensorboard.export_scalars_to_json('./' + args.tensorboard_dir + 'output.json')
# tensorboard.close()


# Prep Supervised training (same ratio)

## dataset

In [68]:
# load the data
audio_root = "../dataset/audio"
metadata_root = "../dataset/metadata"
manager = DatasetManager(metadata_root, audio_root, verbose=2)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))


nb file loaded: 7895


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


nb file loaded: 837


In [69]:
# Prep dataset
train_dataset = CoTrainingDataset(manager, args.ratio, train=True, val=False, cached=False)
val_dataset = CoTrainingDataset(manager, 1.0, train=False, val=True, cached=False)

train_sampler = CoTrainingSampler(train_dataset, args.batchsize, nb_class=10, nb_view=args.nb_view, ratio=None, method="duplicate") # ratio is manually set here

## Model

In [70]:
model_func = cnn

m1 = model_func()

m1 = m1.cuda()

## Loaders & adversarial generators

In [71]:
train_loader = data.DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=10)
val_loader = data.DataLoader(val_dataset, batch_size=128, num_workers=4)

## optimizers & callbacks & criterion

In [72]:
params = m1.parameters()
optimizer = optim.SGD(params, lr=args.base_lr, momentum=args.momentum, weight_decay=args.decay)

lr_lambda = lambda epoch: (1.0 + math.cos((epoch-1)*math.pi/args.epochs))
lr_scheduler = LambdaLR(optimizer, lr_lambda)

criterion = nn.CrossEntropyLoss()

callbacks = [lr_scheduler]

## Metrics and hyperparameters

In [73]:
# define the metrics
acc_func = CategoricalAccuracy()

def reset_all_metrics():
    acc_func.reset()
        
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

title = "%s_supervised" % (get_datetime())
tensorboard = SummaryWriter("%s/%s" % (args.tensorboard_dir, title))

# Training

In [74]:
def train(epoch):
    m1.train()
    reset_all_metrics()

    running_loss = 0.0
    
    start_time = time.time()
    print("")
    
    for batch, (X, y) in enumerate(train_loader):
        X = [x.squeeze() for x in X]
        y = [y_.squeeze() for y_ in y]
    
        # separate Supervised (S) and Unsupervised (U) parts
        X_S, X_U = X[:-1], X[-1]
        y_S, y_U = y[:-1], y[-1]
        
        # Only one view interesting, no U
        X_S = X_S[0]
        y_S = y_S[0]
        
        X_S, y_S = X_S.cuda(), y_S.cuda()

        # ======== perform prediction ========
        logits_S = m1(X_S)
        _, pred_S = torch.max(logits_S, 1)

        # ======== calculate loss ========
        loss_sup = criterion(logits_S, y_S)
        total_loss = loss_sup
        
        # ======== backpropagation =======
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # ======== Calc the metrics ========
        acc = acc_func(pred_S, y_S)
        running_loss += total_loss.item()
        
        # print statistics
        msg = "Epoch %s: %.2f%% : train acc: %.3f - Loss: %.3f - time: %.2f" % (
            epoch, (batch / len(train_sampler)) * 100,
            acc,
            running_loss / (batch+1),
            time.time() - start_time,
        )
        print(msg, end="\r")

    # using tensorboard to monitor loss and acc\n",
    tensorboard.add_scalar('train/total_loss', total_loss.item(), epoch)
    tensorboard.add_scalar('train/acc', acc, epoch)
    
    # Return the total loss to check for NaN
    return total_loss.item(), msg

In [75]:
def test(epoch, msg=""):
    m1.eval()
    
    reset_all_metrics()
    
    with torch.no_grad():
        for batch_idx, (X, y) in enumerate(val_loader):
            X = X.squeeze()
            y = y.squeeze()

            # separate Supervised (S) and Unsupervised (U) parts
            X = X.cuda()
            y = y.cuda()

            logits = m1(X)
            _, pred = torch.max(logits, 1)
            
            loss_val = criterion(logits, y)
            
            acc_val = acc_func(pred, y)
        
        msg += "\nEpoch %s: Val acc: %.3f - loss: %.3f" % (
            epoch,
            acc_val,
            loss_val.item()
        )
        print(msg, end="")
    
    tensorboard.add_scalar("val/acc", acc_val, epoch)
    tensorboard.add_scalar("val/loss", loss_val.item(), epoch)
    
    tensorboard.add_scalar("detail_hyperparameters/learning_rate", get_lr(optimizer), epoch)

    # Apply callbacks
    for c in callbacks:
        c.step()


In [76]:
for epoch in range(0, args.epochs):
    total_loss, msg = train(epoch)
    
    if np.isnan(total_loss):
        print("Losses are NaN, stoping the training here")
        break
        
    test(epoch)


Epoch 0: 97.44% : train acc: 0.288 - Loss: 2.005 - time: 9.38
Epoch 0: Val acc: 0.184 - loss: 2.093
Epoch 1: 97.44% : train acc: 0.445 - Loss: 1.631 - time: 9.67
Epoch 1: Val acc: 0.399 - loss: 2.094
Epoch 2: 97.44% : train acc: 0.491 - Loss: 1.557 - time: 9.75
Epoch 2: Val acc: 0.320 - loss: 2.192


Exception ignored in: <function _releaseLock at 0x7f8d89278320>
Traceback (most recent call last):
  File "/home/lcances/.miniconda3/envs/dl/lib/python3.7/logging/__init__.py", line 221, in _releaseLock
    def _releaseLock():
KeyboardInterrupt
Exception ignored in: <function _releaseLock at 0x7f8d89278320>
Traceback (most recent call last):
  File "/home/lcances/.miniconda3/envs/dl/lib/python3.7/logging/__init__.py", line 221, in _releaseLock
    def _releaseLock():
KeyboardInterrupt


RuntimeError: DataLoader worker (pid(s) 32534, 32535, 32537, 32538, 32539, 32540, 32542, 32543) exited unexpectedly

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪

# Statistic on inputs

In [1]:
from scipy.stats import kurtosis

for batch, (X, y) in enumerate(train_loader):
        X = [x.squeeze() for x in X]
        y = [y_.squeeze() for y_ in y]
    
        # separate Supervised (S) and Unsupervised (U) parts
        X_S, X_U = X[:-1], X[-1]
        y_S, y_U = y[:-1], y[-1]
        
        X_U = X_U.numpy()
        print(X_U.shape)
        
        # max
        val = X_U.max(axis=(1, 2))
        print("max max: ", val.max())
        print("min max: ", val.min())
        print("mean max: ", val.mean())
        print("std max: ", val.std())
        print("kurosis: ", kurtosis(val))
        
        # min
        print("------------------")
        val = X_U.min(axis=(1, 2))
        print("max max: ", val.max())
        print("min max: ", val.min())
        print("mean max: ", val.mean())
        print("std max: ", val.std())
        print("kurosis: ", kurtosis(val))
        
        # mean
        print("------------------")
        val = X_U.mean(axis=(1, 2))
        print("max max: ", val.max())
        print("min max: ", val.min())
        print("mean max: ", val.mean())
        print("std max: ", val.std())
        print("kurosis: ", kurtosis(val))
        
        # std
        print("------------------")
        val = X_U.std(axis=(1, 2))
        print("max max: ", val.max())
        print("min max: ", val.min())
        print("mean max: ", val.mean())
        print("std max: ", val.std())
        print("kurosis: ", kurtosis(val))
        
        break

NameError: name 'train_loader' is not defined