In [1]:
%load_ext autoreload
%autoreload 2
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import gc
import os
import pickle
import random
import time
from collections import Counter, defaultdict
from functools import partial
from pathlib import Path
from psutil import cpu_count

import librosa
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
#from skmultilearn.model_selection import iterative_train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

import tensorboardX

from freesound.utils.general import seed_everything, setup_tboard_writer
from freesound.utils.lwlwrap import calculate_per_class_lwlrap
from freesound.spec_augment import augment_spectrogram as augspecorig
from freesound.imaug_seqs import imgaug_seqs_dict
from freesound.archis.large import Classifier

import bz2
from freesound.preprocessor import Preprocessor
import pylab as plt
from pathlib import Path
import os

In [3]:
# Must be set before CUDA is initialised for it to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'


## PARAMS ##
# Run identity / reproducibility
RUN_NAME = 'mixmatch_ALPHA_X=0.35'
SEED = 2019
# Preprocessing and augmentation
PREPROCESSOR_CONFIG_NAME = 'default'
RANDOM_VOLUME = 0.8
SPEC_AUGMENT_PROB = 0.25
MIXUP_ALPHA = 0.3
IMGAUG_SEQ = 'default'
# Optimisation
BATCH_SIZE = 64
LR = 3e-3
LR_MIN = 1e-5
T_MAX = 10
NUM_EPOCHS = 300
############

preprocessor_config_path = 'config/preprocessing/' + PREPROCESSOR_CONFIG_NAME + '.yaml'


def augment_spectrogram(x):
    """Run the project's spectrogram augmentation with this run's RANDOM_VOLUME and SPEC_AUGMENT_PROB."""
    return augspecorig(x, RANDOM_VOLUME, SPEC_AUGMENT_PROB)

In [4]:
# Project helpers: presumably seeds the Python/NumPy/torch RNGs (confirm in
# freesound.utils.general), then creates the TensorBoard writer for this run.
# tboard_log_dir is later reused to derive the checkpoint directory.
seed_everything(SEED)
tboard_writer, tboard_log_dir = setup_tboard_writer(RUN_NAME)

In [5]:
def _seed_worker(worker_id):
    """Reseed NumPy and ``random`` inside a DataLoader worker.

    The dataset draws mixup coefficients and noisy-sample indices from
    ``np.random``. Forked workers inherit the parent's NumPy state, so without
    this hook they can all produce the same "random" sequence.
    ``torch.initial_seed()`` is distinct per worker, so it is used as the source.

    NOTE(review): imgaug keeps its own RNG, which may need the same treatment.
    """
    worker_seed = torch.initial_seed() % 2 ** 32  # NumPy seeds must fit in 32 bits
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Use every CPU reported by psutil, both for BLAS/OpenMP threads and for
# data-loading workers.
N_JOBS = cpu_count()
os.environ['MKL_NUM_THREADS'] = str(N_JOBS)
os.environ['OMP_NUM_THREADS'] = str(N_JOBS)
# Rebind DataLoader so every loader created below gets these defaults.
# Callers can still override either keyword explicitly.
DataLoader = partial(DataLoader, num_workers=N_JOBS, worker_init_fn=_seed_worker)

In [6]:
# Competition data root; the FS_INPUTS_BASE environment variable must be set (KeyError otherwise).
dataset_dir = Path(os.environ['FS_INPUTS_BASE']) / 'freesound-audio-tagging-2019'

In [7]:
# Paths of the CSV files under dataset_dir, keyed by split name.
csvs = {
    'train_curated': dataset_dir / 'train_curated.csv',
    'train_noisy': dataset_dir / 'train_noisy.csv',
    'sample_submission': dataset_dir / 'sample_submission.csv',
    'test': dataset_dir / 'test.csv',
}

In [8]:
# Load the curated and noisy training tables and the sample submission
# (the 'test' CSV path is defined above but not read here).
df_train_curated = pd.read_csv(csvs['train_curated'])
df_train_noisy = pd.read_csv(csvs['train_noisy'])
df_sample = pd.read_csv(csvs['sample_submission'])

In [9]:
# Class names: every sample-submission column after the first one.
labels = df_sample.columns[1:].tolist()

In [10]:
# Number of output classes for the classifier and the prediction buffers.
num_classes = len(labels)

In [11]:
def df_to_dummies(df):
    """Multi-hot encode the comma-separated ``labels`` column.

    Returns a float32 array with one row per DataFrame row and one column per
    distinct label found in ``df``. Asserts that exactly 80 labels occur.
    """
    indicator_frame = df['labels'].str.get_dummies(sep=',')
    encoded = indicator_frame.values.astype(np.float32)
    _, n_labels = encoded.shape
    assert n_labels == 80
    return encoded

def df_to_x(df):
    """Return the ``fname`` column of ``df`` as an array."""
    return df['fname'].values

def df_to_xy(df):
    """Split a training table into ``(file names, multi-hot label matrix)``.

    Asserts that both outputs have the same number of rows.
    """
    targets = df_to_dummies(df)
    names = df_to_x(df)
    assert len(names) == len(targets)
    return names, targets

In [12]:
# File names (x) and multi-hot labels (y) for the curated and noisy sets.
x_train, y_train = df_to_xy(df_train_curated)
x_train_noisy, y_train_noisy = df_to_xy(df_train_noisy)
# Test file names come from the sample submission's fname column.
x_test = df_to_x(df_sample)
# Every training clip (curated + noisy) that must be available from the preprocessor.
all_wavnames = np.append(x_train, x_train_noisy)

In [None]:
# Build the project preprocessor and populate its cache for all training clips.
# NOTE(review): dont_load=True presumably skips loading a previously saved
# cache - confirm in freesound.preprocessor. Saving the cache is disabled below.
preproc = Preprocessor(preprocessor_config_path, dont_load=True)
preproc.fill_cache(all_wavnames)
# preproc.save_cache()

In [None]:
# Beta(alpha, alpha) parameter used by mixmatch_create_batch for the labeled-batch MixUp.
# The same value is also spelled out in RUN_NAME; keep the two in sync by hand.
ALPHA_X = 0.35

def mixup_mod(x1, x2, y1, y2, alpha):
    """MixUp biased towards the first operand.

    Draws one coefficient from Beta(alpha, alpha) and replaces it with
    max(coef, 1 - coef), so the first operand always gets weight >= 0.5.
    The same coefficient blends the inputs and the targets.

    Returns the pair ``(mixed_x, mixed_y)``.
    """
    weight = np.random.beta(alpha, alpha)
    weight = np.amax([weight, 1 - weight])
    remainder = 1 - weight
    return weight * x1 + remainder * x2, weight * y1 + remainder * y2

def sharpen(x, T):
    """Temperature sharpening: raise ``x`` to the power 1/T, then rescale each
    row (dim 1) so that it sums to 1."""
    powered = torch.pow(x, 1 / T)
    row_totals = powered.sum(dim=1, keepdim=True)
    return powered / row_totals

def label_guessing(model, ub, K):
    """Guess labels for an unlabeled batch by averaging predictions over K views.

    Args:
        model: network returning logits; it is run in eval mode without gradients.
        ub: unlabeled batch whose first dimension is K * B, laid out as K
            consecutive groups of B samples.
        K: number of augmented views per sample.

    Returns:
        Tensor of shape [B, num_outputs]: sigmoid probabilities averaged over
        the K views, detached from the graph.

    The model's previous train/eval mode is restored even if the forward pass
    raises, so a failed call cannot silently leave the model in eval mode.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            pr = torch.sigmoid(model(ub))  # shape = [B*K, num_outputs]
            return pr.view(K, pr.shape[0] // K, -1).mean(0).data
    finally:
        if was_training:
            model.train()

def mixmatch_create_batch(x, y, Ux, model, T=0.5, alpha=0.75):
    """Build one MixMatch training batch.

    Args:
        x, y: labeled batch, first dimension ``batch_size``.
        Ux: unlabeled batch of ``batch_size * K`` samples (K augmentations).
        model: network used to guess labels for ``Ux``.
        T: sharpening temperature applied to the guessed labels.
        alpha: Beta parameter for the unlabeled MixUp. The labeled MixUp uses
            the module-level ``ALPHA_X`` instead.

    Returns:
        ``(X, p, U, q)``: mixed labeled inputs/targets and mixed unlabeled
        inputs/targets.
    """
    n_labeled = len(x)
    K = Ux.shape[0] // x.shape[0]
    # Guessed labels: average over the K views, sharpen, then tile back to
    # one row per unlabeled sample.
    guessed = sharpen(label_guessing(model, Ux, K), T)
    Uy = guessed.repeat([K, 1])
    # Random shuffle of the combined pool, according to the paper
    order = np.arange(n_labeled + len(Ux))
    np.random.shuffle(order)
    pool_x = torch.cat([Ux, x], dim=0)[order]
    pool_y = torch.cat([Uy, y], dim=0)[order]
    # MixUp: labeled samples take the first n_labeled pool entries, unlabeled
    # samples take the rest.
    X, p = mixup_mod(x, pool_x[:n_labeled], y, pool_y[:n_labeled], ALPHA_X)
    U, q = mixup_mod(Ux, pool_x[n_labeled:], Uy, pool_y[n_labeled:], alpha)
    return X, p, U, q

In [None]:
# Look up the imgaug pipeline named by IMGAUG_SEQ.
# NOTE(review): the next cell rebinds `seq`, so when cells run in order this
# selection is never used by the datasets.
seq = imgaug_seqs_dict[IMGAUG_SEQ]

In [None]:
# Hand-written imgaug pipeline. It rebinds `seq`, replacing the
# imgaug_seqs_dict[IMGAUG_SEQ] choice made in the previous cell.
# NOTE(review): this import sits mid-notebook rather than in the import cell.
from imgaug import augmenters as iaa
# Wrap an augmenter so that it is applied with probability p (default 0.15).
st = lambda aug, p=0.15: iaa.Sometimes(p, aug)  # noqa
seq = iaa.Sequential([
    st(iaa.Superpixels(p_replace=0.2, n_segments=(64, 256))),
    st(iaa.CropAndPad(px=((-5, 5), (-20, 20), (-5, 5), (-20, 20)))),
    st(iaa.GaussianBlur(sigma=(0.0, 1.5))),
    st(iaa.PiecewiseAffine(scale=(0.005, 0.02))),
    st(iaa.Add((-40, 40))),
    st(iaa.AdditiveGaussianNoise(loc=0., scale=(0.1, 10)))
])

In [None]:
def get_noisy_img_and_label(pp):
    """Pick one noisy-set clip uniformly at random.

    ``pp`` is indexed by file name. Returns ``(pp[fname], label_row)``, using
    the module-level ``x_train_noisy`` / ``y_train_noisy`` arrays.
    """
    pick = np.random.randint(len(x_train_noisy))
    fname = x_train_noisy[pick]
    return pp[fname], y_train_noisy[pick]
        

class FATTrainDataset(Dataset):
    """Spectrogram dataset yielding labeled samples plus K unlabeled views.

    ``__getitem__`` returns a tuple built in this order:
    ``image``, then ``fname`` (if ``return_fnames``), then the crop offset of
    the main image (if ``return_crop``), then ``label`` (unless ``no_labels``),
    then the unlabeled views (unless ``no_labels`` or ``no_unlabeled``).

    Args:
        preproc: mapping-like object indexed by file name, returning an array
            that ``preprep_img`` treats as channel-first.
        fnames: file names of the samples.
        labels: per-sample label rows; ignored when ``no_labels`` is set.
        seq: imgaug augmenter, used only when ``is_training``.
        mixup_alpha: Beta parameter for mixing with a random noisy-set sample;
            a falsy value disables mixup.
        is_training: enables imgaug, spectrogram augmentation and mixup.
        desired_length: width every image is cropped or zero-padded to.
        no_labels / return_fnames / return_crop / no_unlabeled: select which
            elements of the output tuple are present.
        K: number of augmented views produced for the unlabeled sample.
    """

    def __init__(self, preproc, fnames, labels, seq, mixup_alpha=MIXUP_ALPHA, is_training=True,
                 desired_length=128, no_labels=False, return_fnames=False, return_crop=False,
                 no_unlabeled=False, K=2):
        super().__init__()
        self.preproc = preproc
        self.fnames = fnames
        self.labels = labels
        self.seq = seq
        self.mixup_alpha = mixup_alpha
        self.is_training = is_training
        self.desired_length = desired_length
        self.no_labels = no_labels
        self.return_fnames = return_fnames
        self.return_crop = return_crop
        self.no_unlabeled = no_unlabeled
        self.K = K
        # Offset chosen by the most recent crop_img call. Defined up front so
        # that reading it can never raise AttributeError.
        self.last_crop = 0

        self.transforms = transforms.ToTensor()

    def __len__(self):
        return len(self.fnames)

    def preprep_img(self, image):
        """Tile a single-channel image to 3 channels and move channels last."""
        if image.shape[0] == 1:
            image = np.tile(image, [3, 1, 1])
        image = np.transpose(image, [1, 2, 0])
        return image

    def crop_img(self, image):
        """Randomly crop or zero-pad axis 1 to ``desired_length``.

        Records the chosen crop start (or padding offset) in ``self.last_crop``.
        An image that already has the desired length is returned unchanged
        with offset 0.
        """
        time_dim = image.shape[1]
        diff = time_dim - self.desired_length
        # Reset first so that an exact-length image reports 0 instead of the
        # offset left over from a previous call.
        self.last_crop = 0
        if diff > 0:
            crop = random.randint(0, diff)
            self.last_crop = crop
            image = image[:, crop:crop + self.desired_length]
        elif diff < 0:
            tmp = np.zeros([image.shape[0], self.desired_length, *image.shape[2:]],
                           dtype=image.dtype)
            start = random.randint(0, -diff)
            self.last_crop = start
            tmp[:, start:start + image.shape[1]] = image
            image = tmp
        return image

    def prep_img(self, image):
        """Augment (training only), convert to a tensor and divide by 255."""
        if self.is_training:
            image = self.seq.augment_image(image)
        image = self.transforms(image)
        if self.is_training:
            image = augment_spectrogram(image)
        # NOTE(review): ToTensor already rescales uint8 HWC arrays to [0, 1],
        # so this divides such inputs by 255 a second time. Confirm the dtype
        # coming from preproc and whether the double scaling is intended.
        return image.div_(255)

    def _get_single_unlabled_img(self):
        """Fetch one random noisy-set image (its label is discarded), then crop or pad it."""
        u, _ = get_noisy_img_and_label(self.preproc)
        u = self.crop_img(self.preprep_img(u))
        return u

    def __getitem__(self, idx):
        image = self.preproc[self.fnames[idx]]
        label = None if self.no_labels else self.labels[idx]
        image = self.crop_img(self.preprep_img(image))
        # Capture the main image's offset now; the crop_img calls below (mixup
        # partner, unlabeled image) overwrite self.last_crop.
        crop = self.last_crop
        if self.is_training and self.mixup_alpha:
            mixup_p = np.random.beta(self.mixup_alpha + 1, self.mixup_alpha)
            if mixup_p < 0.98:  # save compute when mixup barely has effect
                oth_image, oth_label = get_noisy_img_and_label(self.preproc)
                oth_image = self.crop_img(self.preprep_img(oth_image))
                image = mixup_p * image + (1 - mixup_p) * oth_image
                image = image.round().astype(np.uint8)
                if label is not None:
                    # The original label keeps full weight; only the partner's
                    # contribution is scaled, then the sum is clipped to [0, 1].
                    label = label + (1 - mixup_p) * oth_label
                    label = np.clip(label, 0.0, 1.0)
        image = self.prep_img(image)
        ret = [image]
        if self.return_fnames:
            ret.append(self.fnames[idx])
        if self.return_crop:
            ret.append(crop)
        if self.no_labels:
            return tuple(ret)
        ret.append(torch.from_numpy(label).float())
        if self.no_unlabeled:
            return tuple(ret)
        u = self._get_single_unlabled_img()
        # K independently augmented views of the same crop, concatenated along
        # dim 0 (the channel axis).
        unlabeled_img = torch.cat([self.prep_img(u.copy()) for _ in range(self.K)], 0)
        ret.append(unlabeled_img)
        return tuple(ret)

In [None]:
# Lower-case aliases of the config constants, used by the cells below.
batch_size = BATCH_SIZE
test_batch_size = BATCH_SIZE
lr = LR
lr_min = LR_MIN
t_max = T_MAX

# Hold out 2% of the curated set for validation; the split is fixed by SEED.
x_trn, x_val, y_trn, y_val = train_test_split(x_train, y_train, test_size=0.02, random_state=SEED)

In [None]:
# Training set: augmentation and mixup on; each item also carries unlabeled views.
train_dataset = FATTrainDataset(preproc, x_trn, y_trn, seq=seq, mixup_alpha=MIXUP_ALPHA, is_training=True)
# Validation set: is_training=False disables augmentation and mixup; items are (image, label) only.
valid_dataset = FATTrainDataset(preproc, x_val, y_val, seq=None, is_training=False, no_unlabeled=True)

# drop_last=True keeps every training batch at exactly batch_size samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_dataset, batch_size=test_batch_size, shuffle=False)

In [None]:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group
    (``None`` if it has no groups)."""
    return next((group['lr'] for group in optimizer.param_groups), None)

# Build the classifier, move it to the GPU and wrap it in DataParallel.
model = Classifier(num_classes=num_classes)
# model = ClassifierPhase2(model)
model = model.cuda()
model = nn.DataParallel(model)
# Adam with a cosine-annealed learning rate between lr and lr_min (T_max = t_max).
optimizer = Adam(params=model.parameters(), lr=lr, amsgrad=False)
scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=lr_min)
# Supervised loss on raw logits.
criterion = nn.BCEWithLogitsLoss().cuda()

In [None]:
# Starting epoch for the training loop, which iterates range(epoch, num_epochs).
epoch = 0
num_epochs = NUM_EPOCHS

In [None]:
# Consistency loss between predictions on the unlabeled batch and the guessed labels.
mse = torch.nn.MSELoss()

In [None]:
# Weight of the unlabeled (MSE) loss term in the total loss.
LAMBDA_U = 100.0

In [None]:
# NOTE(review): duplicate of the criterion already created in the model/optimizer cell.
criterion = nn.BCEWithLogitsLoss().cuda()

In [None]:
def is_interesting_epoch(epoch):
    """Decide whether a 0-based ``epoch`` should be validated and checkpointed.

    True when the 1-based epoch number falls in 10-14, 30-34, 50-54, ...,
    i.e. the first five epochs of every odd-numbered block of ten.
    """
    one_based = epoch + 1
    return 10 <= one_based % 20 < 15

# MixMatch training loop. Starts from the current value of `epoch`, so
# re-running this cell resumes from the last epoch that was started.
mb = master_bar(range(epoch, num_epochs))

for epoch in mb:
    start_time = time.time()
    model.train()
    avg_loss_u, avg_loss = 0., 0.

    for x_batch, y_batch, u_batch in progress_bar(train_loader, parent=mb):
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()
        # The dataset concatenates K augmented 3-channel views along the
        # channel axis: [B, K * 3, H, W]. Derive B and K from the tensor so
        # the loop follows the dataset's K setting.
        n_samples, stacked_channels, h, w = u_batch.shape
        K = stacked_channels // 3
        u_batch = (u_batch.cuda()
                   .view(n_samples, K, 3, h, w)
                   .permute(1, 0, 2, 3, 4)
                   .contiguous()
                   .view(-1, 3, h, w))
        # u_batch is now [K * B, 3, H, W], laid out as K consecutive groups of B
        X, p, U, q = mixmatch_create_batch(x_batch, y_batch, u_batch, model)
        preds = model(X)
        loss = criterion(preds, p)
        preds_u = model(U)
        # Unlabeled consistency term, scaled by 1 / num_classes and LAMBDA_U.
        loss_u = (1 / num_classes) * mse(torch.sigmoid(preds_u), q)
        loss_u *= LAMBDA_U
        loss_total = loss + loss_u

        optimizer.zero_grad()
        loss_total.backward()
        optimizer.step()

        avg_loss += loss.item() / len(train_loader)
        avg_loss_u += loss_u.item() / len(train_loader)

    # Validate and checkpoint only on the selected epochs.
    do_val = is_interesting_epoch(epoch)

    if do_val:
        model.eval()
        valid_preds = np.zeros((len(x_val), num_classes))
        avg_val_loss = 0.

        # No gradients are needed for validation; skip building the autograd graph.
        with torch.no_grad():
            for i, (x_batch, y_batch) in enumerate(valid_loader):
                preds = model(x_batch.cuda())
                loss = criterion(preds, y_batch.cuda())

                preds = torch.sigmoid(preds)
                valid_preds[i * test_batch_size: (i+1) * test_batch_size] = preds.cpu().numpy()

                avg_val_loss += loss.item() / len(valid_loader)

        score, weight = calculate_per_class_lwlrap(y_val, valid_preds)
        lwlrap = (score * weight).sum()

        # Checkpoints go next to the tensorboard logs, with 'tboard' replaced by 'ckpts'.
        savedir = str(tboard_log_dir).replace('tboard', 'ckpts')
        os.makedirs(savedir, exist_ok=True)
        # Save the unwrapped module so the weights load without DataParallel.
        torch.save(model.module.state_dict(), Path(savedir) / 'weight_epoch{}.pt'.format(epoch + 1))

        tboard_writer.add_scalar('metrics/avg_val_loss', avg_val_loss, epoch + 1)
        tboard_writer.add_scalar('metrics/val_lwlrap', lwlrap, epoch + 1)

    elapsed = time.time() - start_time

    scheduler.step()

    tboard_writer.add_scalar('metrics/avg_train_loss', avg_loss, epoch + 1)
    tboard_writer.add_scalar('metrics/avg_loss_u', avg_loss_u, epoch + 1)
    tboard_writer.add_scalar('meta/lr', get_lr(optimizer), epoch + 1)
    tboard_writer.add_scalar('meta/elapsed', elapsed, epoch + 1)