In [1]:
%load_ext autoreload
%autoreload 2
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import gc
import os
import pickle
import random
import time
from collections import Counter, defaultdict
from functools import partial
from pathlib import Path
from psutil import cpu_count

import librosa
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
#from skmultilearn.model_selection import iterative_train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

import tensorboardX

from freesound.utils.general import seed_everything, setup_tboard_writer
from freesound.utils.lwlwrap import calculate_per_class_lwlrap
from freesound.spec_augment import augment_spectrogram as augspecorig
from freesound.imaug_seqs import imgaug_seqs_dict
from freesound.archis.large import Classifier

import bz2
from freesound.preprocessor import Preprocessor
import pylab as plt
from pathlib import Path
import os

from shutil import rmtree
import json
from freesound.utils.general import hash_dict

In [3]:
# GPUs made visible to this process (the model is wrapped in nn.DataParallel
# further down).
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'

## PARAMS ##
# Every name below is also listed in `params_str` (next cell) and therefore
# contributes to the hash that names the checkpoint directory.
SEED = 2021                          # passed to seed_everything
NUM_VAL_PREDS_PER_EPOCH = 48         # validation batches predicted per validation epoch
PREPROCESSOR_CONFIG_NAME = 'tf_1024' # stem of the YAML file under config/preprocessing/
RANDOM_VOLUME = 0.8                  # forwarded to augspecorig (semantics defined there)
SPEC_AUGMENT_PROB = 0.25             # forwarded to augspecorig (semantics defined there)
MIXUP_ALPHA = 0.0                    # default mixup strength of FATTrainDataset; 0.0 is falsy, so mixup is off
IMGAUG_SEQ = 'default'               # key into imgaug_seqs_dict
BATCH_SIZE = 64                      # training batch size (validation uses twice this)
LR = 3e-3                            # initial Adam learning rate
LR_MIN = 1e-5                        # eta_min of the cosine schedule
T_MAX = 10                           # T_max of the cosine schedule, in epochs
NUM_EPOCHS = 300                     # upper bound of the epoch loop
############

preprocessor_config_path = 'config/preprocessing/{}.yaml'.format(PREPROCESSOR_CONFIG_NAME)


def augment_spectrogram(x):
    """Apply the project spectrogram augmentation with this run's settings.

    Forwards ``x`` to ``augspecorig`` together with ``RANDOM_VOLUME`` and
    ``SPEC_AUGMENT_PROB``. Both constants are looked up at call time, so
    changing them after this cell ran still affects later calls.

    Written as a named function instead of a lambda bound to a name so that it
    carries a proper ``__name__`` and docstring.
    """
    return augspecorig(x, RANDOM_VOLUME, SPEC_AUGMENT_PROB)

In [4]:
# Hash params config and write to file
# Names of the module-level parameters that identify this run. Their values
# are collected into a dict, hashed, and the hash names the checkpoint folder.
params_str = ['NUM_VAL_PREDS_PER_EPOCH', 'SEED', 'PREPROCESSOR_CONFIG_NAME', 'RANDOM_VOLUME', 'SPEC_AUGMENT_PROB',
              'MIXUP_ALPHA', 'IMGAUG_SEQ', 'BATCH_SIZE', 'LR', 'LR_MIN', 'T_MAX', 'NUM_EPOCHS']
# Plain namespace lookup instead of eval(): same values, no code evaluation.
params_dict = {name: globals()[name] for name in params_str}
params_hash = hash_dict(params_dict)

savedir = Path('ckpts') / params_hash
# exist_ok=True: re-running the notebook with unchanged parameters must not
# fail just because the folder for this hash was created by an earlier run.
os.makedirs(savedir, exist_ok=True)
# Keep a human-readable copy of the parameters next to the checkpoints.
with open(savedir / 'config.json', 'w') as f:
    json.dump(params_dict, f)

In [5]:
# Seed the random number generators through the project helper
# (presumably python/numpy/torch — see freesound.utils.general to confirm).
seed_everything(SEED)
# TensorBoard writer for this run; the helper receives the parameter hash and
# returns the writer together with its log directory.
tboard_writer, tboard_log_dir = setup_tboard_writer(params_hash)

In [6]:
# Number of CPUs reported by psutil; used for the thread env vars and as the
# DataLoader worker count.
N_JOBS = cpu_count()
# NOTE(review): numpy and torch were already imported in an earlier cell; these
# variables may have to be set before those imports to take effect — confirm.
os.environ['MKL_NUM_THREADS'] = str(N_JOBS)
os.environ['OMP_NUM_THREADS'] = str(N_JOBS)
# Rebind the name so every DataLoader created below gets num_workers=N_JOBS
# unless the call overrides it.
DataLoader = partial(DataLoader, num_workers=N_JOBS)

In [7]:
# Competition data folder below the directory named by the FS_INPUTS_BASE
# environment variable (a missing variable raises KeyError here).
dataset_dir = Path(os.environ['FS_INPUTS_BASE']) / 'freesound-audio-tagging-2019'

In [8]:
# Lookup table from split name to the CSV file of that split inside dataset_dir.
csvs = {
    'train_curated': dataset_dir.joinpath('train_curated.csv'),
    'train_noisy': dataset_dir.joinpath('train_noisy.csv'),
    'sample_submission': dataset_dir.joinpath('sample_submission.csv'),
    'test': dataset_dir.joinpath('test.csv'),
}

In [9]:
# Load the metadata tables. The two training tables are read for their
# `fname` and `labels` columns; the sample submission supplies the class
# names (its columns after the first) and the test file names.
df_train_curated = pd.read_csv(csvs['train_curated'])
df_train_noisy = pd.read_csv(csvs['train_noisy'])
df_sample = pd.read_csv(csvs['sample_submission'])

In [None]:
# Class names: every sample-submission column except the first one.
labels = df_sample.columns[1:].tolist()

In [None]:
# Number of output units of the classifier built further down.
num_classes = len(labels)

In [None]:
def df_to_dummies(df, classes=None):
    """Turn the comma-separated ``labels`` column into a multi-hot float32 matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column ``labels`` with comma-separated class names.
    classes : sequence of str, optional
        When given, the output has exactly one column per entry, in this order;
        classes that never occur in ``df`` become all-zero columns. When omitted
        the columns are whatever ``Series.str.get_dummies`` produces and the
        result must have 80 of them (the original behaviour).
        NOTE(review): without ``classes`` the column order is decided by pandas
        (sorted label names in current releases) — confirm it matches the class
        order used elsewhere.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), n_classes)`` and dtype float32.

    Raises
    ------
    ValueError
        If ``classes`` is given and ``df`` contains a label not listed in it.
    AssertionError
        If the resulting number of columns is not the expected one.
    """
    dummies = df['labels'].str.get_dummies(sep=',')
    if classes is None:
        expected_columns = 80
    else:
        classes = list(classes)
        unknown = sorted(set(dummies.columns) - set(classes))
        if unknown:
            raise ValueError('labels not present in `classes`: {}'.format(unknown))
        # Fix the column order and add all-zero columns for absent classes.
        dummies = dummies.reindex(columns=classes, fill_value=0)
        expected_columns = len(classes)
    y_train = dummies.values.astype(np.float32)
    assert y_train.shape[1] == expected_columns, \
        'expected {} label columns, got {}'.format(expected_columns, y_train.shape[1])
    return y_train

def df_to_x(df):
    """Return the ``fname`` column of ``df`` as a NumPy array."""
    fname_column = df['fname']
    return fname_column.values

def df_to_xy(df):
    """Split a metadata table into file names and multi-hot targets.

    Returns ``(fnames, targets)`` where ``fnames`` comes from ``df_to_x`` and
    ``targets`` from ``df_to_dummies``; both have one entry per row of ``df``.
    """
    targets = df_to_dummies(df)
    fnames = df_to_x(df)
    # Every file name needs exactly one target row.
    assert len(fnames) == len(targets)
    return fnames, targets

In [None]:
# File names and multi-hot targets of the curated and of the noisy subset.
x_train, y_train = df_to_xy(df_train_curated)
x_train_noisy, y_train_noisy = df_to_xy(df_train_noisy)
# Curated followed by noisy samples, stacked along the sample axis.
x_train_all = np.concatenate([x_train, x_train_noisy], 0)
y_train_all = np.concatenate([y_train, y_train_noisy], 0)
# File names listed in the sample submission.
x_test = df_to_x(df_sample)
# All training file names (curated then noisy); handed to the preprocessor cache.
all_wavnames = np.append(x_train, x_train_noisy)

In [None]:
# Project preprocessor configured from the YAML file chosen in the params cell.
# The datasets below index it by file name (preproc[fname]).
preproc = Preprocessor(preprocessor_config_path)
# Pre-populate its cache for every training file name
# (presumably so samples do not have to be computed during training — confirm).
preproc.fill_cache(all_wavnames)

Loading took 4.1961669921875e-05 seconds


  S = np.maximum(-80., 10 * np.log10(S + 1e-80) - 10 * 3.4)


In [None]:
# Image augmentation sequence selected by IMGAUG_SEQ; the dataset calls
# seq.augment_image(image) on each training sample.
seq = imgaug_seqs_dict[IMGAUG_SEQ]

In [None]:
def get_noisy_img_and_label(pp):
    """Draw one uniformly random sample from the noisy training subset.

    ``pp`` is indexed with the chosen file name to obtain the image. Returns
    ``(image, target_row, file_name)`` for that sample. Reads the module-level
    ``x_train_noisy`` / ``y_train_noisy`` arrays and consumes one draw from
    NumPy's global random state.
    """
    pick = np.random.randint(len(x_train_noisy))
    noisy_name = x_train_noisy[pick]
    noisy_image = pp[noisy_name]
    noisy_target = y_train_noisy[pick]
    return noisy_image, noisy_target, noisy_name
        

class FATTrainDataset(Dataset):
    """Dataset of fixed-width spectrogram crops with optional augmentation and mixup.

    Two modes of operation:

    * Normal mode (``df is None``): sample ``idx`` is built from
      ``fnames[idx]`` / ``labels[idx]`` with a random crop and, when
      ``is_training`` and ``mixup_alpha`` are truthy, a random mixup partner
      drawn from the noisy subset via ``get_noisy_img_and_label``.
    * Replay mode (``df`` given): ``df[idx]`` must unpack to
      ``(c0, c1, f0, f1, mixup_p, y)`` and the sample is rebuilt from exactly
      those crop offsets, file names and mixing weight (see ``get_pd``).

    Parameters
    ----------
    preproc : mapping-like
        Indexed by file name; returns the image array of that file.
    fnames, labels : sequences
        File names and their target rows (``labels`` is unused when
        ``no_labels`` is true).
    seq : object with ``augment_image``
        Image augmentation applied when training with augmentation enabled.
    mixup_alpha : float
        Mixup strength; a falsy value disables mixup.
    is_training : bool
        Enables augmentation (together with ``do_augmentation``) and mixup.
    desired_length : int
        Width (axis 1 after ``preprep_img``) of every returned crop.
    no_labels : bool
        Return only the image.
    no_fnames : bool
        When false, the crop offsets, both file names and the mixing weight
        are appended to the returned tuple.
    do_augmentation : bool
        Switch for the two augmentation steps in ``prep_img``.
    df : sequence, optional
        Replay records, see above.
    """

    def __init__(self, preproc, fnames, labels, seq, mixup_alpha=MIXUP_ALPHA, is_training=True,
                 desired_length=128, no_labels=False, no_fnames=True, do_augmentation=True, df=None):
        super().__init__()
        self.preproc = preproc
        self.fnames = fnames
        self.labels = labels
        self.seq = seq
        self.mixup_alpha = mixup_alpha
        self.is_training = is_training
        self.desired_length = desired_length
        self.no_labels = no_labels
        self.no_fnames = no_fnames
        self.do_augmentation = do_augmentation
        self.df = df

        self.transforms = transforms.ToTensor()

    def __len__(self):
        """Number of samples: replay records when ``df`` is set, else file names."""
        # __getitem__ indexes self.df in replay mode, so the length has to
        # follow self.df as well; otherwise valid indices and len() disagree.
        if self.df is not None:
            return len(self.df)
        return len(self.fnames)

    def crop_img(self, image, crop=None):
        """Bring ``image`` to ``desired_length`` along axis 1.

        Longer images are cut to a window starting at ``crop``; shorter ones
        are placed at offset ``crop`` inside a zero array of the target width.
        When ``crop`` is None the offset is drawn with ``random.randint``.
        Images that already have the target width are returned unchanged with
        offset 0. Returns ``(image, crop)`` so the offset can be replayed.
        """
        time_dim = image.shape[1]
        diff = time_dim - self.desired_length
        if diff > 0:
            if crop is None:
                crop = random.randint(0, diff)
            image = image[:, crop:crop + self.desired_length]
        elif diff < 0:
            tmp = np.zeros([image.shape[0], self.desired_length, *image.shape[2:]],
                           dtype=image.dtype)
            if crop is None:
                crop = random.randint(0, -diff)
            tmp[:, crop:crop + image.shape[1]] = image
            image = tmp
        else:
            crop = 0
        return image, crop

    def prep_img(self, image):
        """Augment (when enabled), convert to a tensor and divide by 255."""
        augment = self.is_training and self.do_augmentation
        if augment:
            image = self.seq.augment_image(image)
        image = self.transforms(image)
        if augment:
            image = augment_spectrogram(image)
        # NOTE(review): torchvision's ToTensor already rescales uint8 HWC arrays
        # to [0, 1]; if the images are uint8 this second division shrinks them
        # to [0, 1/255]. Left untouched because trained checkpoints and the
        # augmentation settings may depend on it — confirm before changing.
        return image.div_(255)

    def preprep_img(self, image):
        """Repeat a single-channel image to three channels and move channels last."""
        if image.shape[0] == 1:
            image = np.tile(image, [3, 1, 1])
        image = np.transpose(image, [1, 2, 0])
        return image

    def get_pd(self, idx):
        """Rebuild the sample described by replay record ``self.df[idx]``.

        The record supplies both crop offsets, both file names, the mixing
        weight and ``y``; ``y`` is returned as stored. Returns ``(image, y)``.
        """
        c0, c1, f0, f1, mixup_p, y = self.df[idx]
        image = self.preproc[f0]
        image = self.preprep_img(image)
        image, crop = self.crop_img(image, c0)
        if mixup_p < 0.98:  # save compute when mixup barely has effect
            oth_image = self.preproc[f1]
            oth_image = self.preprep_img(oth_image)
            oth_image, oth_crop = self.crop_img(oth_image, c1)
            image = mixup_p * image + (1 - mixup_p) * oth_image
            image = image.round().astype(np.uint8)
        image = self.prep_img(image)
        return image, y

    def __getitem__(self, idx):
        """Return sample ``idx``.

        Depending on the flags the result is ``(image,)``, ``(image, label)``
        or ``(image, label, crop, oth_crop, fname, oth_fname, mixup_p)``.
        Without a mixup partner ``oth_crop`` is the sentinel -99999,
        ``oth_fname`` is ``''`` and ``mixup_p`` is 1.0.
        """
        if self.df is not None:
            return self.get_pd(idx)
        fname = self.fnames[idx]
        oth_fname = ''
        image = self.preproc[fname]
        image = self.preprep_img(image)
        # Bound in every case so the mixup branch below cannot hit an
        # undefined name when the dataset is used without labels.
        label = None if self.no_labels else self.labels[idx]
        image, crop = self.crop_img(image)
        crops = [crop, -99999]
        mixup_p = 1.0
        if self.is_training and self.mixup_alpha:
            mixup_p = np.random.beta(self.mixup_alpha + 1, self.mixup_alpha)
            if mixup_p < 0.98:  # save compute when mixup barely has effect
                oth_image, oth_label, oth_fname = get_noisy_img_and_label(self.preproc)
                oth_image = self.preprep_img(oth_image)
                oth_image, oth_crop = self.crop_img(oth_image)
                crops[-1] = oth_crop
                image = mixup_p * image + (1 - mixup_p) * oth_image
                image = image.round().astype(np.uint8)
                if label is not None:
                    # The primary label keeps full weight; the partner's label
                    # is added with weight (1 - mixup_p) and the sum is clipped.
                    label = label + (1 - mixup_p) * oth_label
                    label = np.clip(label, 0.0, 1.0)
        image = self.prep_img(image)
        if self.no_labels:
            return (image,)
        sample = [image, torch.from_numpy(label).float()]
        if self.no_fnames:
            return tuple(sample)
        sample += crops
        sample += [fname, oth_fname]
        sample += [mixup_p]
        return tuple(sample)

In [None]:
# Lower-case working copies of the run parameters used by the cells below.
batch_size = BATCH_SIZE
# The validation loader uses batches twice the training size.
test_batch_size = BATCH_SIZE * 2
lr = LR
lr_min = LR_MIN
t_max = T_MAX

In [None]:
# Training data: curated samples only, mixup strength taken from MIXUP_ALPHA.
train_dataset = FATTrainDataset(preproc, x_train, y_train, seq=seq, mixup_alpha=MIXUP_ALPHA, is_training=True)
# "Validation" data: curated + noisy samples, still in training mode with
# mixup_alpha=0.5. no_fnames=False makes each item also carry the crop
# offsets, both file names and the mixing weight, which the epoch loop stores
# next to the predictions.
valid_dataset = FATTrainDataset(preproc, x_train_all, y_train_all, seq=seq, is_training=True, no_fnames=False, mixup_alpha=0.5)

# Both loaders shuffle and drop the last incomplete batch; the worker count
# comes from the DataLoader partial defined earlier.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_dataset, batch_size=test_batch_size, shuffle=True, drop_last=True)

In [None]:
# x, y, c0, c1, f0, f1, mix = valid_dataset[0]
# df = [[c0, c1, f0, f1, mix, 'y']]
# test_ds = FATTrainDataset(preproc, x_train_all, y_train_all, seq=seq, is_training=True, do_augmentation=False, df=df)
# torch.all(test_ds[0][0] == x)  # True

In [None]:
import torch.nn.functional as F

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Yields ``None`` when the optimizer has no parameter groups.
    """
    group_rates = (group['lr'] for group in optimizer.param_groups)
    return next(group_rates, None)

# Build the classifier with one output per class and move it to the GPU.
model = Classifier(num_classes=num_classes)
model = model.cuda()
# Adam on all model parameters, starting at `lr`.
optimizer = Adam(params=model.parameters(), lr=lr, amsgrad=False)
# Cosine annealing between `lr` and `lr_min`; stepped once per epoch in the
# training loop.
scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=lr_min)
# Multi-label loss that takes raw model outputs (logits).
criterion = nn.BCEWithLogitsLoss().cuda()
# Wrap for multi-GPU execution after the optimizer was created from the
# unwrapped model's parameters.
model = nn.DataParallel(model)

In [None]:
# Loop state kept at module level: the epoch loop starts at `epoch` and
# increments `global_step` once per training batch.
epoch = 0
global_step = 0
num_epochs = NUM_EPOCHS

In [None]:
import warnings
# Silences every warning category for the rest of the session, not only the
# noisy ones emitted during training.
warnings.filterwarnings("ignore")

In [None]:
def is_interesting_epoch(epoch):
    """Decide whether validation predictions are dumped after this epoch.

    ``epoch`` is zero-based and is converted to a one-based count first. The
    answer is True only when that count is above 88, its tens block is odd
    and its last digit is below 5 (e.g. 90-94, 110-114, ...).
    """
    one_based = epoch + 1
    tens, last_digit = divmod(one_based, 10)
    if tens % 2 != 1:
        return False
    if last_digit >= 5:
        return False
    if one_based <= 88:
        return False
    return True

# Progress bar over the remaining epochs, starting at the current value of
# the module-level `epoch`.
mb = master_bar(range(epoch, num_epochs))

for epoch in mb:
    start_time = time.time()
    model.train()
    avg_loss = 0.

    # ---- training pass over the curated set ----
    for stuff in progress_bar(train_loader, parent=mb):
        global_step += 1
        # The training dataset yields (image, label) pairs. Anything else used
        # to fall through and silently reuse the previous batch's tensors.
        if len(stuff) != 2:
            raise ValueError(
                'expected (x, y) batches from train_loader, got {} items'.format(len(stuff)))
        x_batch, y_batch = [i.cuda() for i in stuff]

        preds = model(x_batch)
        loss_train = criterion(preds, y_batch)
        loss = loss_train

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running mean of the batch losses over the epoch.
        avg_loss += loss_train.item() / len(train_loader)

    do_val = is_interesting_epoch(epoch)

    if do_val:
        # ---- dump predictions on mixed-up samples for selected epochs ----
        model.eval()
        val_frames = []
        iterer = iter(valid_loader)
        # No gradients are needed here; disabling them avoids building the
        # autograd graph for every validation batch.
        with torch.no_grad():
            for _ in progress_bar(range(NUM_VAL_PREDS_PER_EPOCH), parent=mb):
                x, y, c0, c1, f0, f1, mix = next(iterer)
                preds = model(x.cuda()).detach().cpu()
                # One row per sample: crop offsets, file names, mixing weight
                # and the raw model output for that sample.
                val_frames.append(
                    pd.DataFrame([np.array(i) for i in [c0, c1, f0, f1, mix, preds]]).T)

        # Concatenate once instead of growing the frame batch by batch.
        df = pd.concat(val_frames, axis=0)
        # Note: column 'y' holds the model predictions, not the dataset labels.
        df.columns = ['c0', 'c1', 'f0', 'f1', 'mix', 'y']
        # The file is written in HDF5 format even though its name ends in
        # '.csv'; the name is kept so existing readers still find it.
        out_path = savedir / f'epoch{epoch}.csv'
        # to_hdf appends by default, so remove a file left by an earlier run.
        if os.path.exists(str(out_path)):
            os.remove(str(out_path))
        df.to_hdf(out_path, key='data')

    elapsed = time.time() - start_time

    # One scheduler step per epoch.
    scheduler.step()

    tboard_writer.add_scalar('metrics/avg_train_loss', avg_loss, epoch + 1)
    tboard_writer.add_scalar('meta/lr', get_lr(optimizer), epoch + 1)
    tboard_writer.add_scalar('meta/elapsed', elapsed, epoch + 1)