In [1]:
# Reload edited project modules automatically so changes under `freesound/`
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the cell output), which hides where names come from.
# Later cells may rely on those names, so it is left in place — confirm
# before replacing it with `%matplotlib inline`.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import gc
import os
import pickle
import random
import time
from collections import Counter, defaultdict
from functools import partial
from pathlib import Path
from psutil import cpu_count

import librosa
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
#from skmultilearn.model_selection import iterative_train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

import tensorboardX

from freesound.utils.general import seed_everything, setup_tboard_writer
from freesound.utils.lwlwrap import calculate_per_class_lwlrap
from freesound.spec_augment import augment_spectrogram as augspecorig
from freesound.imaug_seqs import imgaug_seqs_dict

import bz2
from freesound.preprocessor import Preprocessor
import pylab as plt
from pathlib import Path
import os

In [3]:
# Restrict this process to the first two GPUs.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'


## PARAMS ##
RUN_NAME = 'test'                     # passed to setup_tboard_writer
SEED = 2019                           # passed to seed_everything
PREPROCESSOR_CONFIG_NAME = 'default'  # selects config/preprocessing/<name>.yaml
RANDOM_VOLUME = 0.8                   # forwarded to the spectrogram augmentation
SPEC_AUGMENT_PROB = 0.25              # forwarded to the spectrogram augmentation
MIXUP_ALPHA = 0.3                     # presumably the dataset's mixup_alpha — confirm at the call site
IMGAUG_SEQ = 'default'                # key into imgaug_seqs_dict
############

preprocessor_config_path = 'config/preprocessing/{}.yaml'.format(PREPROCESSOR_CONFIG_NAME)


def augment_spectrogram(x):
    """Apply the project spectrogram augmentation with the notebook's settings.

    Thin wrapper around ``freesound.spec_augment.augment_spectrogram`` that
    fixes its second and third arguments to ``RANDOM_VOLUME`` and
    ``SPEC_AUGMENT_PROB``. Both constants are read at call time, so editing
    them and re-running the config cell takes effect immediately.
    """
    return augspecorig(x, RANDOM_VOLUME, SPEC_AUGMENT_PROB)

In [4]:
# Seed the run from SEED via the project helper
# (presumably python/numpy/torch RNGs — confirm in freesound.utils.general).
seed_everything(SEED)
# Create the TensorBoard writer for RUN_NAME; the second return value is kept
# as the log directory (confirm against setup_tboard_writer).
tboard_writer, tboard_log_dir = setup_tboard_writer(RUN_NAME)

In [5]:
# psutil.cpu_count() returns None when the CPU count cannot be determined.
# Fall back to 1 so that neither the thread-count variables nor DataLoader
# receive None.
N_JOBS = cpu_count() or 1
# NOTE(review): numpy and torch are already imported by the time this cell
# runs; these variables may be read only when the native libraries load, so
# setting them here may have no effect — confirm.
os.environ['MKL_NUM_THREADS'] = str(N_JOBS)
os.environ['OMP_NUM_THREADS'] = str(N_JOBS)
# Every DataLoader created after this cell defaults to N_JOBS workers.
# Re-running the cell is harmless: it just wraps the partial again with the
# same keyword.
DataLoader = partial(DataLoader, num_workers=N_JOBS)

In [6]:
# Competition data lives under $FS_INPUTS_BASE; a missing environment
# variable raises KeyError here.
dataset_dir = Path(os.environ['FS_INPUTS_BASE']) / 'freesound-audio-tagging-2019'

In [7]:
# Map each metadata table name to its CSV file inside the dataset directory.
csvs = {
    table: dataset_dir / (table + '.csv')
    for table in ('train_curated', 'train_noisy', 'sample_submission', 'test')
}

In [8]:
# Read the two training tables and the sample submission, in that order.
df_train_curated, df_train_noisy, df_sample = (
    pd.read_csv(csvs[table])
    for table in ('train_curated', 'train_noisy', 'sample_submission')
)

In [9]:
# Class names are every sample-submission column after the first one.
labels = list(df_sample.columns[1:])

In [10]:
# Number of target classes = number of label columns in the sample submission.
num_classes = len(labels)

In [11]:
def df_to_dummies(df, expected_classes=None):
    """Multi-hot encode the comma-separated ``labels`` column of ``df``.

    Args:
        df: DataFrame with a string column ``labels`` whose entries are
            comma-separated class names.
        expected_classes: Number of label columns the encoding must produce.
            Defaults to the notebook-level ``num_classes``, so the check
            follows the sample submission instead of a hard-coded constant.

    Returns:
        ``float32`` array of shape ``(len(df), expected_classes)``. Columns
        follow the order produced by ``Series.str.get_dummies``.
        NOTE(review): that order is assumed to match ``labels`` — confirm.

    Raises:
        AssertionError: if the number of distinct labels found in ``df``
            differs from ``expected_classes``.
    """
    if expected_classes is None:
        expected_classes = num_classes
    y_train = df['labels'].str.get_dummies(sep=',').values.astype(np.float32)
    assert y_train.shape[1] == expected_classes, (
        'expected {} label columns, got {}'.format(expected_classes, y_train.shape[1])
    )
    return y_train

def df_to_x(df):
    """Return the ``fname`` column of ``df`` as a NumPy array."""
    filenames = df['fname']
    return filenames.values

def df_to_xy(df):
    """Split ``df`` into file names and their multi-hot label matrix.

    Returns:
        ``(names, targets)`` where ``names`` comes from ``df_to_x`` and
        ``targets`` from ``df_to_dummies``; both have one entry per row.
    """
    targets = df_to_dummies(df)
    names = df_to_x(df)
    # Every file name must have exactly one label row.
    assert len(names) == len(targets)
    return names, targets

In [12]:
# Curated training set: file names and multi-hot label matrix.
x_train, y_train = df_to_xy(df_train_curated)

In [13]:
# Noisy training set: file names and multi-hot label matrix
# (sampled from by get_noisy_img_and_label).
x_train_noisy, y_train_noisy = df_to_xy(df_train_noisy)

In [14]:
# File names listed in the sample submission; get_noisy_img_and_label can
# draw unlabeled images from this pool.
x_test = df_to_x(df_sample)

In [15]:
# All training file names: curated first, then noisy.
all_wavnames = np.concatenate([x_train, x_train_noisy])

In [16]:
# Build the preprocessor from the YAML config selected by
# PREPROCESSOR_CONFIG_NAME. The recorded output of this cell reports a load
# time of roughly 168 seconds.
preproc = Preprocessor(preprocessor_config_path)

Loading took 168.1341371536255 seconds


In [17]:
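# Disabled step: uncomment to call fill_cache on every training file name
# (curated + noisy) before training starts.
# preproc.fill_cache(all_wavnames)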

In [18]:
# Augmentation sequence registered under IMGAUG_SEQ
# (presumably handed to FATTrainDataset as `seq` — confirm at the call site).
seq = imgaug_seqs_dict[IMGAUG_SEQ]

In [19]:
def get_noisy_img_and_label(pp, use_test_p=0.0):
    """Draw one random image and its label vector for use as a mixup partner.

    Args:
        pp: Object indexable by file name that returns the image for it.
        use_test_p: Probability of drawing from ``x_test`` instead of the
            noisy training set. When it is not positive, no random number is
            spent on the decision.

    Returns:
        ``(image, label)``. Images drawn from ``x_test`` come with an
        all-zero label vector shaped like a row of ``y_train_noisy``.
    """
    draw_from_test = use_test_p > 0 and np.random.rand() < use_test_p
    if not draw_from_test:
        pick = np.random.randint(len(x_train_noisy))
        return pp[x_train_noisy[pick]], y_train_noisy[pick]
    # Unlabeled test sample: pair it with an all-zero target.
    pick = np.random.randint(len(x_test))
    test_image = pp[x_test[pick]]
    return test_image, 0.0 * y_train_noisy[0]
        

class FATTrainDataset(Dataset):
    """Dataset that turns preprocessed spectrograms into fixed-width tensors.

    Each item is looked up by file name in ``preproc``, converted to a
    channel-last array, randomly cropped or zero-padded along axis 1 to
    ``desired_length``, optionally mixed with a random noisy sample (mixup),
    augmented when training, and converted to a tensor.

    Args:
        preproc: Object indexable by file name; returns an array whose first
            axis is treated as channels.
        fnames: Sequence of file names; defines the dataset length.
        labels: Per-item label vectors (NumPy), aligned with ``fnames``.
            Ignored when ``no_labels`` is true.
        seq: Augmenter exposing ``augment_image``; used only when training.
        mixup_alpha: Mixup strength; a falsy value disables mixup.
        is_training: Enables augmentation and mixup.
        desired_length: Output size along the time axis.
        no_labels: When true, items carry no label tensor.
        return_fnames: When true, the file name is included in each item.
        return_crop: When true, the crop/pad offset of the item's own image
            is included in each item.

    Items are tuples ``(image[, fname][, crop][, label])``.
    """

    def __init__(self, preproc, fnames, labels, seq, mixup_alpha, is_training,
                 desired_length=128, no_labels=False, return_fnames=False, return_crop=False):
        super().__init__()
        self.preproc = preproc
        self.fnames = fnames
        self.labels = labels
        self.seq = seq
        self.mixup_alpha = mixup_alpha
        self.is_training = is_training
        self.desired_length = desired_length
        self.no_labels = no_labels
        self.return_fnames = return_fnames
        self.return_crop = return_crop

        # Offset chosen by the most recent crop_img call (0 if nothing moved).
        self.last_crop = 0
        self.transforms = transforms.ToTensor()

    def __len__(self):
        """Return the number of file names in the dataset."""
        return len(self.fnames)

    @staticmethod
    def _to_hwc(image):
        """Tile a single channel to three and move channels to the last axis."""
        if image.shape[0] == 1:
            image = np.tile(image, [3, 1, 1])
        return np.transpose(image, [1, 2, 0])

    def crop_img(self, image):
        """Randomly crop or zero-pad ``image`` along axis 1 to ``desired_length``.

        The chosen offset (crop start, or position of the image inside the
        padding) is stored in ``self.last_crop``; it is 0 when the image
        already has the desired length.
        """
        time_dim = image.shape[1]
        diff = time_dim - self.desired_length
        offset = 0
        if diff > 0:
            # Too long: keep a random window of the desired length.
            offset = random.randint(0, diff)
            image = image[:, offset:offset + self.desired_length]
        elif diff < 0:
            # Too short: place the image at a random position in a zero canvas.
            padded = np.zeros([image.shape[0], self.desired_length, *image.shape[2:]],
                              dtype=image.dtype)
            offset = random.randint(0, -diff)
            padded[:, offset:offset + time_dim] = image
            image = padded
        self.last_crop = offset
        return image

    def prep_img(self, image):
        """Augment (training only) and convert a channel-last image to a tensor."""
        if self.is_training:
            image = self.seq.augment_image(image)
        image = self.transforms(image)
        if self.is_training:
            image = augment_spectrogram(image)
        # NOTE(review): torchvision's ToTensor already rescales uint8 input to
        # [0, 1]; the extra in-place division is kept so existing behaviour is
        # unchanged — confirm it is intended.
        return image.div_(255)

    def __getitem__(self, idx):
        """Build the item tuple ``(image[, fname][, crop][, label])`` for ``idx``."""
        image = self._to_hwc(self.preproc[self.fnames[idx]])
        label = None if self.no_labels else self.labels[idx]
        image = self.crop_img(image)
        # Remember this item's own offset; the mixup partner's crop below
        # would otherwise overwrite it.
        crop_offset = self.last_crop
        if self.is_training and self.mixup_alpha:
            mixup_p = np.random.beta(self.mixup_alpha + 1, self.mixup_alpha)
            if mixup_p < 0.98:  # save compute when mixup barely has effect
                oth_image, oth_label = get_noisy_img_and_label(self.preproc)
                # Bring the partner to the same layout and width before blending.
                oth_image = self.crop_img(self._to_hwc(oth_image))
                image = mixup_p * image + (1 - mixup_p) * oth_image
                image = image.round().astype(np.uint8)
                if label is not None:
                    # The primary label keeps full weight; the partner's label
                    # is added proportionally and the sum is clipped to [0, 1].
                    label = label + (1 - mixup_p) * oth_label
                    label = np.clip(label, 0.0, 1.0)
                self.last_crop = crop_offset
        image = self.prep_img(image)
        ret = [image]
        if self.return_fnames:
            ret.append(self.fnames[idx])
        if self.return_crop:
            ret.append(crop_offset)
        if not self.no_labels:
            ret.append(torch.from_numpy(label).float())
        return tuple(ret)