## Base Code

In [1]:
%matplotlib inline
import IPython.core.display         
# setup output image format (Chrome works best)
IPython.core.display.set_matplotlib_formats("svg")
import matplotlib.pyplot as plt
import matplotlib
from numpy import *
from sklearn import *
from scipy import stats
random.seed(100)
import csv
from scipy import io
import pickle
from IPython.display import Audio, display

# https://stackoverflow.com/questions/5364050/reloading-submodules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
def showAudio(info):
    display(Audio(info['previews']['preview-lq-mp3']))

def load_pickle(fname):
    f = open(fname, 'rb')
    out = pickle.load(f)
    f.close()
    return out

In [3]:
train_tags  = load_pickle('train_tags.pickle3')
train_mfccs = load_pickle('train_mfccs.pickle3')
train_info  = load_pickle('train_info.pickle3')

test_mfccs = load_pickle('test_mfccs.pickle3')
test_info  = load_pickle('test_info.pickle3')

In [4]:
# compute delta MFCCs
def compute_delta_mfccs(mfccs):
    dmfccs = []
    for m in mfccs:
        tmp = m[1:] - m[0:-1]
        dm = hstack((m[0:-1], tmp))
        dmfccs.append(dm)
    return dmfccs

In [5]:
train_dmfccs = compute_delta_mfccs(train_mfccs)
test_dmfccs  = compute_delta_mfccs(test_mfccs)

In [6]:
tagnames, tagnames_counts = unique(concatenate(train_tags), return_counts=True)
for a,b in zip(tagnames, tagnames_counts):
    print("{}: {}".format(a, b))

acoust: 100
analog: 100
bass: 160
beat: 128
drum: 371
effect: 141
electron: 194
field: 110
glitch: 110
guitar: 130
hit: 110
loop: 237
machin: 100
metal: 117
nois: 199
percuss: 285
record: 192
space: 125
synth: 220
synthes: 136
vocal: 120
voic: 167


In [7]:
# convert list of tags into binary class labels
def tags2class(tags, tagnames):
    b = zeros(shape=(len(tags), len(tagnames)))
    for i,t in enumerate(tags):
        for j,n in enumerate(tagnames):
            if n in t:
                b[i,j] = 1
    return b

In [8]:
# train_classes[i,j] = absence/presence of the j-th tag in the i-th sound
train_classes = tags2class(train_tags, tagnames)

In [9]:
# double check we did this correctly...
# it should be the same as the tag counts above
sum(train_classes,axis=0)

array([100., 100., 160., 128., 371., 141., 194., 110., 110., 130., 110.,
       237., 100., 117., 199., 285., 192., 125., 220., 136., 120., 167.])

In [10]:
import csv

def write_csv_kaggle_tags(fname, tagnames, Yscores):
    # header
    tmp = [['Id']]
    for t in tagnames:
        tmp[0].append(t)    
    
    # add ID numbers for each Y, and usage if necessary
    for i in range(len(Yscores)):
        tmp2 = [(i+1)]
        for t in range(len(tagnames)):
            tmp2.append(Yscores[i,t])
        
        tmp.append(tmp2)
        
    # write CSV file
    f = open(fname, 'w')
    writer = csv.writer(f)
    writer.writerows(tmp)
    f.close()

## NN experiment

In [97]:
num_classes = len(tagnames)
num_train = len(train_mfccs)
num_test = len(test_mfccs)
pos_cnt = train_classes.sum(0)

print(f'num_classes: {num_classes}')
print(f'number of training samples: {num_train}')
print(f'number of testing samples: {num_test}')
print(f'number of positives in each class:\n {pos_cnt}')


print(f'train_mfccs[0].shape: {train_mfccs[0].shape}')
print(f'train_classes.shape: {train_classes.shape}')


num_classes: 22
number of training samples: 1788
number of testing samples: 262
number of positives in each class:
 [100. 100. 160. 128. 371. 141. 194. 110. 110. 130. 110. 237. 100. 117.
 199. 285. 192. 125. 220. 136. 120. 167.]
train_mfccs[0].shape: (345, 13)
train_classes.shape: (1788, 22)


In [13]:
# plot an ROC curve using class labels and class scores
class AUROC(object):
    def __init__(self, unique_tag_num=22):
        self.unique_tag_num = unique_tag_num
    
    def __call__(self, Yscores, Yclasses):
        fprall = []
        tprall = []
        aucall = []
        for i in range(self.unique_tag_num):
            fpr, tpr, thresholds = metrics.roc_curve(Yclasses[:,i], Yscores[:,i])
#             plt.plot(fpr, tpr, lw=0.5, alpha=0.5)
            auc = metrics.auc(fpr, tpr)
            fprall.append(fpr)
            tprall.append(tpr)
            aucall.append(auc)

        # Then interpolate all ROC curves at this points
        all_fpr = unique(concatenate(fprall))
        mean_tpr = zeros_like(all_fpr)
        for i in range(self.unique_tag_num):
            mean_tpr += interp(all_fpr, fprall[i], tprall[i])

        # Finally average it and compute AUC
        mean_tpr /= self.unique_tag_num

        # auc of the average ROC curve
        auc = metrics.auc(all_fpr, mean_tpr)

        # average AUC
        mc_auc = mean(aucall)
    
        return mc_auc, auc

In [14]:
import torch
from torch.utils.data import DataLoader, random_split
import numpy as np
from extra.dataset import MFCCDataset, MyPadCollate
import logging

LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# https://pytorch.org/docs/stable/notes/randomness.html
SEED = 10086
torch.manual_seed(SEED)
np.random.seed(SEED)
# https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


### test MFCCDataset and MyPadCollate

In [106]:
mfcc_dataset = MFCCDataset(train_mfccs, train_classes)

In [108]:
for i in range(10):
    mfcc, label = mfcc_dataset[i]
    mfcc_diff = mfcc.numpy() - train_mfccs[i]
    print('mfcc_diff: ', np.sum(np.abs(mfcc_diff)))
    label_diff = label.numpy() - train_classes[i]
    print('label_diff: ', np.sum(np.abs(label_diff)))

mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0
mfcc_diff:  0.0
label_diff:  0.0


In [109]:
allloader = DataLoader(mfcc_dataset,
                             batch_size=64,
                             collate_fn=MyPadCollate(batch_first=True))

In [110]:
for i, batch in enumerate(data_loader):
    x, y = batch
    print(x.shape)
    print(y.shape)

torch.Size([64, 1111, 13])
torch.Size([64, 22])
torch.Size([64, 1123, 13])
torch.Size([64, 22])
torch.Size([64, 1161, 13])
torch.Size([64, 22])
torch.Size([64, 1274, 13])
torch.Size([64, 22])
torch.Size([64, 638, 13])
torch.Size([64, 22])
torch.Size([64, 1926, 13])
torch.Size([64, 22])
torch.Size([64, 432, 13])
torch.Size([64, 22])
torch.Size([64, 1256, 13])
torch.Size([64, 22])
torch.Size([64, 865, 13])
torch.Size([64, 22])
torch.Size([64, 814, 13])
torch.Size([64, 22])
torch.Size([64, 1212, 13])
torch.Size([64, 22])
torch.Size([64, 546, 13])
torch.Size([64, 22])
torch.Size([64, 1293, 13])
torch.Size([64, 22])
torch.Size([64, 1109, 13])
torch.Size([64, 22])
torch.Size([64, 714, 13])
torch.Size([64, 22])
torch.Size([64, 1222, 13])
torch.Size([64, 22])
torch.Size([64, 1193, 13])
torch.Size([64, 22])
torch.Size([64, 1197, 13])
torch.Size([64, 22])
torch.Size([64, 1183, 13])
torch.Size([64, 22])
torch.Size([64, 1260, 13])
torch.Size([64, 22])
torch.Size([64, 1227, 13])
torch.Size([64, 22]

### split dataset to train and validation

In [111]:
train_val_split_ratio = 0.9
train_num = int(train_val_split_ratio * len(mfcc_dataset))
valid_num = len(mfcc_dataset) - train_num
mfcc_train, mfcc_valid = random_split(mfcc_dataset,
                                      (train_num, valid_num))

print(len(mfcc_train))
print(len(mfcc_valid))

1609
179


In [112]:
def train(model, dataloader, optimizer, criterion):
    epoch_loss = 0
    
    model.train()
    for dt, gt in dataloader:
        dt = dt.float().cuda()
        gt = gt.float().cuda()
        optimizer.zero_grad()
        predictions = model(dt)
        loss = criterion(predictions, gt)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        
        
    return epoch_loss / len(dataloader)


In [113]:
def evaluate(model, dataloader, criterion):
    model.eval()
    
    preds = []
    gts = []
    with torch.no_grad():
        for dt, gt in dataloader:
            dt = dt.float().cuda()
            gt = gt.float().cuda()
            predictions = model(dt)
            preds.append(predictions)
            gts.append(gt)
            
    preds = torch.cat(preds, dim=0).cpu().numpy()
    gts = torch.cat(gts, dim=0).cpu().numpy()
    score = criterion(preds, gts)

    return preds, score

In [136]:
from extra.nn_model import GRUTagging
import torch.optim as optim
import torch.nn as nn

from sklearn.model_selection import ParameterGrid


def grid_search(model_name, param_grid, trainloader, validloader, n_epochs=50):
    param_combs = list(ParameterGrid(param_grid))
    global_best_score = 0.
    best_save_path = None
    for comb in param_combs:
        model_kwargs = {}
        loss_kwargs = {}
        optim_kwargs = {}
        for key, val in comb.items():
            component, arg = key.split('.')
            if component == 'loss':
                loss_kwargs[arg] = val
            elif component == 'model':
                model_kwargs[arg] = val
            elif component == 'optim':
                optim_kwargs[arg] = val
            else:
                raise ValueError(f'Unknow component {component}.')
        
        if model_name == 'gru':
            model = GRUTagging(batch_first=True, **model_kwargs).cuda()
        else:
            raise NotImplementedError(f'Model {model_name} is not implemented.')
        
        optimizer = optim.Adam(model.parameters(), **optim_kwargs)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=n_epochs//3,
                                              gamma=0.5)
        criterion = nn.BCEWithLogitsLoss(**loss_kwargs).cuda()
        val_criterion = AUROC(len(tagnames))
        
        epoch_best_valid_score = 0.
        
        logging.info(f'training model {model_name}.'\
                     f'\n\t -model_kwargs: {model_kwargs}'\
                     f'\n\t -loss_kwargs: {loss_kwargs}'\
                     f'\n\t -optim_kwargs: {optim_kwargs}')
        
        for epoch in range(1, n_epochs+1):
            scheduler.step()
            
            train_loss = train(model, trainloader, optimizer, criterion)
            preds, (mc_auc, auc) = evaluate(model, validloader, val_criterion)
            
            if mc_auc > epoch_best_valid_score:
                epoch_best_valid_score = mc_auc
                save_dict = {'model_name': model_name,
                             'model_kwargs': model_kwargs,
                             'loss_kwargs': loss_kwargs,
                             'optim_kwargs': optim_kwargs,
                             'model_state_dict': model.state_dict()}
                save_path = f'ckpt/{model_name}_mcauc_{mc_auc:.3f}.pt'
                torch.save(save_dict, save_path)
                if mc_auc > global_best_score:
                    global_best_score = mc_auc
                    best_save_path = save_path
                
            logging.info(f'Epoch: {epoch:02}/{n_epochs} | '\
                         f'Train Loss: {train_loss:.3f} | Valid. MCAUC: {mc_auc:.3f}')
    return best_save_path, global_best_score

In [137]:
def load_model(ckpt_path):
    save_dict = torch.load(ckpt_path)
    if save_dict['model_name'] == 'gru':
        model = GRUTagging(batch_first=True, **save_dict['model_kwargs']).cuda()
    else:
        raise NotImplementedError(f'Model {model_name} is not implemented.')
    model.load_state_dict(save_dict['model_state_dict'])
    return model

In [139]:
paramgrid = {
    'model.hidden_dim': [256, 128, 64],
    'model.dropout': [0.3, 0.2],
    'model.n_layers': [2, 3] ,
    'loss.pos_weight': [None, pos_cnt/num_train],
    'optim.lr': [1e-3, 1e-2]
}

# paramgrid = {
#     'model.hidden_dim': [128, 64],
#     'model.dropout': [0.2],
#     'model.n_layers': [2] ,
#     'loss.pos_weight': [torch.from_numpy(pos_cnt/num_train)],
#     'optim.lr': [1e-3, 1e-2]
# }

trainloader = DataLoader(mfcc_train,
                         batch_size=64,
                         collate_fn=MyPadCollate(batch_first=True))

validloader = DataLoader(mfcc_valid,
                       batch_size=64,
                       collate_fn=MyPadCollate(batch_first=True))

best_save_path, best_score = grid_search('gru',
                                         param_grid=paramgrid,
                                         trainloader=trainloader,
                                         validloader=validloader) 

2020-03-30 15:50:21,350 - INFO - training model gru.
	 -model_kwargs: {'dropout': 0.3, 'hidden_dim': 256, 'n_layers': 2}
	 -loss_kwargs: {'pos_weight': None}
	 -optim_kwargs: {'lr': 0.001}
2020-03-30 15:50:27,371 - INFO - Epoch: 01/50 | Train Loss: 0.337 | Valid. MCAUC: 0.605
2020-03-30 15:50:33,352 - INFO - Epoch: 02/50 | Train Loss: 0.295 | Valid. MCAUC: 0.651
2020-03-30 15:50:39,312 - INFO - Epoch: 03/50 | Train Loss: 0.287 | Valid. MCAUC: 0.701
2020-03-30 15:50:45,341 - INFO - Epoch: 04/50 | Train Loss: 0.279 | Valid. MCAUC: 0.728
2020-03-30 15:50:51,330 - INFO - Epoch: 05/50 | Train Loss: 0.272 | Valid. MCAUC: 0.743
2020-03-30 15:50:57,292 - INFO - Epoch: 06/50 | Train Loss: 0.266 | Valid. MCAUC: 0.754
2020-03-30 15:51:03,315 - INFO - Epoch: 07/50 | Train Loss: 0.259 | Valid. MCAUC: 0.771
2020-03-30 15:51:09,287 - INFO - Epoch: 08/50 | Train Loss: 0.251 | Valid. MCAUC: 0.770
2020-03-30 15:51:15,243 - INFO - Epoch: 09/50 | Train Loss: 0.245 | Valid. MCAUC: 0.780
2020-03-30 15:51:21

2020-03-30 15:59:23,287 - INFO - Epoch: 40/50 | Train Loss: 0.304 | Valid. MCAUC: 0.527
2020-03-30 15:59:29,378 - INFO - Epoch: 41/50 | Train Loss: 0.304 | Valid. MCAUC: 0.510
2020-03-30 15:59:35,399 - INFO - Epoch: 42/50 | Train Loss: 0.303 | Valid. MCAUC: 0.506
2020-03-30 15:59:41,408 - INFO - Epoch: 43/50 | Train Loss: 0.302 | Valid. MCAUC: 0.526
2020-03-30 15:59:47,434 - INFO - Epoch: 44/50 | Train Loss: 0.304 | Valid. MCAUC: 0.525
2020-03-30 15:59:53,474 - INFO - Epoch: 45/50 | Train Loss: 0.304 | Valid. MCAUC: 0.517
2020-03-30 15:59:59,520 - INFO - Epoch: 46/50 | Train Loss: 0.303 | Valid. MCAUC: 0.546
2020-03-30 16:00:05,524 - INFO - Epoch: 47/50 | Train Loss: 0.303 | Valid. MCAUC: 0.503
2020-03-30 16:00:11,591 - INFO - Epoch: 48/50 | Train Loss: 0.300 | Valid. MCAUC: 0.517
2020-03-30 16:00:17,602 - INFO - Epoch: 49/50 | Train Loss: 0.303 | Valid. MCAUC: 0.505
2020-03-30 16:00:23,590 - INFO - Epoch: 50/50 | Train Loss: 0.303 | Valid. MCAUC: 0.499
2020-03-30 16:00:23,606 - INFO -

2020-03-30 16:14:10,956 - INFO - Epoch: 29/50 | Train Loss: 0.321 | Valid. MCAUC: 0.518
2020-03-30 16:14:21,425 - INFO - Epoch: 30/50 | Train Loss: 0.322 | Valid. MCAUC: 0.520
2020-03-30 16:14:31,897 - INFO - Epoch: 31/50 | Train Loss: 0.322 | Valid. MCAUC: 0.502
2020-03-30 16:14:42,397 - INFO - Epoch: 32/50 | Train Loss: 0.310 | Valid. MCAUC: 0.525
2020-03-30 16:14:52,870 - INFO - Epoch: 33/50 | Train Loss: 0.305 | Valid. MCAUC: 0.512
2020-03-30 16:15:03,352 - INFO - Epoch: 34/50 | Train Loss: 0.306 | Valid. MCAUC: 0.516
2020-03-30 16:15:13,834 - INFO - Epoch: 35/50 | Train Loss: 0.306 | Valid. MCAUC: 0.517
2020-03-30 16:15:24,319 - INFO - Epoch: 36/50 | Train Loss: 0.305 | Valid. MCAUC: 0.525
2020-03-30 16:15:34,815 - INFO - Epoch: 37/50 | Train Loss: 0.305 | Valid. MCAUC: 0.535
2020-03-30 16:15:45,282 - INFO - Epoch: 38/50 | Train Loss: 0.304 | Valid. MCAUC: 0.535
2020-03-30 16:15:55,754 - INFO - Epoch: 39/50 | Train Loss: 0.305 | Valid. MCAUC: 0.538
2020-03-30 16:16:06,238 - INFO -

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [131]:
# confirm score on training set and validation set
print(best_save_path)
model = load_model(best_save_path)

allloader = DataLoader(mfcc_dataset,
                       batch_size=128,
                       collate_fn=MyPadCollate(True)
                      )

pred, (mc_auc, auc) = evaluate(model, allloader, AUROC(len(tagnames)))
print(mc_auc, auc)

pred, (mc_auc, auc) = evaluate(model, validloader,
                               AUROC(len(tagnames)))
print(mc_auc, auc)

ckpt/gru_mcauc_0.698.pt
0.6862989427978575 0.6864538939506115
0.6976500234683715 0.699349398878703


In [54]:
n = len(tagnames)
pseudo_test_label = np.random.randint(2, size=(len(test_mfccs), n))

test_dataset = MFCCDataset(test_mfccs, pseudo_test_label)
test_loader = DataLoader(test_dataset,
                         batch_size=128,
                         collate_fn=MyPadCollate())

test_preds, _ = evaluate(model, test_loader,
                         AUROC(len(tagnames)))

In [56]:
save_name = os.path.basename(best_save_path)[:-2]+'csv'
write_csv_kaggle_tags(save_name, tagnames, test_preds)