# Generate Grid Search Table
 - evaluate F1 scores on the test set for both concat and upsample gridsearches
 - uses pandas dataframe to get a latex output of a table with all the results for the paper

In [82]:
import numpy as np
import IPython.display as ipd
import json
import glob
import time
import copy
import pickle
import re

from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torch

import pandas as pd

import matplotlib.pyplot as plt
from tqdm import tqdm 

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cuda:0'

In [136]:
# required so that the pickling works

class BaseModel(nn.Module):
    def __init__(self, hidden_sizes=[512], p=.5):
        super(BaseModel, self).__init__()
        input_size = 4800
        self.hidden_sizes = [input_size] + hidden_sizes + [1]
        for i in range(1, len(self.hidden_sizes)):
            setattr(self, f"hidden_{i}", nn.Linear(self.hidden_sizes[i-1], self.hidden_sizes[i]))
        self.dropout = nn.Dropout(p=p)
        
    def forward(self, x):
        x = self.dropout(x)
        for i in range(1, len(self.hidden_sizes)-1):
            x = getattr(self, f"hidden_{i}")(x)
            x = F.relu(x)
            x = self.dropout(x)
        x = getattr(self, f"hidden_{len(self.hidden_sizes)-1}")(x) # output
        return x

In [7]:
!find . -maxdepth 1 -iname "*.pkl"

./gs_output.pkl
./gs_output_concatenated.pkl
./gs_output_upsampled.pkl


In [8]:
!find test_sets -iname "*.pkl" | head

test_sets/upsample/piano.pkl
test_sets/upsample/trumpet.pkl
test_sets/upsample/banjo.pkl
test_sets/upsample/guitar.pkl
test_sets/upsample/mallet_percussion.pkl
test_sets/upsample/accordion.pkl
test_sets/upsample/bass.pkl
test_sets/upsample/organ.pkl
test_sets/upsample/mandolin.pkl
test_sets/upsample/violin.pkl


In [137]:
# load original openmic data 
with np.load("../openmic-2018/openmic-2018.npz", allow_pickle=True) as data:
    files = data.files
    print(files)
    X = data['X']
    Y_true = data['Y_true']
    Y_mask = data['Y_mask']
    sample_key = data['sample_key']
    
# load openmic classes
with open('../openmic-2018/class-map.json') as f:
    class_map = json.load(f)
    class_map_inv = {v: k for k, v in class_map.items()}
    classes = np.array(list(class_map_inv.values()))

['X', 'Y_true', 'Y_mask', 'sample_key']


In [138]:
# load concatenated gridsearch models and metrics
with open("gs_output_concatenated.pkl", 'rb') as handle:
    models, losses, val_accs, params = pickle.load(handle)

In [11]:
classes

array(['accordion', 'banjo', 'bass', 'cello', 'clarinet', 'cymbals',
       'drums', 'flute', 'guitar', 'mallet_percussion', 'mandolin',
       'organ', 'piano', 'saxophone', 'synthesizer', 'trombone',
       'trumpet', 'ukulele', 'violin', 'voice'], dtype='<U17')

In [90]:
fname_to_path_concat  = lambda fname: f"features2/{fname}.npy"
fname_to_path_upsample = lambda fname: f"features/{fname}.npy"

# required to load from the pickling
class IRdataset(Dataset):
    def __init__(self, instrument='voice', method='concat'):
        
        # not every sample is annotated for every instrument.
        # e.g. for a specific sample nobody might've checked whether or not 
        # there was voice. When constructing our dataset, we only
        # take samples for which the instrument's presence was annotated.
        if method == 'concat':
            fname_func = fname_to_path_concat
            feature_path = "features2/*.npy"
        elif method == 'upsample':
            fname_func = fname_to_path_upsample
            feature_path = "features/*.npy"

        self.mask = Y_mask[:, class_map[instrument]]
        self.fnames = np.array([fname_func(fname) for fname in sample_key])
        existing_files = glob.glob(feature_path)
        fname_exists_mask = np.array([x in existing_files for x in self.fnames])
        self.mask = self.mask * fname_exists_mask
        
        # 
        self.probabilities = Y_true[self.mask][:,class_map[instrument]].astype('float32')
        self.input_files = [np.load(x).astype('float32') for x in self.fnames[self.mask]]
        
    def __len__(self):
        return self.probabilities.shape[0]
        # return 100 # for debug
    
    def __getitem__(self, idx):
        return self.input_files[idx], self.probabilities[idx]
    
    def get_audio(self, idx):
        fname = sample_key[self.mask][idx]
        path = '../openmic-2018/audio/' + fname[0:3] + '/' + fname + '.ogg'
        print(path)
        return ipd.Audio(path)
    
def get_test_dataloader(instrument='voice', method='concat'):
    """
    Get a test dataset based off the test samples set aside during the original gridsearch.
    The test dataloader that is pickled uses weighted random sampling with replacement in order to 
    deal with imbalanced classes. We create a new loader with the underlying data in order to 
    get evaluation metrics purely on the test data. 
    """
    with open("test_sets/" + method + "/" + instrument + ".pkl", "rb") as handle:
        dl_dict = pickle.load(handle)
    test_dataset = dl_dict['test'].dataset # get dataset, new dataloader where we don't weight
    test_dl = DataLoader(test_dataset, batch_size=256)
    return test_dl

In [91]:
get_test_dataloader('voice')

<torch.utils.data.dataloader.DataLoader at 0x7f8cfa46dca0>

In [139]:
def get_f1(instrument, models, method='concat'):
    """
    Given an instrument (string), dictionary of models, and the string describing which gridsearch to query
    (e.g. the one over features of concatenated input data or upsampled input data, 'concat' or 'upsample'
    respectively), compute an F1 score using the test data.
    """
    model = models[instrument].to(device)
    model.eval()   # Set model to evaluate mode
    
    dl = get_test_dataloader(instrument=instrument, method=method)
    true_pos = 0
    false_pos = 0
    false_neg = 0
    for x, y in dl:
        x = x.to(device)
        outputs = model(x).flatten()
        outs = torch.sigmoid(outputs).cpu().detach().numpy()
        # print(outs)
        preds = np.round(outs).astype('int')
        true  = np.round(y.numpy().astype('int'))
        true_pos  += np.sum( preds[true==preds]*1.0)
        false_pos += np.sum((preds[true!=preds] == 1)*1.0)
        false_neg += np.sum((preds[true!=preds] == 0)*1.0)
    p = true_pos / (true_pos + false_pos + 1e-9)
    r = true_pos / (true_pos + false_neg + 1e-9)
    f1 = 2*(p * r)/(p + r + 1e-9)
    return f1

## Construct DF and output in LaTeX

In [115]:
# Construct DF
df = pd.DataFrame(classes, columns=["Instrument"])
df["Val. Loss"] = [f"{x:.3f}" for x in losses.values()]
df["Val. Acc."] = [f"{x.numpy():.3f}" for x in val_accs.values()]
parameters = list(zip(*list(params.values())))
df["H. Layers"] = [str(x) for x in parameters[0]]
df["Reg."] = [f"1e{int(np.log10(x))}" if x > 0 else 0 for x in parameters[1]]
df["Dropout"] = parameters[2]
## 
df["Test F1"] = [f"{get_f1(instrument, models):.3f}" for instrument in classes]
df_sorted = df.sort_values("Test F1", ascending=False)

In [116]:
print(re.sub("( +)", " ", df_sorted.to_latex(index=False))) # get rid of extra ugly whitesace 

\begin{tabular}{lllllrl}
\toprule
 Instrument & Val. Loss & Val. Acc. & H. Layers & Reg. & Dropout & Test F1 \\
\midrule
 piano & 0.111 & 0.968 & [512] & 1e-4 & 0.25 & 0.924 \\
 guitar & 0.168 & 0.964 & [512] & 1e-4 & 0.75 & 0.919 \\
 voice & 0.104 & 0.974 & [512] & 1e-4 & 0.50 & 0.915 \\
 drums & 0.125 & 0.963 & [512] & 1e-3 & 0.25 & 0.912 \\
 synthesizer & 0.198 & 0.956 & [] & 1e-4 & 0.25 & 0.886 \\
 cymbals & 0.180 & 0.954 & [] & 1e-3 & 0.50 & 0.864 \\
 violin & 0.264 & 0.897 & [] & 1e-4 & 0.50 & 0.762 \\
 saxophone & 0.289 & 0.907 & [] & 0 & 0.25 & 0.681 \\
 trumpet & 0.334 & 0.902 & [] & 1e-4 & 0.75 & 0.641 \\
 cello & 0.328 & 0.902 & [] & 1e-3 & 0.75 & 0.608 \\
 organ & 0.354 & 0.865 & [] & 0 & 0.75 & 0.604 \\
mallet\_percussion & 0.322 & 0.867 & [] & 0 & 0.75 & 0.584 \\
 trombone & 0.379 & 0.848 & [] & 1e-3 & 0.75 & 0.531 \\
 flute & 0.466 & 0.772 & [] & 1e-3 & 0.25 & 0.505 \\
 banjo & 0.377 & 0.856 & [] & 1e-3 & 0.25 & 0.494 \\
 bass & 0.366 & 0.857 & [] & 1e-3 & 0.50 & 0.477 \

In [117]:
df_sorted

Unnamed: 0,Instrument,Val. Loss,Val. Acc.,H. Layers,Reg.,Dropout,Test F1
12,piano,0.111,0.968,[512],0.0001,0.25,0.924
8,guitar,0.168,0.964,[512],0.0001,0.75,0.919
19,voice,0.104,0.974,[512],0.0001,0.5,0.915
6,drums,0.125,0.963,[512],0.001,0.25,0.912
14,synthesizer,0.198,0.956,[],0.0001,0.25,0.886
5,cymbals,0.18,0.954,[],0.001,0.5,0.864
18,violin,0.264,0.897,[],0.0001,0.5,0.762
13,saxophone,0.289,0.907,[],0.0,0.25,0.681
16,trumpet,0.334,0.902,[],0.0001,0.75,0.641
3,cello,0.328,0.902,[],0.001,0.75,0.608


## compute % improvement -- concat vs upsample

In [121]:
with open("gs_output_upsampled.pkl", 'rb') as handle:
    umodels, ulosses, uval_accs, uparams = pickle.load(handle)

In [128]:
concat_f1   = np.array([get_f1(instrument,  models, method='concat')   for instrument in classes])
upsample_f1 = np.array([get_f1(instrument, umodels, method='upsample') for instrument in classes])

In [129]:
concat_f1

array([0.47567568, 0.49382716, 0.47727273, 0.60769231, 0.16161616,
       0.86448598, 0.91150442, 0.50515464, 0.9187935 , 0.584     ,
       0.44534413, 0.6039604 , 0.92376682, 0.68062827, 0.88613861,
       0.53125   , 0.64094955, 0.43686007, 0.76237624, 0.91534391])

In [130]:
upsample_f1

array([0.40223464, 0.45849802, 0.41545894, 0.45210728, 0.17708333,
       0.796875  , 0.86893204, 0.47368421, 0.8716707 , 0.6       ,
       0.43478261, 0.60103627, 0.9044289 , 0.66329114, 0.85714286,
       0.46153846, 0.57541899, 0.41201717, 0.71755725, 0.89502762])

In [133]:
np.mean((concat_f1 - upsample_f1) / upsample_f1) * 100

7.06882456280563

In [135]:
np.mean(concat_f1 - upsample_f1) * 100

3.9392757231204696