In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')
import os
import pickle
import torch
import pandas as pd
from torch import nn
from functools import partial
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split, Subset
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import pytorch_lightning as pl

from models.models import MLPModel, ResNetBigger

In [3]:
from IPython.display import clear_output
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from constants import cloud_data_path, audioset_data_path
from dataset import SwitchBoardLaughterDataset
from audio_utils import featurize_mfcc

In [4]:
class System(pl.LightningModule):
    def __init__(self, model_name, model_hparams={}, optimizer_name='adam', optimizer_hparams={}):
        """
        Inputs:
            model_name - Name of the model/CNN to run. Used for creating the model (see function below)
            model_hparams - Hyperparameters for the model, as dictionary.
            optimizer_name - Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams - Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
        """
        super().__init__()

        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()

        self.model = {
            'mlp': MLPModel(),
            'resnet': ResNetBigger(linear_layer_size=64, filter_sizes=[64,32,16,16])
        }[model_name]

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        return self.model(x)

    def training_step(self, batch, batch_idx):
        # training_step defined the train loop.
        # It is independent of forward
        X, Y = batch

        output = self.model(X).squeeze()
        loss = F.binary_cross_entropy_with_logits(output, Y)

        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=.001)
        return optimizer

    def validation_step(self, batch, batch_idx):
        X, Y = batch

        output = self.model(X).squeeze()
        val_loss = F.binary_cross_entropy_with_logits(output, Y)
        self.log('val_loss', val_loss)

        return (output, Y.squeeze())

    def validation_epoch_end(self, validation_step_outputs):
        all_outputs = torch.cat([o[0] for o in validation_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in validation_step_outputs]).cpu()

        val_auc = roc_auc_score(all_labels, all_outputs)
        self.log('val_auc', val_auc)

    def test_step(self, batch, batch_idx):
        X, Y = batch

        output = self.model(X).squeeze()

        return (output, Y.squeeze())

    def test_epoch_end(self, test_step_outputs):
        all_outputs = torch.cat([o[0] for o in test_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in test_step_outputs]).cpu()

        test_auc = roc_auc_score(all_labels, all_outputs)
        self.test_results = {'auc': test_auc, 'proba': all_outputs}
        self.log('test_auc', test_auc)

In [5]:
def do_fold(train_ds, test_ds, model_name='resnet'):
    # data loaders
    data_loader_train = torch.utils.data.DataLoader(
        train_ds, batch_size=100, shuffle=True, num_workers=10,
        collate_fn=None)
    data_loader_val = torch.utils.data.DataLoader(
        test_ds, batch_size=100, shuffle=False, num_workers=10,
        collate_fn=None)

    system = System(model_name)
    trainer = pl.Trainer(
        callbacks=[EarlyStopping(monitor="val_loss", mode="min")],
        accelerator='gpu',
        log_every_n_steps=1,
        max_epochs=-1)
    trainer.fit(system, data_loader_train, data_loader_val)

    trainer.test(system, data_loader_val)
    return system.test_results

In [6]:
def get_metrics(outputs, labels, type='binary'):
    if type == 'binary':
        proba = torch.sigmoid(outputs)
        pred = (proba > 0.5)

        correct = pred.eq(outputs.bool()).sum().item()
        return {
            'auc': roc_auc_score(labels, proba),
            'correct': correct
        }
    elif type == 'regression':
        return {
            'mse': torch.nn.functional.mse_loss(outputs, labels, reduction='mean'),
            'l1': torch.nn.functional.l1_loss(outputs, labels, reduction='mean')
        }

In [7]:
def do_run(dataset, model_name, metrics_name='binary'):
    
    seed = 22
    cv_splits = KFold(n_splits=2, random_state=seed, shuffle=True).split(range(len(ds)))

    outputs = torch.empty((len(ds),))
    for f, (train_idx, test_idx) in enumerate(cv_splits):
        # create datasets    
        train_ds = Subset(dataset, train_idx)
        test_ds = Subset(dataset, test_idx)

        fold_outputs = do_fold(train_ds, test_ds, model_name)
        outputs[test_idx] = fold_outputs['proba'].cpu()
        clear_output(wait=True)

    labels = torch.Tensor(ds.get_all_labels())
    run_metrics = get_metrics(outputs, labels, metrics_name)
    return outputs, run_metrics

In [8]:
# dataset loading
examples = pd.read_csv('./data/audioset/examples.csv')
audios = pickle.load(open(os.path.join(audioset_data_path, 'audioset_audios.pkl'), 'rb'))

In [16]:
ds = SwitchBoardLaughterDataset(
    df=examples,
    audios=audios,
    feature_fn=partial(featurize_mfcc, hop_length=186),
    sr=8000,
    subsample=True)

df: 19354, audios: 16148, not found: 11024


In [22]:
ds.notfound[-100:]

['kTLa7Gx_WcY',
 'F0AtWOUItaQ',
 'fFVT_CtL62M',
 'LT_Jufs5YAQ',
 'Bzr_Akg7WJ0',
 '10aBef0Ghkc',
 'Yd0D6_oC0xU',
 'v_y7n20ryX0',
 'fVLi5-KnhtY',
 'Q6SgmlYMYLA',
 'WvZ_wamj6NA',
 '2j4m7JsNtNA',
 'VTOf24hbq0A',
 'tESEL6NZcKY',
 'hoPnrbKOEl8',
 '0x82_HySIVU',
 'b04gwYJKwsQ',
 'EEhnuLFYehU',
 'Ml2KMRBE_L4',
 'PzS_cNwa4xM',
 'QXnJ2manIdI',
 '0CbBZ-XeZNU',
 'LtYJXKeUMXA',
 '-znnr5EbiAc',
 'B9j8-Cf1ZW8',
 'Fy51z2RwH3E',
 'fCr0jToaMs4',
 '6uyTDcNWV_s',
 'N-fa5t6WnDM',
 'PPqooXoOpRs',
 '5ZqT7KgdYu8',
 '077aWlQn6XI',
 'Pk5NZe-ah4U',
 '6iNLmtQmy3Y',
 'wMfcj8J1aso',
 '1FbxStVuFYU',
 'BAkDiKQEjrY',
 'Uf03ZMnxw9c',
 'FwGEy5Ek_xw',
 '-9mHz0OsKKw',
 'hMTj17ezEuI',
 'lkIpnrLIVVc',
 's4_fKHLaaRg',
 '31c2kosdtuo',
 'dBNvrAJqw-I',
 'HIn8Gt_bc5Y',
 'Z0htOHTOtHY',
 '4VegMOtu5YU',
 'fofP4lar_QY',
 'bNhA2IsT9R4',
 'QP_ZCssCySw',
 '92sRFZvCnWo',
 'E3F9bzeCgTQ',
 'tHIQdXyNxwY',
 'QPR_tghZjAI',
 'zYM0gtd_PRo',
 'DG5d4megH8g',
 'yvtRYZ5EasA',
 'MuSozMIsi7g',
 'JV_IOR3DqiM',
 'F3WR_RJ9fDM',
 'zE6BVRSQNZU',
 '22DYoQ

In [14]:
examples.head()

Unnamed: 0.1,Unnamed: 0,yt_id,start_time,end_time,tag_strings,laughter
0,0,--5OkAjCI7g,40.0,50.0,"['/m/07sq110', '/m/0ytgt']",True
1,1,--AQYzDx57k,0.0,10.0,"['/m/07rgt08', '/m/09x0r']",True
2,2,--sIMPsphRI,10.0,20.0,"['/m/01j3sz', '/m/05tny_', '/m/068hy', '/m/07r...",True
3,3,-1rvxdiILiM,30.0,40.0,"['/m/04rlf', '/m/07rgt08']",True
4,4,-3c6pnDzbt8,0.0,10.0,"['/m/01j3sz', '/m/09x0r']",True


In [34]:
ds[0]

(0, 50.0, 10.032)
(4.818022263189084, 1.0, 10.032)


(array([[ 7.58273574e-02, -8.08831940e+01,  2.76844616e+01, ...,
          6.37817919e-01,  4.71592188e-01,  6.44641519e-01],
        [ 1.02906934e-01, -8.21656876e+01,  2.29767151e+01, ...,
          6.37817919e-01,  4.71592188e-01,  6.44641519e-01],
        [ 1.23297036e-01, -8.74584351e+01,  2.98735294e+01, ...,
          6.37817919e-01,  4.71592188e-01,  6.44641519e-01],
        ...,
        [ 1.08495703e-01, -5.68558159e+01,  1.47418499e+01, ...,
          3.38231713e-01,  1.76223204e-01, -2.87418544e-01],
        [ 1.06348523e-01, -5.46372261e+01,  1.24134502e+01, ...,
          3.38231713e-01,  1.76223204e-01, -2.87418544e-01],
        [ 9.18559128e-02, -6.25113564e+01,  1.73280182e+01, ...,
          3.38231713e-01,  1.76223204e-01, -2.87418544e-01]]),
 True)

In [35]:
outputs, metrics = do_run(ds, 'resnet', 'binary')

training with dropout=0.5
training with dropout=0.5


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /mnt/c/Users/Jose/Documents/furnace/lared-laughter/audio/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type         | Params
---------------------------------------
0 | model | ResNetBigger | 221 K 
---------------------------------------
221 K     Trainable params
0         Non-trainable params
221 K     Total params
0.887     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

(0, 50.0, 10.032)
(6.923743460370193, 1.0, 10.032)
(0, 390.0, 10.031125)
(0, 40.0, 10.032)(0.27314181848414243, 1.0, 10.031125)

(0, 10.0, 10.032)
(0, 90.0, 10.032)(8.678272183076839, 1.0, 10.032)(2.9256491713961794, 1.0, 10.032)
(0, 10.0, 10.032)(0, 20.0, 10.032)



(4.5808858874405445, 1.0, 10.032)(7.3563552022461485, 1.0, 10.032)
(5.0293857214034645, 1.0, 10.032)(0, 370.0, 10.032)


(0.17019339061911762, 1.0, 10.032)
(0, 20.0, 10.032)
(0.3031256918567414, 1.0, 10.032)(0, 10.0, 10.032)
(0, 440.0, 10.031125)

(5.483010701096184, 1.0, 10.032)
(3.1476945096945825, 1.0, 10.031125)(0, 120.0, 10.032)(0, 570.0, 10.031125)

(0, 430.0, 10.032)
(4.008147529413328, 1.0, 10.032)
(1.591065236930446, 1.0, 10.031125)(5.9202852383784474, 1.0, 10.032)

(0, 10.0, 10.032)

(0, 70.0, 10.032)(1.7295632809359318, 1.0, 10.032)
(0, 160.0, 10.032)
(7.297754850217813, 1.0, 10.032)
(0, 50.0, 10.032)(2.9917862983565633, 1.0, 10.032)
(0, 30.0, 10.032)

(8.220980023752713, 1.0, 10.032)

(5.17430382171779, 1.0, 10

KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/jose/.local/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/jose/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/jose/.local/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/jose/.local/lib/python3.8/site-packages/torch/utils/data/dataset.py", line 363, in __getitem__
    return self.dataset[self.indices[idx]]
  File "/mnt/c/Users/Jose/Documents/furnace/lared-laughter/audio/dataset.py", line 28, in __getitem__
    audio_file = self.audios[yt_id]
KeyError: '-4yy61BH-O8'


(0, 40.0, 10.032)



(8.09308159817612, 1.0, 10.032)(4.781996145329628, 1.0, 10.032)(2.788255199422811, 1.0, 10.032)(7.1476008235799, 1.0, 10.032)



(0, 14.0, 9.768)
(8.550533206177027, 1.0, 9.768)(0, 20.0, 10.032)(0, 160.0, 10.032)


(2.1052466514007167, 1.0, 10.032)(5.362076766924785, 1.0, 10.032)

(0, 40.0, 10.032)
(0, 130.0, 10.032)(0, 100.0, 10.032)(5.599421946975155, 1.0, 10.032)


(4.727751378718121, 1.0, 10.032)(8.12805523203163, 1.0, 10.032)

(0, 40.0, 10.032)(0, 130.0, 10.032)

(7.600314426966291, 1.0, 10.032)(0.8046210800920879, 1.0, 10.032)

(0, 100.0, 10.032)
(5.8705746617074945, 1.0, 10.032)(0, 80.0, 10.032)

(0.8863354382302091, 1.0, 10.032)
(0, 31.0, 9.216)
(4.241661947126872, 1.0, 9.216)(0, 25.0, 9.024)

(1.9655380239713296, 1.0, 9.024)
(0, 10.0, 10.032)
(0, 390.0, 10.032)(4.6064981031817585, 1.0, 10.032)

(8.87871830532263, 1.0, 10.032)
(0, 40.0, 10.032)
(6.2413270784531205, 1.0, 10.032)
(0, 440.0, 10.032)
(5.789322419679352, 1.0, 10.032)
(0, 10.0, 10.032)
(5.8126524

In [130]:
metrics

{'auc': 0.8161846056582899, 'correct': 308}

In [131]:
outputs, metrics = do_run(ds, 'alexnet', 'binary')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | MyAlexNet | 180 K 
------------------------------------
180 K     Trainable params
0         Non-trainable params
180 K     Total params
0.724     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.8045634920634921
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [132]:
metrics

{'auc': 0.8096451714872768, 'correct': 326}