In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import pickle
import torch
import pandas as pd
from torch import nn
from functools import partial

from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split, Subset
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import pytorch_lightning as pl

from models.models import MLPModel, ResNetBigger

In [3]:
from IPython.display import clear_output
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from lared_laughter.constants import dataset_path, audioset_data_path
from dataset import SwitchBoardLaughterDataset
from audio_utils import featurize_mfcc, featurize_melspec

In [4]:
class System(pl.LightningModule):
    def __init__(self, model_name, model_hparams={}, optimizer_name='adam', optimizer_hparams={}):
        """
        Inputs:
            model_name - Name of the model/CNN to run. Used for creating the model (see function below)
            model_hparams - Hyperparameters for the model, as dictionary.
            optimizer_name - Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams - Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
        """
        super().__init__()

        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()

        self.model = {
            'mlp': MLPModel(),
            'resnet': ResNetBigger(linear_layer_size=64, filter_sizes=[64,32,16,16])
        }[model_name]

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        return self.model(x)

    def training_step(self, batch, batch_idx):
        # training_step defined the train loop.
        # It is independent of forward
        X, Y = batch

        output = self.model(X).squeeze()
        loss = F.binary_cross_entropy_with_logits(output, Y.float())

        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=.001)
        return optimizer

    def validation_step(self, batch, batch_idx):
        X, Y = batch

        output = self.model(X).squeeze()
        val_loss = F.binary_cross_entropy_with_logits(output, Y.float())
        self.log('val_loss', val_loss)

        return (output, Y.squeeze())

    def validation_epoch_end(self, validation_step_outputs):
        all_outputs = torch.cat([o[0] for o in validation_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in validation_step_outputs]).cpu()

        try:
            val_auc = roc_auc_score(all_labels, all_outputs)
            self.log('val_auc', val_auc)
        except ValueError:
            pass

    def test_step(self, batch, batch_idx):
        X, Y = batch

        output = self.model(X).squeeze()

        return (output, Y.squeeze())

    def test_epoch_end(self, test_step_outputs):
        all_outputs = torch.cat([o[0] for o in test_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in test_step_outputs]).cpu()

        self.test_results = {'proba': all_outputs, 'labels': all_labels}
        try:
            test_auc = roc_auc_score(all_labels, all_outputs)
            self.test_results['auc'] = test_auc
            self.log('test_auc', test_auc)
        except ValueError:
            pass

In [5]:
def do_fold(train_ds, test_ds, model_name='resnet', trainer_params={}):
    # data loaders
    data_loader_train = torch.utils.data.DataLoader(
        train_ds, batch_size=100, shuffle=True, num_workers=10,
        collate_fn=None)
    data_loader_val = torch.utils.data.DataLoader(
        test_ds, batch_size=100, shuffle=False, num_workers=10,
        collate_fn=None)

    system = System(model_name)
    trainer_fn = partial(pl.Trainer, **trainer_params)
    trainer = trainer_fn(
        callbacks=[EarlyStopping(monitor="val_loss", mode="min")] + trainer_params.get('callbacks', []),
        accelerator='gpu',
        log_every_n_steps=1,
        max_epochs=-1)
    trainer.fit(system, data_loader_train, data_loader_val)

    trainer.test(system, data_loader_val)
    return system.test_results

In [6]:
def get_metrics(outputs, labels, type='binary'):
    if type == 'binary':
        proba = torch.sigmoid(outputs)
        pred = (proba > 0.5)

        correct = pred.eq(outputs.bool()).sum().item()
        return {
            'auc': roc_auc_score(labels, proba),
            'correct': correct
        }
    elif type == 'regression':
        return {
            'mse': torch.nn.functional.mse_loss(outputs, labels, reduction='mean'),
            'l1': torch.nn.functional.l1_loss(outputs, labels, reduction='mean')
        }

In [7]:
def do_run(dataset, model_name, metrics_name='binary'):
    
    seed = 22
    cv_splits = KFold(n_splits=2, random_state=seed, shuffle=True).split(range(len(ds)))

    outputs = torch.empty((len(ds),))
    for f, (train_idx, test_idx) in enumerate(cv_splits):
        # create datasets    
        train_ds = Subset(dataset, train_idx)
        test_ds = Subset(dataset, test_idx)

        fold_outputs = do_fold(train_ds, test_ds, model_name)
        outputs[test_idx] = fold_outputs['proba'].cpu()
        clear_output(wait=True)

    labels = torch.Tensor(ds.get_all_labels())
    run_metrics = get_metrics(outputs, labels, metrics_name)
    return outputs, run_metrics

In [23]:
# dataset loading
audioset_examples = pd.read_csv('./data/audioset/examples.csv')
audioset_audios = pickle.load(open(os.path.join(audioset_data_path, 'audioset_audios.pkl'), 'rb'))

In [24]:
asds = SwitchBoardLaughterDataset(
    df=audioset_examples,
    audios=audioset_audios,
    feature_fn=partial(featurize_melspec, hop_length=186),
    sr=8000,
    subsample_length=1,
    id_column='yt_id',
    label_column='laughter')

df: 19354, audios: 15947, not found: 3407
df: 15947, audios: 15947, not found: 3407


In [26]:
asds[0][0].shape

(1, 44, 128)

# Train the audioset model

In [5]:
from pytorch_lightning.callbacks import ModelCheckpoint
from sklearn.model_selection import ShuffleSplit

In [6]:
# set seeds
pl.utilities.seed.seed_everything(22)

Global seed set to 22


22

In [7]:
# saves top-K checkpoints based on "val_loss" metric
checkpoint_callback = ModelCheckpoint(
    save_top_k=3,
    monitor="val_loss",
    mode="min",
    dirpath="./pretrained_audioset/",
    filename="audioset-{epoch:02d}-{val_loss:.2f}",
)

In [8]:
train_idx, test_idx = next(iter(ShuffleSplit(n_splits=1, test_size=0.15, random_state=22).split(range(len(ds)))))
train_ds = Subset(ds, train_idx)
test_ds = Subset(ds, test_idx)
fold_outputs = do_fold(train_ds, test_ds, 'resnet',
    trainer_params={'callbacks': [checkpoint_callback]})

NameError: name 'ds' is not defined