In [1]:
# default_exp audio.util

# Audio Utility Functions 
> Includes metrics, losses and other utility functions.

In [2]:
#hide
from nbdev.export import notebook2script

In [3]:
#export 
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Beta
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.model_selection import KFold
from IPython.core.debugger import set_trace
from fastai.vision.all import *
from dl_pipeline.audio.core import *
from dl_pipeline.vision.losses import *

In [4]:
#export
def mask2category(x, y):
    """Reduce a label mask `y` to per-sample labels aligned with `x`.

    `y` is moved to the CPU and reduced with a NaN-ignoring maximum over
    axes 1 and 2 (so it needs at least three dimensions). The result is
    wrapped in `TensorAudioLabel`, cast to the tensor type of `x` and moved
    to the device of `x`.

    Returns:
        A tuple `(x.float(), labels.long())`.
    """
    # NOTE(review): presumably the mask stores the class id at labelled
    # positions, so the maximum recovers that id -- confirm against the dataset.
    reduced = np.nanmax(y.cpu(), axis=(1, 2))
    labels = TensorAudioLabel(reduced)
    labels = labels.type(x.type()).to(x.device)
    return x.float(), labels.long()

def accuracy(x, y, before=mask2category, after=lambda o:o):
    """Mean rate at which the arg-max of the predictions equals the target.

    Args:
        x: Raw predictions; the arg-max is taken over the last axis.
        y: Raw targets.
        before: Callable applied as `before(x, y)`; must return the
            `(predictions, targets)` pair that is actually compared.
        after: Callable applied to the resulting mean before it is returned.
    """
    preds, targs = before(x, y)
    hits = preds.argmax(-1) == targs
    return after(hits.float().mean())

def lrap(x, y, before=mask2category, after=lambda o:o):
    """Label ranking average precision of the predictions `x` against `y`.

    Args:
        x: Raw predictions; after `before`, axis 1 is used as the class axis.
        y: Raw targets; after `before`, integer class indices.
        before: Callable applied as `before(x, y)`; must return the
            `(predictions, targets)` pair to score.
        after: Callable applied to the score before it is returned
            (identity by default), mirroring `accuracy`.

    Returns:
        `after(score)` where `score` is the value computed by
        `label_ranking_average_precision_score`.
    """
    x, y = before(x, y)
    # One-hot width follows the class axis of the predictions.
    targets = F.one_hot(y, x.shape[1]).cpu().numpy()
    # detach() so the NumPy conversion also works when `x` tracks gradients.
    scores = x.detach().softmax(1).cpu().numpy()
    # The `after` hook was previously accepted but silently ignored.
    return after(label_ranking_average_precision_score(targets, scores))


In [5]:
#export
def kfold_dataframes(df, fold_number, n_splits=5, shuffle=True, random_state=2021):
    """Split `df` into the train/validation frames of one K-fold partition.

    Args:
        df: DataFrame to split; any index is accepted.
        fold_number: Which of the `n_splits` folds to use as validation
            (indexes the list of splits, so negative values count from the end).
        n_splits: Number of folds.
        shuffle: Whether rows are shuffled before being assigned to folds.
        random_state: Seed for the shuffle; ignored when `shuffle` is False.

    Returns:
        `(train_df, valid_df)`, both with a fresh 0..n-1 index.
    """
    # scikit-learn raises if a random_state is given while shuffle is False,
    # so only forward the seed when it is actually used.
    kf = KFold(n_splits=n_splits, shuffle=shuffle,
               random_state=random_state if shuffle else None)
    train_idx, valid_idx = list(kf.split(df.index))[fold_number]
    # KFold yields row *positions*, not index labels: select with iloc so the
    # split is also correct for frames whose index is not 0..n-1.
    train_df = df.iloc[train_idx].reset_index(drop=True)
    valid_df = df.iloc[valid_idx].reset_index(drop=True)
    return train_df, valid_df

In [6]:
%%time
# Demo: read the training CSV and take fold 0 of the default 5-fold split.
# NOTE(review): the path is absolute and machine-specific; this cell only runs
# where the dataset is mounted at this location.
df = pd.read_csv('/kaggle/kaggle_rainforest_audio/data/train_tp.csv')
train_df, valid_df = kfold_dataframes(df, 0)
# Show the number of rows in the training and validation partitions.
len(train_df), len(valid_df)

CPU times: user 6.23 ms, sys: 0 ns, total: 6.23 ms
Wall time: 6.01 ms


(972, 244)

In [7]:
#export
class OneHot(Callback):
    """Callback that swaps the batch target for its one-hot float encoding."""
    # fastai callback flag: do not run this callback during validation.
    run_valid = False

    def before_batch(self):
        """One-hot encode `yb[0]` over `dls.c` classes and store it on the learner."""
        target = self.yb[0]
        encoded = F.one_hot(target, self.dls.c).float()
        self.learn.yb = (encoded,)
        
class MixUp(Callback):
    """MixUp callback: blend every sample with a randomly chosen partner.

    One mixing coefficient per sample is drawn from `Beta(alpha, alpha)` and
    folded to `max(lam, 1 - lam)`, so the original sample always keeps the
    larger weight. Inputs are blended as `a * x + (1 - a) * x[perm]`.

    Args:
        alpha: Concentration used for both parameters of the Beta distribution.
        onehot: If True the targets are blended the same way as the inputs
            (they must then support float arithmetic). If False the target is
            replaced by a dict with keys `'yb'` (original targets), `'yb1'`
            (permuted targets) and `'a'` (1-D coefficients).
            NOTE(review): the loss function in use must accept that dict --
            confirm against the project's losses.
    """
    # fastai callback flag: do not run this callback during validation.
    run_valid = False
    def __init__(self, alpha=0.4, onehot=False):
        self.alpha = alpha
        self.distrib = Beta(alpha, alpha)
        self.onehot = onehot

    def before_batch(self):
        """Replace `learn.xb` / `learn.yb` with their mixed versions."""
        x, y = self.xb[0], self.yb[0]
        bs, device = x.shape[0], x.device
        # reshape(-1) instead of squeeze(): a batch of one must stay 1-D,
        # otherwise the per-sample broadcasting below breaks.
        lambd = self.distrib.sample((bs,)).reshape(-1).to(device)
        # Element-wise max keeps the dominant weight on the original sample.
        lambd = torch.max(lambd, 1 - lambd)
        shuffle = torch.randperm(bs).to(device)
        xb1, yb1 = x[shuffle], y[shuffle]
        # Broadcast one coefficient per sample over all remaining input axes,
        # whatever the rank of the input is.
        a = lambd.float().reshape(-1, *([1] * (x.dim() - 1)))
        self.learn.xb = (a * x + (1 - a) * xb1,)
        a = a.reshape(-1)
        if self.onehot:
            # Same broadcasting trick for the (possibly multi-axis) targets.
            a = a.reshape(-1, *([1] * (yb1.dim() - 1)))
            self.learn.yb = (a * y + (1 - a) * yb1,)
        else:
            self.learn.yb = ({'yb': y, 'yb1': yb1, 'a': a},)
            
class LabelSED(Callback):
    """Callback that shrinks the batch target before the loss is computed.

    The last axis of `yb[0]` is subsampled with a stride of
    `last_axis_len // 2**model_n_rescales`, then the maximum over axis 2 is
    taken and the result is cast to float.

    NOTE(review): presumably this matches the target to the temporal
    resolution of the model output -- confirm against the model.
    """
    # fastai callback flag: this callback also runs during validation.
    run_valid = True
    def __init__(self, model_n_rescales):
        # Each rescale is treated as a factor of two.
        self.rescale = 2**model_n_rescales

    def before_batch(self):
        """Subsample, max-reduce and float-cast `yb[0]`, then store it back."""
        target = self.learn.yb[0]
        step = target.shape[-1] // self.rescale
        strided = target[..., ::step]
        pooled = strided.max(2).values.float()
        self.learn.yb = (pooled,)

In [8]:
%%time
# Construction check: build the MixUp callback with alpha=0.4 in one-hot mode.
MixUp(0.4, onehot=True)

CPU times: user 214 µs, sys: 29 µs, total: 243 µs
Wall time: 219 µs


MixUp

In [9]:
#hide 
# nbdev export step: convert the project's notebooks to Python modules
# (the list of converted notebooks is printed below).
notebook2script()

Converted 00_core.ipynb.
Converted 00vision_losses.ipynb.
Converted 00vision_models.ipynb.
Converted 00vision_triplet.ipynb.
Converted 01audio_augmentations.ipynb.
Converted 01audio_core.ipynb.
Converted 01audio_dataset.ipynb.
Converted 01audio_util.ipynb.
Converted 88_external_xresnet_ssa.ipynb.
Converted index.ipynb.
Converted kaggle_rfcx-species-audio-detection-sed.ipynb.
Converted kaggle_rfcx-species-audio-detection.ipynb.
