In [1]:
# default_exp kaggle.rfcx_species_audio_detection

# Kaggle rfcx-species-audio-detection competition

> This module contains the pipeline for the rfcx-species-audio-detection competition: https://www.kaggle.com/c/rfcx-species-audio-detection

In [2]:
#hide
from nbdev.export import notebook2script

In [3]:
#export
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from fastprogress import progress_bar
from IPython.core.debugger import set_trace
import gc
from fastscript import *
from fastcore.all import *
from fastai.vision.all import *
from dl_pipeline.core import *
from dl_pipeline.audio.core import *
from dl_pipeline.audio.augmentations import *
from dl_pipeline.audio.dataset import *
from dl_pipeline.vision.models import *
from dl_pipeline.vision.losses import *
from dl_pipeline.audio.util import *

### Resample and save waveform data in npy files

The following code will read the input flac files using librosa, resample to 32000 Hz and save as npy files. This will save time for loading the data during training and inference. If the files already exist in the path_save_npy, they will be ignored.

In [4]:
# Configuration for the one-off flac -> npy conversion.
sample_rate = 32_000
path = Path('/kaggle/kaggle_rainforest_audio/data')
train_path, test_path = path/'train', path/'test'
path_save_npy = Path(path/f'npy{sample_rate}/train')

# Convert the train folder first, then the test folder, with identical settings.
# NOTE(review): test recordings are written to the same `.../train` npy folder as
# the train recordings — confirm this is what the dataset loader expects.
for folder in (train_path, test_path):
    files = [entry for entry in folder.ls() if '.flac' in entry.suffix]
    f = partial(audio2npy, path_save=path_save_npy, sample_rate=sample_rate)
    results = parallel(f, files)

# Display the result of the last (test) conversion, as the original cell did.
results

(#1992) [None,None,None,None,None,None,None,None,None,None...]

### Define augmentation Pipeline

In [5]:
#export
def audio_augment(sample_rate, p=0.25):
    "Build the audio augmentation `Pipeline`; `p` is passed to every transform as its probability."
    clipping = ClippingDistortion(sample_rate, max_percentile_threshold=10, p=p)
    pitch_shift = PitchShift(sample_rate, min_semitones=-8, max_semitones=8, p=p)
    return Pipeline([clipping, pitch_shift])

### Define train setup

In [6]:
#export
def train(sample_rate, num_classes, fold, n_epochs, lr, wd, tile_width, bs, aug_ps, 
          model_name, loss_func, plot, load_checkpoint=None, lr_find=False, head_ps=0.8,
          mixup=False, n_mels=128, hop_length=512, model_arch='resnest50'):
    """
    Fit a model on one fold of `train_tp.csv` and save its weights.

    The annotations are split with `kfold_dataframes(df, fold)`; the training
    loader applies `audio_augment` (probability `aug_ps`) per item and both
    loaders convert batches with `MelSpectrogram`. The model comes from
    `get_model(model_arch, ...)` and is trained with `fit_one_cycle` for
    `n_epochs` at learning rate `lr` and weight decay `wd`.

    `loss_func` is the name handed to `get_loss`; when `mixup` is truthy a
    `MixUp(0.4)` callback is added and `'_mixup'` is appended to that name.
    If `load_checkpoint` is given, `models/{load_checkpoint}_fold{fold}` is
    loaded before training. Weights are saved to `models/{model_name}_fold{fold}`
    next to the data folder. `plot` shows augmentations before training and
    `lr_find` runs the learning-rate finder before fitting.
    """
    seed_everything()
    callbacks = []
    data_root = Path('/kaggle/kaggle_rainforest_audio/data')

    # Read the annotations, rename the csv columns and group the labels.
    column_map = RenameColumns(id='recording_id', label='species_id', tmin='t_min',
                               tmax='t_max', fmin='f_min', fmax='f_max')
    annotations = Pipeline([load_dataframe, column_map, group_labels])(data_root/'train_tp.csv')
    train_df, valid_df = kfold_dataframes(annotations, fold)

    augment = partial(apply_augmentations, augs_pipeline=audio_augment(sample_rate, p=aug_ps))

    def _as_datasets(frame):
        "Wrap a dataframe in `Datasets` using the shared item-creation settings."
        item_tfm = partial(create_dataset_item, path=data_root, sample_rate=sample_rate,
                           tile_width=tile_width, n_mels=n_mels, hop_length=hop_length)
        return Datasets(items=frame, tfms=item_tfm)

    def _to_mel():
        "Fresh batch-level spectrogram transform (one instance per loader)."
        return MelSpectrogram(sample_rate, n_mels=n_mels, hop_length=hop_length)

    train_data = _as_datasets(train_df)
    valid_data = _as_datasets(valid_df)

    # Only the training loader shuffles and augments.
    train_dl = DataLoader(train_data, bs=bs, do_batch=reorganize_batch, shuffle=True,
                          num_workers=8, after_item=augment, after_batch=_to_mel())
    valid_dl = DataLoader(valid_data, bs=bs, do_batch=reorganize_batch, num_workers=8,
                          after_batch=_to_mel())

    dls = DataLoaders(train_dl, valid_dl)
    dls.device = torch.device("cuda:0")

    if plot:
        xb, yb = dls.one_batch()  # fetched here but not used further in this function
        show_augmentations(train_data, train_dl, sample_rate=sample_rate)

    model = get_model(model_arch, num_classes=num_classes, head_ps=head_ps, in_channels=1)

    if mixup:
        callbacks.append(MixUp(0.4))
        loss_func += '_mixup'

    def before_loss(x, y):
        # Hook run by the loss before it is computed.
        x, y = mask2category(x, y)
        return x, y

    def after_loss(loss, y):
        # Hook run by the loss afterwards; passes the value through unchanged.
        return loss

    criterion = get_loss(loss_func, before=before_loss, after=after_loss)
    print('Loss function: ', loss_func)

    learn = Learner(dls, model, loss_func=criterion, metrics=[accuracy, lrap], cbs=callbacks)
    learn.to_fp16(clip=0.5)

    if load_checkpoint is not None:
        resume_from = data_root.parent/f'models/{load_checkpoint}_fold{fold}'
        learn.load(resume_from)
        print('Load model ', resume_from)

    if lr_find:
        learn.lr_find()

    learn.fit_one_cycle(n_epochs, lr, wd=wd, div_final=10, div=10)
    save_to = data_root.parent/f'models/{model_name}_fold{fold}'
    learn.save(save_to)
    print('Model saved to', save_to)

### Define inference setup

In [7]:
#export
def post_processing(df, path_save, model_name, tile_width, MODE=2):
    """
    Rescale the 24 prediction columns of `df` in place and write them to CSV.

    Post processing idea by Chris Deotte shared at 
    https://www.kaggle.com/c/rfcx-species-audio-detection/discussion/220389

    Columns 1..24 of `df` (column 0 is left untouched) are first min-max
    normalised. Each column's odds are then multiplied by a per-column factor
    and converted back to probabilities; values equal to 1 are left as they are.
    With `d1[k]` the mean of normalised column k and `d2[k]` the hard-coded
    reference value for that column, the factor is:

    - MODE 1: odds(d2[k]) / FUDGE   (FUDGE = 2.0)
    - MODE 2: odds(d2[k]) / odds(d1[k])
    - MODE 3: d2[k] / d1[k]

    where odds(p) = p / (1 - p). The result is saved to
    `path_save/'submission_with_pp_{model_name}_{tile_width}.csv'`.

    Raises `ValueError` if `MODE` is not 1, 2 or 3; the check happens before
    `df` is modified.
    """
    # Validate first so an unsupported mode cannot leave `df` half-processed.
    if MODE not in (1, 2, 3):
        raise ValueError(f'MODE must be 1, 2 or 3, got {MODE!r}')

    FUDGE = 2.0
    n_cols = 24

    # Min-max normalise every prediction column.
    for k in range(n_cols):
        df.iloc[:,1+k] -= df.iloc[:,1+k].min()
        df.iloc[:,1+k] /= df.iloc[:,1+k].max()

    # CONVERT PROBS TO ODDS, APPLY MULTIPLIER, CONVERT BACK TO PROBS
    def scale(probs, factor):
        probs = probs.copy()
        idx = np.where(probs!=1)[0]  # p == 1 has infinite odds, so leave it untouched
        odds = factor * probs[idx] / (1-probs[idx])
        probs[idx] =  odds/(1+odds)
        return probs

    # d1: column means of the normalised predictions.
    # d2: hard-coded per-column reference values (presumably the class
    #     frequencies from the linked discussion — confirm against it).
    d1 = df.iloc[:,1:].mean().values
    d2 = np.array([113,204,44,923,53,41,3,213,44,23,26,149,255,14,123,222,46,6,474,4,17,18,23,72])/1000.

    for k in range(n_cols):
        if MODE == 3:
            s = d2[k] / d1[k]
        else:
            d = FUDGE if MODE == 1 else d1[k]/(1-d1[k])
            s = (d2[k]/(1-d2[k]))/d
        df.iloc[:,k+1] = scale(df.iloc[:,k+1].values,s)

    df.to_csv(path_save/f'submission_with_pp_{model_name}_{tile_width}.csv',index=False)
    
def ensemble(files):
    """
    Average several submission CSVs per `recording_id` and save the result.

    `files` is a sequence of paths; the output is written next to the first
    one as `submission_ens_{YYYYmmddHHMM}.csv` and its location is printed.
    """
    stacked = pd.concat([pd.read_csv(csv_file) for csv_file in files])
    averaged = stacked.groupby('recording_id').mean().reset_index()
    stamp = datetime.now().strftime('%Y%m%d%H%M')
    out_file = files[0].parent/f'submission_ens_{stamp}.csv'
    averaged.to_csv(out_file, index=False)
    print(f'Saved to {out_file}')
    
def get_preds(dataloader, model, device=torch.device("cuda:0"), max_reduce=True):
    """
    Run `model` over every batch of `dataloader` without gradients.

    The model is put in eval mode and moved to `device`. For each batch the
    output gets a new leading axis; with `max_reduce` the maximum over dim 0
    of the output is taken first. Returns `(preds, ys)`, the per-batch
    results and targets concatenated on dim 0, both on the CPU.
    """
    model.eval().to(device)
    collected_preds, collected_ys = [], []
    with torch.no_grad():
        for xb, yb in progress_bar(dataloader):
            out = model(xb)
            # Either collapse dim 0 with a max, or keep the full output.
            out = out.max(0).values[None] if max_reduce else out[None]
            collected_preds.append(out.cpu())
            collected_ys.append(yb.cpu())
        preds = torch.cat(collected_preds, dim=0)
        ys = torch.cat(collected_ys, dim=0)
    return preds, ys
        
def test(sample_rate, num_classes, tile_widths, model_name, ens_folds, head_ps=0.8, 
         n_mels=128, hop_length=512, save_preds=False, model_arch='resnest50'):
    """
    Run inference for every tile width and fold, then ensemble the submissions.

    For each `tile_width` in `tile_widths` and each `fold` in `ens_folds`, the
    checkpoint `models/{model_name}_fold{fold}` is loaded and predictions are
    computed for the validation split and for the recordings listed in
    `sample_submission.csv`. Both are written to `preds/*.npy` on every call.
    `save_preds` only decides whether `get_preds` keeps its unreduced output
    (`max_reduce=False`) in those files; the max over dim 1 is then taken
    afterwards.

    Per tile width the function prints the `lrap` validation score, averages
    the test predictions over folds, applies a softmax, writes
    `subs/submission_{timestamp}.csv`, and calls `post_processing`. Finally the
    post-processed files of all tile widths are averaged with `ensemble`.

    Raises `ValueError` if `tile_widths` or `ens_folds` is empty.
    """
    if len(tile_widths) == 0 or len(ens_folds) == 0:
        raise ValueError('tile_widths and ens_folds must both be non-empty')

    bs = 1
    _path_save = Path('preds')
    _path_save.mkdir(exist_ok=True)
    max_reduce = not save_preds

    # Defined once up front: `path` is also needed after the fold loop.
    path = Path('/kaggle/kaggle_rainforest_audio/data')
    subs_dir = path.parent/'subs'
    subs_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders

    ens_files = []
    for tile_width in tile_widths:
        print(f'Running inference for tile_width {tile_width}')
        preds_ens, preds_valid, ys_valid = [], [], []
        for fold in ens_folds:
            seed_everything()
            print(f'Running inference for fold {fold}')
            rename_cols_test = RenameColumns(id='recording_id')
            rename_cols_valid = RenameColumns(id='recording_id', label='species_id', tmin='t_min', 
                                        tmax='t_max',fmin='f_min', fmax='f_max')

            df = Pipeline([load_dataframe, rename_cols_valid, group_labels])(path/'train_tp.csv')

            train_df, valid_df = kfold_dataframes(df, fold)

            test_df = Pipeline([load_dataframe, rename_cols_test])(path/'sample_submission.csv')
            fp_df = Pipeline([load_dataframe, rename_cols_valid, group_labels])(path/'train_fp.csv')

            # Loader order: 0 = train, 1 = valid, 2 = test, 3 = train_fp.
            # Only the valid (1) and test (2) loaders are used below.
            datasets = [Datasets(items=dataframe, tfms=partial(create_dataset_item, path=path,
                sample_rate=sample_rate, tile_width=None, n_mels=n_mels, hop_length=hop_length))
                for dataframe in [train_df, valid_df, test_df, fp_df]]

            dls = DataLoaders(*[DataLoader(dataset, bs=bs, do_batch=reorganize_batch, num_workers=8, 
                            after_batch=Pipeline([MelSpectrogram(sample_rate, n_mels=n_mels, 
                                        hop_length=hop_length), TilesTransform(tile_width)])) 
                 for dataset in datasets])

            model = get_model(model_arch, num_classes=num_classes, head_ps=head_ps, in_channels=1,
                              pretrained=False)

            dls.device = torch.device("cuda:0")        
            learn = Learner(dls, model, loss_func=cross_entropy, metrics=[accuracy, lrap])
            learn.to_fp16(clip=0.5)
            checkpoint = path.parent/f'models/{model_name}_fold{fold}'
            learn.load(checkpoint)
            print('Load model: ', checkpoint)

            # Validation split of this fold.
            preds, ys = get_preds(dls[1], model, max_reduce=max_reduce)
            np.save(_path_save/f'{model_name}_{tile_width}_fold{fold}_valid.npy', 
                    {'preds':preds, 'ys':ys})
            if not max_reduce: preds = preds.max(1).values
            preds_valid.append(preds)
            ys_valid.append(ys)

            # Test recordings.
            preds, ys = get_preds(dls[2], model, max_reduce=max_reduce)
            np.save(_path_save/f'{model_name}_{tile_width}_fold{fold}_test.npy', 
                    {'preds':preds, 'ys':ys})
            if not max_reduce: preds = preds.max(1).values
            preds_ens.append(preds[None])

        preds_valid, ys_valid = torch.cat(preds_valid), torch.cat(ys_valid)
        valid_score = lrap(preds_valid, ys_valid.long().squeeze(), before=lambda *o:o)
        print(f'Validation score: {valid_score:.3f}')

        # Average over folds, then turn the averaged outputs into probabilities.
        preds_ens = torch.cat(preds_ens).mean(0).softmax(-1)
        test_df = Pipeline([load_dataframe])(path/'sample_submission.csv')

        for i in range(preds_ens.shape[1]):
            test_df.loc[:, f's{i}'] = preds_ens[:,i]

        tstr = datetime.now().strftime('%Y%m%d%H%M')
        submission_file = subs_dir/f'submission_{tstr}.csv'
        test_df.to_csv(submission_file,index=False)
        # Report the file that was actually written.
        print('Submission file saved: ', submission_file)
        
        #Post-processing
        post_processing(test_df, subs_dir, model_name, tile_width)
        
        ens_files.append(subs_dir/f'submission_with_pp_{model_name}_{tile_width}.csv')
    ensemble(ens_files)

### Create command line function

In [8]:
#export
def _as_bool(value):
    "Interpret a flag that may arrive as a bool or as a string such as 'true' / 'false'."
    if isinstance(value, str):
        return value.strip().lower() in ('true', '1', 'yes', 'y', 't', 'on')
    return bool(value)

@call_parse
def main(fold:Param('Fold number', int)=0, 
         n_epochs:Param('Number of training epochs', int)=30,
         lr:Param('Learning rate', float)=1e-3,
         wd:Param('Weight decay', float)=3e-2,
         loss_func:Param('Loss function', str)='cross_entropy',
         tile_width:Param('Tile width', int)=1024,
         tile_widths_inference:Param('List of tile widths for inference', list)=[128,256],
         sample_rate:Param('Sample rate', int)=32000,
         n_mels:Param('Spectrogram n_mels', int)=128,
         hop_length:Param('Spectrogram hop_length', int)=640,
         bs:Param('Batch size', int)=32,
         accumulate_gradients:Param('Batch size for gradient accumulation', int)=None,
         aug_ps:Param('Augmentation probability', float)=0.25,
         model_arch:Param('Name of model architecture', str)='densenet121',
         model_name:Param('Name of parameters file', str)='model_n0',
         ens_folds:Param('Folds to use for ensemble', list)=[0,1,2,3,4],
         run_test:Param('Run test prediction (default is train)', str)=False,
         load_checkpoint:Param('Load model checkpoint before new train loop', str)=None,
         head_ps:Param('Model head dropout probability', float)=0.8,
         mixup:Param('Use mixup', str)=True,
         save_preds:Param('Save model predictions for train and validation', str)=False):
    "Train one fold, or with `run_test` generate test predictions for the saved folds."
    # NOTE(review): `accumulate_gradients` is accepted but not forwarded anywhere.
    # NOTE(review): the `list`-typed options probably do not parse comma-separated
    # values from the command line as intended — confirm against fastscript.
    num_classes = 24

    # The flags are declared as `str`, so the values 'false'/'False' would be
    # truthy if used directly; turn them into real booleans first.
    run_test = _as_bool(run_test)
    mixup = _as_bool(mixup)
    save_preds = _as_bool(save_preds)

    if run_test:
        test(sample_rate, num_classes, tile_widths_inference, model_name, ens_folds, head_ps=head_ps, 
             n_mels=n_mels, hop_length=hop_length, save_preds=save_preds, model_arch=model_arch)
    else:
        train(sample_rate, num_classes, fold, n_epochs, lr, wd, tile_width, bs, aug_ps, 
          model_name, loss_func, plot=False, load_checkpoint=load_checkpoint, lr_find=False,
          head_ps=head_ps, mixup=mixup, n_mels=n_mels, hop_length=hop_length, model_arch=model_arch)

### Train model
```python
for i in [0,1,2,3,4]:
    train(
        sample_rate=32000,
        num_classes=24,
        fold=i,
        n_epochs=30,
        lr=1e-3,
        wd=3e-2,
        n_mels=128,
        hop_length=640,
        tile_width=1024,
        bs=32,
        aug_ps=0.25,
        model_arch='resnest50',
        model_name='model_n100',
        loss_func='cross_entropy', 
        plot=False,  
        load_checkpoint=None, 
        lr_find=False, 
        head_ps=0.8,
        mixup=True)
```

### Generate predictions for test data
```python
test(sample_rate=32000, 
     num_classes=24, 
     n_mels=128,
     hop_length=640,
     tile_widths=[128,256], 
     model_arch='resnest50',
     model_name='model_n100', 
     ens_folds=[0,1,2,3,4],
     save_preds=True)
```

### Running train and predictions on terminal
```bash
#!/bin/bash
arch='resnest50'
model_name='model_n100'
sample_rate=32000
n_mels=128
hop_length=640
for fold in 0 1 2 3 4
do
    echo "Training $model_name for fold $fold"
    kaggle_rainforest2021 --fold $fold --model_name $model_name --model_arch $arch \
                          --sample_rate $sample_rate --n_mels $n_mels \
                          --hop_length $hop_length --bs 32 --head_ps 0.8 \
                          --tile_width 1024 --mixup true >> log.train
done

echo "Generate predictions for $model_name"
kaggle_rainforest2021 --run_test true --model_name $model_name --model_arch $arch \
                      --sample_rate $sample_rate --n_mels $n_mels \
                      --hop_length $hop_length --save_preds true >> log.predict
```

In [9]:
#hide
# Run nbdev's notebook2script; the recorded output lists each notebook as "Converted".
notebook2script()

Converted 00_core.ipynb.
Converted 00vision_losses.ipynb.
Converted 00vision_models.ipynb.
Converted 00vision_triplet.ipynb.
Converted 01audio_augmentations.ipynb.
Converted 01audio_core.ipynb.
Converted 01audio_dataset.ipynb.
Converted 01audio_util.ipynb.
Converted index.ipynb.
Converted kaggle_rfcx-species-audio-detection.ipynb.
