# Birdsong Pytorch Baseline: ResNeSt50-fast (Training)

### import libraries

In [None]:
# Print the GPU status table for the runtime this notebook is attached to.
!nvidia-smi

In [None]:
# Mount Google Drive; later cells read archives from and write checkpoints to
# paths under "/content/drive/My Drive".
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install soundfile --quiet
!pip install audioread --quiet
#!pip install catalyst

In [None]:
#cd /content/resnest-0.0.6b20200701/resnest/

In [None]:
import os
import gc
import time
import shutil
import random
import warnings
import typing as tp
from pathlib import Path
#from contextlib import contextmanager

#import yaml
#from joblib import delayed, Parallel

import cv2
import librosa
import audioread
import soundfile as sf

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import resnest.torch as resnest_torch

pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

### Data creation

In [None]:
cd /content/
!mkdir npy_files
!unzip -qq "/content/drive/My Drive/Resnet/resnest50-fast-package.zip"

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnetnpyfile1.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnetnpyfile2.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnetnpyfile3.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnetnpyfile4.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnetnpyfile5.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnet11.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnet9.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnet8.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

In [None]:
!unzip -qq "/content/drive/My Drive/Resnet/resnet7.zip"
!cp -r f4k/* npy_files/
!rm -rf f4k

### define utilities

In [None]:
def set_seed(seed: int = 42):
    """Seed the random number generators this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator, PyTorch (default and
    CUDA generators) and exports ``PYTHONHASHSEED`` with the same value.

    The cuDNN ``deterministic`` / ``benchmark`` flags are left untouched.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,  # type: ignore
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(1213)

### read data

In [None]:
# Training metadata table. Later cells read its `ebird_code`,
# `resampled_filename` and `xc_id` columns. The path is relative, so it is
# resolved against the current working directory.
train = pd.read_csv("train_mod.csv")

In [None]:
# Point every row at its pre-computed array:
#   /content/npy_files/<ebird_code>_<stem>.npy
# where <stem> is the original `resampled_filename` up to its first ".".
#
# This is done as one column-wise assignment. Per-row chained assignment
# (`train.resampled_filename[x] = ...`) can write to a temporary copy,
# triggers SettingWithCopyWarning, and only works when the index is exactly
# 0..n-1.
_stem = train["resampled_filename"].str.split(".", n=1).str[0]
train["resampled_filename"] = (
    "/content/npy_files/" + train["ebird_code"] + "_" + _stem + ".npy"
)

In [None]:
# train_org = train.copy()
# TRAIN_RESAMPLED_DIR = Path("/kaggle/processed_data/train_audio_resampled")
# TRAIN_RESAMPLED_DIR.mkdir(parents=True)

# for ebird_code in train.ebird_code.unique():
#     ebird_dir = TRAIN_RESAMPLED_DIR / ebird_code
#     ebird_dir.mkdir()

# warnings.simplefilter("ignore")
# train_audio_infos = train[["ebird_code", "filename"]].values.tolist()
# Parallel(n_jobs=NUM_THREAD, verbose=10)(
#     delayed(resample)(ebird_code, file_name, TARGET_SR) for ebird_code, file_name in train_audio_infos)

# train["resampled_sampling_rate"] = TARGET_SR
# train["resampled_filename"] = train["filename"].map(
#     lambda x: x.replace(".mp3", ".wav"))
# train["resampled_channels"] = "1 (mono)"

## Definition

### Dataset
* forked from: https://github.com/koukyo1994/kaggle-birdcall-resnet-baseline-training/blob/master/src/dataset.py
* partially modified


In [None]:
# Maps each eBird species code to its class index (0-263, i.e. 264 classes).
# SpectrogramDataset uses the index as the position of the 1 in the label vector.
BIRD_CODE = {
    'aldfly': 0, 'ameavo': 1, 'amebit': 2, 'amecro': 3, 'amegfi': 4,
    'amekes': 5, 'amepip': 6, 'amered': 7, 'amerob': 8, 'amewig': 9,
    'amewoo': 10, 'amtspa': 11, 'annhum': 12, 'astfly': 13, 'baisan': 14,
    'baleag': 15, 'balori': 16, 'banswa': 17, 'barswa': 18, 'bawwar': 19,
    'belkin1': 20, 'belspa2': 21, 'bewwre': 22, 'bkbcuc': 23, 'bkbmag1': 24,
    'bkbwar': 25, 'bkcchi': 26, 'bkchum': 27, 'bkhgro': 28, 'bkpwar': 29,
    'bktspa': 30, 'blkpho': 31, 'blugrb1': 32, 'blujay': 33, 'bnhcow': 34,
    'boboli': 35, 'bongul': 36, 'brdowl': 37, 'brebla': 38, 'brespa': 39,
    'brncre': 40, 'brnthr': 41, 'brthum': 42, 'brwhaw': 43, 'btbwar': 44,
    'btnwar': 45, 'btywar': 46, 'buffle': 47, 'buggna': 48, 'buhvir': 49,
    'bulori': 50, 'bushti': 51, 'buwtea': 52, 'buwwar': 53, 'cacwre': 54,
    'calgul': 55, 'calqua': 56, 'camwar': 57, 'cangoo': 58, 'canwar': 59,
    'canwre': 60, 'carwre': 61, 'casfin': 62, 'caster1': 63, 'casvir': 64,
    'cedwax': 65, 'chispa': 66, 'chiswi': 67, 'chswar': 68, 'chukar': 69,
    'clanut': 70, 'cliswa': 71, 'comgol': 72, 'comgra': 73, 'comloo': 74,
    'commer': 75, 'comnig': 76, 'comrav': 77, 'comred': 78, 'comter': 79,
    'comyel': 80, 'coohaw': 81, 'coshum': 82, 'cowscj1': 83, 'daejun': 84,
    'doccor': 85, 'dowwoo': 86, 'dusfly': 87, 'eargre': 88, 'easblu': 89,
    'easkin': 90, 'easmea': 91, 'easpho': 92, 'eastow': 93, 'eawpew': 94,
    'eucdov': 95, 'eursta': 96, 'evegro': 97, 'fiespa': 98, 'fiscro': 99,
    'foxspa': 100, 'gadwal': 101, 'gcrfin': 102, 'gnttow': 103, 'gnwtea': 104,
    'gockin': 105, 'gocspa': 106, 'goleag': 107, 'grbher3': 108, 'grcfly': 109,
    'greegr': 110, 'greroa': 111, 'greyel': 112, 'grhowl': 113, 'grnher': 114,
    'grtgra': 115, 'grycat': 116, 'gryfly': 117, 'haiwoo': 118, 'hamfly': 119,
    'hergul': 120, 'herthr': 121, 'hoomer': 122, 'hoowar': 123, 'horgre': 124,
    'horlar': 125, 'houfin': 126, 'houspa': 127, 'houwre': 128, 'indbun': 129,
    'juntit1': 130, 'killde': 131, 'labwoo': 132, 'larspa': 133, 'lazbun': 134,
    'leabit': 135, 'leafly': 136, 'leasan': 137, 'lecthr': 138, 'lesgol': 139,
    'lesnig': 140, 'lesyel': 141, 'lewwoo': 142, 'linspa': 143, 'lobcur': 144,
    'lobdow': 145, 'logshr': 146, 'lotduc': 147, 'louwat': 148, 'macwar': 149,
    'magwar': 150, 'mallar3': 151, 'marwre': 152, 'merlin': 153, 'moublu': 154,
    'mouchi': 155, 'moudov': 156, 'norcar': 157, 'norfli': 158, 'norhar2': 159,
    'normoc': 160, 'norpar': 161, 'norpin': 162, 'norsho': 163, 'norwat': 164,
    'nrwswa': 165, 'nutwoo': 166, 'olsfly': 167, 'orcwar': 168, 'osprey': 169,
    'ovenbi1': 170, 'palwar': 171, 'pasfly': 172, 'pecsan': 173, 'perfal': 174,
    'phaino': 175, 'pibgre': 176, 'pilwoo': 177, 'pingro': 178, 'pinjay': 179,
    'pinsis': 180, 'pinwar': 181, 'plsvir': 182, 'prawar': 183, 'purfin': 184,
    'pygnut': 185, 'rebmer': 186, 'rebnut': 187, 'rebsap': 188, 'rebwoo': 189,
    'redcro': 190, 'redhea': 191, 'reevir1': 192, 'renpha': 193, 'reshaw': 194,
    'rethaw': 195, 'rewbla': 196, 'ribgul': 197, 'rinduc': 198, 'robgro': 199,
    'rocpig': 200, 'rocwre': 201, 'rthhum': 202, 'ruckin': 203, 'rudduc': 204,
    'rufgro': 205, 'rufhum': 206, 'rusbla': 207, 'sagspa1': 208, 'sagthr': 209,
    'savspa': 210, 'saypho': 211, 'scatan': 212, 'scoori': 213, 'semplo': 214,
    'semsan': 215, 'sheowl': 216, 'shshaw': 217, 'snobun': 218, 'snogoo': 219,
    'solsan': 220, 'sonspa': 221, 'sora': 222, 'sposan': 223, 'spotow': 224,
    'stejay': 225, 'swahaw': 226, 'swaspa': 227, 'swathr': 228, 'treswa': 229,
    'truswa': 230, 'tuftit': 231, 'tunswa': 232, 'veery': 233, 'vesspa': 234,
    'vigswa': 235, 'warvir': 236, 'wesblu': 237, 'wesgre': 238, 'weskin': 239,
    'wesmea': 240, 'wessan': 241, 'westan': 242, 'wewpew': 243, 'whbnut': 244,
    'whcspa': 245, 'whfibi': 246, 'whtspa': 247, 'whtswi': 248, 'wilfly': 249,
    'wilsni1': 250, 'wiltur': 251, 'winwre3': 252, 'wlswar': 253, 'wooduc': 254,
    'wooscj2': 255, 'woothr': 256, 'y00475': 257, 'yebfly': 258, 'yebsap': 259,
    'yehbla': 260, 'yelwar': 261, 'yerwar': 262, 'yetvir': 263
}

# Reverse lookup: class index -> eBird species code.
INV_BIRD_CODE = {v: k for k, v in BIRD_CODE.items()}

In [None]:
class SpectrogramDataset(data.Dataset):
    """Serves pre-computed ``.npy`` arrays together with one-hot species labels.

    Args:
        df: Table exposing a ``resampled_filename`` column (path handed to
            ``np.load``) and an ``ebird_code`` column (a key of ``BIRD_CODE``).
            Rows are addressed by position.
    """

    def __init__(self, df):
        # Kept by reference; no copy is made.
        self.df = df

    def __len__(self):
        """Number of rows in the backing table."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``(array, labels)`` for the row at position ``idx``.

        ``array`` is whatever ``np.load`` yields for the row's file
        (presumably a spectrogram image — TODO confirm shape/dtype).
        ``labels`` is a float32 vector of length ``len(BIRD_CODE)`` holding a
        single 1 at the index of the row's species.
        """
        npy_path = self.df["resampled_filename"].iloc[idx]
        species = self.df["ebird_code"].iloc[idx]

        features = np.load(npy_path)

        one_hot = np.zeros(len(BIRD_CODE), dtype="f")
        one_hot[BIRD_CODE[species]] = 1
        return features, one_hot

### Training Utility

In [None]:
def get_model():
    """Build the pretrained ``resnest50_fast_1s4x24d`` backbone with a new head.

    The backbone's ``fc`` layer is dropped and replaced with a three-layer MLP
    (Linear -> ReLU -> Dropout, twice, then a final Linear) with 264 outputs,
    which matches the number of entries in ``BIRD_CODE``.

    Returns:
        The backbone module with its ``fc`` attribute replaced.
    """
    backbone = resnest_torch.resnest50_fast_1s4x24d(pretrained=True)
    del backbone.fc

    # NOTE(review): 2048 is assumed to be the width of the features fed to
    # `fc` by this backbone — confirm against the resnest model definition.
    head_layers = [
        nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, 264),
    ]
    backbone.fc = nn.Sequential(*head_layers)
    return backbone

## Training

### prepare data

#### split data

In [None]:
# Stratified 5-fold split on species, so each fold gets a similar share of
# every `ebird_code`.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

train["fold"] = -1
# Resolve the column position by name. Indexing with -1 silently writes to the
# wrong column whenever "fold" is not the last one (e.g. it already existed in
# the CSV at another position).
fold_col = train.columns.get_loc("fold")
for fold_id, (train_index, val_index) in enumerate(skf.split(train, train["ebird_code"])):
    # A row's fold is the split in which it appears in the validation part.
    train.iloc[val_index, fold_col] = fold_id
assert (train["fold"] >= 0).all(), "some rows were not assigned to a fold"

# Rows per (species, fold); `xc_id` is only the column that gets counted.
fold_proportion = pd.pivot_table(train, index="ebird_code", columns="fold", values="xc_id", aggfunc=len)
print(fold_proportion.shape)

In [None]:
# Display the per-species row counts for each fold.
fold_proportion

In [None]:
# Fold held out for validation; every other fold is used for training.
use_fold = 2
is_val_row = train["fold"] == use_fold
train_df = train.loc[~is_val_row].reset_index(drop=True)
val_df = train.loc[is_val_row].reset_index(drop=True)

n_train, n_val = len(train_df), len(val_df)
print("[fold {}] train: {}, val: {}".format(use_fold, n_train, n_val))

## run training

In [None]:
#@title
device = torch.device("cuda:0")

# loaders: identical settings apart from shuffling and dropping the last
# incomplete batch, which are enabled for training only.
_shared_loader_args = dict(batch_size=48, num_workers=2, pin_memory=True)
loaders = {
    "train": data.DataLoader(
        SpectrogramDataset(train_df),
        shuffle=True,
        drop_last=True,
        **_shared_loader_args,
    ),
    "valid": data.DataLoader(
        SpectrogramDataset(val_df),
        shuffle=False,
        drop_last=False,
        **_shared_loader_args,
    ),
}

# model (nn.Module.to returns the module itself, so this keeps the same object)
model = get_model().to(device)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Scheduler
# NOTE(review): `fit` constructs its own CosineAnnealingLR, so this instance
# does not appear to be used by the training loop — confirm before relying on it.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

In [None]:
!mkdir "/content/drive/My Drive/resnet-24-d/"

In [None]:
def training_step(xb, yb, model, loss_fn, opt, device, scheduler):
    """Run one optimisation step on a single batch.

    Moves the batch to ``device``, runs the forward pass, back-propagates the
    loss, then advances both the optimizer and the LR scheduler by one step.

    Args:
        xb: Input batch.
        yb: Target batch.
        model: Module called as ``model(xb)``.
        loss_fn: Callable invoked as ``loss_fn(output, target)``.
        opt: Optimizer whose ``zero_grad`` / ``step`` are called.
        device: Destination passed to ``Tensor.to``.
        scheduler: LR scheduler; stepped once per call, i.e. once per batch.

    Returns:
        The batch loss as a Python float.
    """
    inputs = xb.to(device)
    targets = yb.to(device)

    predictions = model(inputs)
    opt.zero_grad()
    batch_loss = loss_fn(predictions, targets)
    batch_loss.backward()

    opt.step()
    scheduler.step()
    return batch_loss.item()
    
def validation_step(xb, yb, model, loss_fn, device):
    """Evaluate one batch without touching optimizer state.

    Args:
        xb: Input batch.
        yb: Target batch.
        model: Module called as ``model(xb)``.
        loss_fn: Callable invoked as ``loss_fn(output, target)``.
        device: Destination passed to ``Tensor.to``.

    Returns:
        ``(loss, output)``: the batch loss as a Python float and the raw model
        output for the batch.
    """
    inputs = xb.to(device)
    targets = yb.to(device)
    predictions = model(inputs)
    batch_loss = loss_fn(predictions, targets).item()
    return batch_loss, predictions

In [None]:
from fastprogress.fastprogress import master_bar, progress_bar

In [None]:
!mkdir '/content/drive/My Drive/fold0/'

In [None]:
#torch.cuda.empty_cache()

In [None]:
def fit(epochs,model,train_dl,valid_dl,opt,device=None,loss_fn=F.binary_cross_entropy_with_logits):
    """Train ``model`` for ``epochs`` epochs, validating and checkpointing each epoch.

    Per epoch:
      1. Run ``training_step`` on every batch of ``train_dl``.
      2. Run ``validation_step`` on every batch of ``valid_dl`` under
         ``torch.no_grad()`` and compute a micro-averaged F1 between the argmax
         of the model output and the argmax of the targets.
      3. Save the model weights to ``PANNS.pth`` when the F1 beats the best so far.
      4. Save a full checkpoint (model, scheduler and optimizer state, metrics)
         to ``PANNS_last.pth``, print a summary line and append it to
         ``log_fold0.txt``.

    Args:
        epochs: Number of epochs to run.
        model: Module to train; modified in place.
        train_dl: Iterable of ``(inputs, targets)`` training batches; must support ``len``.
        valid_dl: Iterable of ``(inputs, targets)`` validation batches; must support ``len``.
            Targets are reduced with argmax over axis 1, so they are presumably
            one-hot — confirm against the dataset.
        opt: Optimizer stepped by ``training_step``; the LR schedule is attached to it.
        device: Passed through to ``training_step`` / ``validation_step``.
        loss_fn: Callable invoked as ``loss_fn(output, target)``.

    Returns:
        The same ``model`` object, after training.
    """
    # Attach the schedule to the optimizer that was passed in and that
    # `training_step` steps, not to a notebook-level global.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt,T_max=10)
    best_f1 = 0
    mb = master_bar(range(epochs))
    for epoch in mb:
        trn_loss, val_loss = 0.0, 0.0
        val_preds = []
        val_targs = []

        # Training
        model.train()
        # NOTE(review): training_step advances `scheduler` once per batch, so
        # with T_max=10 the cosine schedule cycles every 20 batches rather
        # than over epochs — confirm this is intended.
        for xb, yb in progress_bar(train_dl, parent=mb):
            trn_loss += training_step(xb, yb, model, loss_fn, opt, device, scheduler)
        # Mean loss per batch; max(..., 1) guards against an empty loader.
        trn_loss /= max(len(train_dl), 1)

        # Validation
        model.eval()
        with torch.no_grad():
            for xb, yb in progress_bar(valid_dl, parent=mb):
                loss, out = validation_step(xb, yb, model, loss_fn, device)
                val_loss += loss
                val_preds.extend(np.argmax(out.detach().cpu().numpy(), axis=1))
                val_targs.extend(np.argmax(yb.detach().cpu().numpy(), axis=1))
        val_loss /= max(len(valid_dl), 1)
        val_f1 = f1_score(val_targs, val_preds, average='micro')

        # Keep the weights of the best epoch seen so far.
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), '/content/drive/My Drive/resnet-24-d/PANNS.pth')

        # Always overwrite the "last" checkpoint with everything needed to resume.
        torch.save(
            {
                'model_state_dict': model.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'optimizer_state_dict': opt.state_dict(),
                'val_loss': val_loss,
                'val_f1': val_f1,
            },
            '/content/drive/My Drive/resnet-24-d/PANNS_last.pth',
        )

        # The validation metric is an F1 score, so the label says Val_f1.
        print(f'Epoch: {epoch},Train_loss: {trn_loss:.5f},Val_f1:{val_f1:.4f},Val_loss:{val_loss:.5f}')
        with open('/content/drive/My Drive/resnet-24-d/log_fold0.txt','a+') as f:
            f.write(f'Epoch: {epoch},Train_loss: {trn_loss:.5f},Val_loss:{val_loss:.5f},val_f1:{val_f1:.5f}\n')
    return model

## train and save results

In [None]:
# Train for 50 epochs on the selected fold. `fit` returns the model it was
# given and writes its checkpoints and log file as it goes.
model=fit(50,model,loaders['train'],loaders['valid'],optimizer,device=device)