## **Import Libs**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset

import librosa
from scipy.signal import stft

from IPython.display import Audio

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, classification_report, confusion_matrix
import os
import glob
from tqdm.notebook import tqdm, trange

import torch.multiprocessing as mp
from torch.utils.data.distributed import DistributedSampler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
import os

In [2]:
# config
device = 'cuda' if torch.cuda.is_available() else 'cpu'
sr = 32000 # sampling rate
seq_len = sr * 5 # ?
width = int(sr * 0.5) # frame_size ?
hop = int(sr*0.25) # hop_size

In [3]:
# read function
def read_data():
    df = pd.read_csv('data/birdclef-2023/train_metadata.csv')
    df['filepath'] = df['filename'].apply(lambda x: 'data/birdclef-2023/train_audio/' + x)
    return df

In [4]:
# balance data
def balance_df(df):
    sample_count = max(df.primary_label.value_counts())
    
    balanced_df = []
    augmentation_proba = {}
    
    for i, label in enumerate(df.primary_label.unique()):
        selected_ids = np.random.choice(df[df['primary_label'] == label].index, sample_count)
        balanced_df.append(df.loc[selected_ids])
        augmentation_proba[label] = 1 - (len(df[df['primary_label'] == label]) / sample_count)
    balanced_df = pd.concat(balanced_df)
    return balanced_df, augmentation_proba

In [5]:
# balance test
def balance_test(df):
    sample_count = 10
    
    balanced_df = []
    augmentation_proba = {}
    
    for i, label in enumerate(df.primary_label.unique()):
        selected_ids = np.random.choice(df[df['primary_label'] == label].index, sample_count) \
            if len(df[df['primary_label'] == label]) < sample_count \
            else df[df['primary_label'] == label].index
        balanced_df.append(df.loc[selected_ids])
        augmentation_proba[label] = 1 - (len(df[df['primary_label'] == label]) / sample__count) \
            if len(df[df['primary_label'] == label]) < sample__count else 0
    balanced_df = pd.concat(balanced_df, axis=0)
    
    return balanced_df, augmentation_proba

In [6]:
def stratified_train_test_split(df, test_size=0.3):
    train_data, test_data = [], []
    
    for i, label in enumerate(df.primary_label.unique()):
        if len(df[df.primary_label == label]) == 1:
            train = df[df.primary_label == label]
            test = df[df.primary_label == label]
        else:
            train, test = train_test_split(df[df.primary_label == label], test_size=test_size)
        train_data.append(train)
        test_data.append(test)
        
    train_data = pd.concat(train_data)
    test_data = pd.concat(test_data)
    
    return train_data, test_data

In [7]:
df = read_data()
train, test = stratified_train_test_split(df, test_size=0.2)

In [8]:
train, aug_prob = balance_df(train)

In [9]:
label2idx = {label: idx for idx, label in enumerate(train['primary_label'].unique())}
idx2label = {idx: label for idx, label in enumerate(train['primary_label'].unique())}

In [10]:
class BirdDataset(Dataset):
    def __init__(self, df, width, hop, seq_len=seq_len):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.width = width
        self.hop = hop
        self.seq_len = seq_len
    
    def __len__(self):
        return len(self.df)
    
    def prepare_X(self, path):
        wav, _ = librosa.load(path, sr=sr)
        
        if len(wav) < self.seq_len:
            wav = np.append(wav, np.zeros(self.seq_len - len(wav)))
        
        start_idx = np.random.choice(range(len(wav) - self.seq_len)) if len(wav) > self.seq_len else 0
        wav = wav[start_idx : start_idx + self.seq_len]
        num_slices = int((len(wav) - self.width) / (self.width - self.hop))
        wav = [wav[i*self.width - i*self.hop: (i+1)*self.width - i*self.hop] for i in range(num_slices)]
        return wav
    
    def __getitem__(self, idx):
        if self.__len__() <= idx:
            raise KeyError
        
        wav_path = self.df.loc[idx, 'filepath']
        wav = self.prepare_X(wav_path)
        wav = torch.tensor(wav).to(device)
        
        label = self.df.loc[idx, 'primary_label']
        label = label2idx[label]
        y = torch.zeros(264)
        y[label] = 1
        label = torch.tensor(label).to(device)
        return wav, y

In [11]:
train = BirdDataset(train, width, hop)
test = BirdDataset(test, width, hop)

train_loader = DataLoader(train, batch_size=8, sampler=DistributedSampler(train_loader))
test_loader = DataLoader(test, batch_size=8, sampler=DistributedSampler(test_loader))

In [12]:
class WaveBlock(nn.Module):
    def __init__(self, in_features, filters, kernel_size, n):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.n = n
    
        self.cas_conv1 = nn.Conv1d(in_features, filters, 1)

        dilation_rates = [2**i for i in range(n)]
        self.tanh_out_layers = nn.ModuleList([])
        self.sig_out_layers = nn.ModuleList([])
        self.cas_conv_layers = nn.ModuleList([])

        for dilation_rate in dilation_rates:
            tanh_out = nn.Sequential(*[nn.Conv1d(filters, filters, kernel_size, dilation=dilation_rate, padding='same'), nn.Tanh()])
            self.tanh_out_layers.append(tanh_out)
            sig_out = nn.Sequential(*[nn.Conv1d(filters, filters, kernel_size, dilation=dilation_rate, padding='same'), nn.Sigmoid()])
            self.sig_out_layers.append(sig_out)
            self.cas_conv_layers.append(nn.Conv1d(filters, filters, 1))
        
    def forward(self, x):
        x = self.cas_conv1(x)
        res_x = x
        
        for tanh_layer, sig_layer, conv_layer in zip(self.tanh_out_layers, self.sig_out_layers, self.cas_conv_layers):
            x = tanh_layer(x) * sig_layer(x)
            x = conv_layer(x)
        x = x + res_x
        del res_x
        return x

In [13]:
class BirdNet(nn.Module):
    def __init__(self, temporal_fearture_size=64, kernel_size=3, hidden_size=256, num_classes=264):
        super().__init__()
        self.representation_block = nn.Sequential(*[
            WaveBlock(1, 8, kernel_size, 16),
            nn.BatchNorm1d(8),
            nn.ReLU(),
            WaveBlock(8, 16, kernel_size, 8),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            WaveBlock(16, 32, kernel_size, 4),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            WaveBlock(32, temporal_fearture_size, kernel_size, 1)
        ])
        self.temporal_block = nn.LSTM(temporal_fearture_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        x = torch.concat([self.representation_block(x[:, i, :].unsqueeze(1)).unsqueeze(0) for i in range(x.shape[1])])
        x = torch.mean(x, dim=-1)
        x = torch.sum(self.temporal_block(x)[0], axis=0)
        x = nn.ReLU()(x)
        x = self.classifier(x)
        x = nn.Sigmoid()(x)
        return x

In [17]:
torch.cuda.empty_cache()

In [15]:
epochs = 5
model = BirdNet().to(device)
criterion = nn.BCELoss()
lr = 1e-4
optimizer = Adam(model.parameters(), lr=lr)

In [16]:
# history = {}

# for epoch in trange(epochs):
#     model.train()
#     for i, (X, y) in enumerate(tqdm(train_loader)):
#         X = X.to(device).to(torch.float)
#         y = y.to(device).to(torch.float)
        
#         optimizer.zero_grad()
#         pred = model(X)
#         loss = criterion(pred, y)
#         loss.backward()
#         optimizer.step()

#         if (i%100 == 0) & (i != 0):
#             preds = []
#             ys = []
#             with torch.no_grad():
#                 for j, (X, y) in enumerate(test_loader):
#                     X = X.to(device).to(torch.float)
#                     y = y.to(device).to(torch.float)
                    
#                     pred = model(X)
#                     preds.append(pred)
#                     ys.appedn(y)
#                 preds = torch.concat(preds, dim=0).detach().cpu().numpy()
#                 ys = torch.concat(ys, dim=0).detech().cpu().numpy()
                
#                 scores = []
#                 for i in range(preds.shape[-1]):
#                     score = f1_score(ys[:, i], (preds[:, i] > .5).astype(int))
#                     scores.append(score)
#                 plt.figure(figsize=(15, 5))
#                 plt.bar(x=range(264), height=scores)
#                 plt.show()

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/26400 [00:00<?, ?it/s]

  wav = torch.tensor(wav).to(device)
  return F.conv1d(input, weight, bias, self.stride,


KeyboardInterrupt: 

In [None]:
def ddp_setup(rank, world_size):
    os.environ("MASTER_ADDR") = "localhost"
    os.environ("MASTER_PORT") = "12355"
    init_process_group(backend="nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

In [20]:
class Trainer:
    def __init__(self, model, train_loader, test_loader, optimizer, gpu_id, save_energy, criterion):
        self.gpu_id = gpu_id
        self.model = model.to(gpu_id)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.optimizer = optimizer
        self.save_energy = save_energy
        self.criterion = criterion
        self.model = DDP(model, device_ids=[gpu_id])
    
    def _run_batch(self, source, targets):
        self.optimizer.zero_grad()
        output = self.model(source)
        loss = criterion(output, targets)
        loss.backward()
        self.optimizer.step()
        
    def _run_epoch(self, epoch):
        b_sz = len(next(iter(self.train_loader))[0])
        print(f"[GPU{self.gpu_id}] Epoch {epoch} | Batchsize: {b_sz} | Steps: {len(self.train_data)}")
        self.train_data.sampler.set_epoch(epoch)
        for source, targets in self.train_data:
            source = source.to(self.gpu_id)
            targets = targets.to(self.gpu_id)
            self._run_batch(source, targets)
        preds = []
        ys = []
        with torch.no_grad():
            for X, y in self.test_loader:
                X = X.to(device).to(torch.float)
                y = y.to(device).to(torch.float)
                
                pred = model(X)
                preds.append(pred)
                ys.append(y)
            preds = torch.concat(preds, dim=0).detach().cpu().numpy()
            ys = torch.concat(ys, dim=0).detach().cpu().numpy()
            
            scores = []
            for i in range(preds.shape[-1]):
                score = f1_score(ys[:, i], (preds[:, i] > .5).astype(int))
                scores.append(score)
            plt.figure(figsize=(15, 5))
            plt.bar(x=range(264), height=scores)
            plt.show()
            
    def _save_checkpoint(self, epoch):
        ckp = self.model.module.state_dict()
        PATH = 'checkpoint.pt'
        torch.save(ckp, PATH)
        print(f"Epoch {epoch} | Training checkpoint saved at {PATH}")

    def train(self, max_epochs):
        for epoch in range(max_epochs):
            self._run_epoch(epoch)
            if self.gpu_id == 0 and epoch % self.save_energy == 0:
                self._save_checkpoint(epoch)

In [None]:
def main(rank: int, world_size: int, save_every: int, total_epochs: int, batch_size: int):
    ddp_setup(rank, world_size)
    train_set, test_set, model, optimizer, criterion = load_train_objs()
    train_loader = prepare_dataloader(train_set, batch_size)
    test_loader = prepare_dataloader(test_set, batch_size)
    trainer = Trainer(model, train_loader, test_loader, optimizer, rank, save_every, criterion)
    trainer.train(total_epochs)
    destroy_process_group()