In [62]:
!pip install pandas cupy-cuda110 librosa

Collecting librosa
  Downloading librosa-0.10.1-py3-none-any.whl (253 kB)
[K     |████████████████████████████████| 253 kB 11.4 MB/s eta 0:00:01
Collecting soundfile>=0.12.1
  Downloading soundfile-0.12.1-py2.py3-none-any.whl (24 kB)
Collecting scikit-learn>=0.20.0
  Downloading scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.1 MB)
[K     |████████████████████████████████| 11.1 MB 18.1 MB/s eta 0:00:01
Collecting lazy-loader>=0.1
  Downloading lazy_loader-0.4-py3-none-any.whl (12 kB)
Collecting numba>=0.51.0
  Downloading numba-0.58.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.7 MB)
[K     |████████████████████████████████| 3.7 MB 33.0 MB/s eta 0:00:01
[?25hCollecting joblib>=0.14
  Downloading joblib-1.4.0-py3-none-any.whl (301 kB)
[K     |████████████████████████████████| 301 kB 20.6 MB/s eta 0:00:01
[?25hCollecting scipy>=1.2.0
  Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[K     

In [63]:
import pandas as pd
import os
import torch
from IPython.display import display, Audio
import cupy as cp
from cupyx.scipy import signal as cupy_signal
import librosa

In [33]:
import gc
import sys
import math
import numpy as np
from glob import glob
import pandas as pd

ModuleNotFoundError: No module named 'pandas'

In [None]:
import cv2
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import librosa
from scipy import signal as sci_signal

import torch
from torch import nn
from torchvision.models import efficientnet

import albumentations as albu

import pytorch_lightning as pl
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from pytorch_lightning.callbacks import ModelCheckpoint, TQDMProgressBar

# import score function of BirdCLEF
sys.path.append('/kaggle/input/birdclef-roc-auc')
sys.path.append('/kaggle/usr/lib/kaggle_metric_utilities')
from metric import score

## Configuration

In [37]:
class CONFIG:
    """Notebook-wide settings, read as class attributes (e.g. ``CONFIG.sr``)."""

    # ---- general ----
    seed = 1917               # value used to seed random number generators
    device = 'cuda'           # name of the compute device
    mix_precision = False     # True switches to 16-bit mixed precision

    # ---- data ----
    data_dir_2024 = '../input/birdclef-2024'  # dataset root directory
    sr = 32000                # audio sampling rate
    n_fft = 1095              # FFT length (`nfft`) of the spectrogram
    win_len = 412             # window length (`nperseg`) of the spectrogram
    hop_len = 100             # handed to the spectrogram as `noverlap`
    min_freq = 40             # lowest frequency kept in the spectrogram
    max_freq = 15000          # highest frequency kept in the spectrogram

    # ---- model ----
    model = 'efficientnet_b0'  # backbone architecture name

    # ---- dataset ----
    batch_size = 32           # samples per training step
    n_workers = 4             # data-loading worker count

    # ---- training ----
    folds = 5                 # number of cross-validation folds
    epochs = 5                # upper bound on training epochs
    lr = 1e-3                 # optimizer learning rate
    weight_decay = 1e-5       # optimizer weight decay
    visualize = True          # True to plot data samples and batches

In [38]:
# Pick the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', device, ', Using', torch.cuda.device_count(), 'GPU(s)')

# Seed the random number generators so runs are reproducible.
# The attribute is CONFIG.seed (lowercase); CONFIG.SEED does not exist.
pl.seed_everything(CONFIG.seed, workers=True)

Device: cuda:0 , Using 1 GPU(s)


## Dataframe

In [42]:
# Read the training metadata table that sits directly under the dataset root.
metadata_csv = f'{CONFIG.data_dir_2024}/train_metadata.csv'
df = pd.read_csv(metadata_csv)
df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,asbfly/XC134896.ogg
1,asbfly,[],['song'],51.403,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,asbfly/XC164848.ogg
2,asbfly,[],['song'],36.3319,127.3555,Muscicapa dauurica,Asian Brown Flycatcher,Stuart Fisher,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/175797,asbfly/XC175797.ogg
3,asbfly,[],['call'],21.1697,70.6005,Muscicapa dauurica,Asian Brown Flycatcher,vir joshi,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/207738,asbfly/XC207738.ogg
4,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209218,asbfly/XC209218.ogg


In [46]:
# Alphabetically sorted primary labels; a label's position is its integer id.
label_list = sorted(df['primary_label'].unique())
label_id_list = list(range(len(label_list)))

# Lookup tables in both directions between label string and integer id.
label2id = {label: label_id for label_id, label in enumerate(label_list)}
id2label = {label_id: label for label_id, label in enumerate(label_list)}

In [52]:
def _sample_name(relative_path):
    """Turn '<folder>/<file>.<ext>' into '<folder>-<file>'."""
    pieces = relative_path.split('/')
    return pieces[0] + '-' + pieces[-1].split('.')[0]


# Keep only label, rating and audio filename, then derive:
#   target   - integer id of the primary label
#   filepath - location of the audio file under the dataset root
#   name     - '<label>-<file stem>' identifier of the sample
train_df = df[['primary_label', 'rating', 'filename']].assign(
    target=lambda frame: frame['primary_label'].map(label2id),
    filepath=lambda frame: CONFIG.data_dir_2024 + '/train_audio/' + frame['filename'],
    name=lambda frame: frame['filename'].map(_sample_name),
)

print(f'find {len(train_df)} samples')
train_df.head()


find 24459 samples


Unnamed: 0,primary_label,rating,filename,target,filepath,name
0,asbfly,5.0,asbfly/XC134896.ogg,0,../input/birdclef-2024/train_audio/asbfly/XC13...,asbfly-XC134896
1,asbfly,2.5,asbfly/XC164848.ogg,0,../input/birdclef-2024/train_audio/asbfly/XC16...,asbfly-XC164848
2,asbfly,2.5,asbfly/XC175797.ogg,0,../input/birdclef-2024/train_audio/asbfly/XC17...,asbfly-XC175797
3,asbfly,4.0,asbfly/XC207738.ogg,0,../input/birdclef-2024/train_audio/asbfly/XC20...,asbfly-XC207738
4,asbfly,4.0,asbfly/XC209218.ogg,0,../input/birdclef-2024/train_audio/asbfly/XC20...,asbfly-XC209218


In [57]:
def oog2spec(audio_data):
    """Convert an audio signal into a log-scaled, min/max-normalised spectrogram.

    The samples are copied into a CuPy array, NaNs are replaced, a Hann-window
    spectrogram is computed with the CONFIG parameters (sr, n_fft, win_len,
    hop_len), only the rows whose frequency lies in
    [CONFIG.min_freq, CONFIG.max_freq] are kept, and the result is log10-scaled
    and rescaled to the range [0, 1].

    Args:
        audio_data: audio samples; anything ``cp.array`` accepts.

    Returns:
        Host-side array (result of ``.get()``) indexed as
        (frequency bin, time frame). A spectrogram with no dynamic range
        (e.g. an all-zero or all-NaN signal) is returned as all zeros instead
        of NaN.
    """
    audio_data = cp.array(audio_data)

    # handles NaNs: an all-NaN signal becomes silence, otherwise NaNs are
    # replaced by the mean of the remaining samples. Checking first avoids
    # calling nanmean on a signal that has no valid sample.
    if cp.isnan(audio_data).mean() < 1:
        # plain Python float so the fill value is an ordinary scalar
        mean_signal = float(cp.nanmean(audio_data))
        audio_data = cp.nan_to_num(audio_data, nan=mean_signal)
    else:
        audio_data = cp.zeros_like(audio_data)

    # to spec. NOTE: CONFIG.hop_len is used as `noverlap` (samples shared by
    # neighbouring windows), not as a hop size.
    frequencies, times, spec_data = cupy_signal.spectrogram(
        audio_data,
        fs=CONFIG.sr,
        nfft=CONFIG.n_fft,
        nperseg=CONFIG.win_len,
        noverlap=CONFIG.hop_len,
        window='hann'
    )

    # Filter frequency range
    valid_freq = (frequencies >= CONFIG.min_freq) & (frequencies <= CONFIG.max_freq)
    spec_data = spec_data[valid_freq, :]

    # Log (the small offset keeps log10 finite for zero power)
    spec_data = cp.log10(spec_data + 1e-20)

    # min/max normalize; skip the division when the spectrogram is constant,
    # otherwise 0 / 0 would fill the output with NaN.
    spec_data = spec_data - spec_data.min()
    peak = spec_data.max()
    if peak > 0:
        spec_data = spec_data / peak

    return spec_data.get()

In [58]:
# Show the audio path of the first training sample.
train_df['filepath'].iloc[0]

'../input/birdclef-2024/train_audio/asbfly/XC134896.ogg'

In [59]:
# Decode the first training clip, requesting CONFIG.sr as the sampling rate;
# the second return value of librosa.load is not needed here.
first_clip_path = train_df.iloc[0].filepath
audio_data, _ = librosa.load(first_clip_path, sr=CONFIG.sr)

NameError: name 'librosa' is not defined

In [None]:
# Build (or reload) the cache of centre-cropped 5-second spectrograms, keyed by
# the sample identifier stored in train_df['name'].
#
# Optional CONFIG attributes, read with getattr so the cell also runs when they
# are not defined (TODO: confirm the intended locations):
#   load_data        - True to reload a previously saved cache (default False)
#   preprocessed_dir - folder holding the saved cache          (default '.')
#   output_dir       - folder the new cache is written to      (default '.')
CLIP_SECONDS = 5
SPEC_CACHE_NAME = 'spec_center_5sec_256_256.npy'

if getattr(CONFIG, 'load_data', False):
    print('load from file')
    cache_path = os.path.join(getattr(CONFIG, 'preprocessed_dir', '.'), SPEC_CACHE_NAME)
    # NOTE: allow_pickle=True unpickles the file; only load caches you trust.
    all_bird_data = np.load(cache_path, allow_pickle=True).item()
else:
    clip_len = CLIP_SECONDS * CONFIG.sr  # samples in one 5-second crop
    all_bird_data = dict()
    for i, row_metadata in tqdm(train_df.iterrows(), total=len(train_df)):

        # load ogg (each row needs its own audio, decoded at CONFIG.sr)
        audio_data, _ = librosa.load(row_metadata['filepath'], sr=CONFIG.sr)
        if len(audio_data) == 0:
            # An empty clip cannot be tiled (division by zero below).
            print(f"skip empty audio: {row_metadata['filepath']}")
            continue

        # crop: tile clips shorter than 5 s, then take the central 5 s
        n_copy = math.ceil(clip_len / len(audio_data))
        if n_copy > 1:
            audio_data = np.concatenate([audio_data] * n_copy)

        start_idx = (len(audio_data) - clip_len) // 2
        end_idx = start_idx + clip_len
        input_audio = audio_data[start_idx:end_idx]

        # ogg to spec.
        input_spec = oog2spec(input_audio)

        input_spec = cv2.resize(input_spec, (256, 256), interpolation=cv2.INTER_AREA)

        # Use ['name']: attribute access (row_metadata.name) returns the row's
        # index label, not the 'name' column.
        all_bird_data[row_metadata['name']] = input_spec.astype(np.float32)

    # save to file
    np.save(os.path.join(getattr(CONFIG, 'output_dir', '.'), SPEC_CACHE_NAME), all_bird_data)

## Dataset

In [31]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves the values of a dict as float32 tensors.

    Args:
        bird_data: mapping whose values are array-like items (presumably the
            spectrogram dict built earlier in the notebook; verify at the
            call site).
        augmentation: optional callable invoked as ``augmentation(image=item)``
            whose result is indexed with ``'image'``.
    """

    def __init__(self, bird_data, augmentation=None):
        super().__init__()
        self.bird_data = bird_data
        # Freeze the key order once so integer indices map to stable keys.
        self.keys_list = [key for key in bird_data.keys()]
        self.augmentation = augmentation

    def __len__(self):
        """Number of entries in the underlying mapping."""
        return len(self.bird_data)

    def __getitem__(self, index):
        """Return the item at ``index`` as a float32 tensor, augmented if configured."""
        sample_key = self.keys_list[index]
        item = self.bird_data[sample_key]

        if self.augmentation is None:
            return torch.tensor(item, dtype=torch.float32)

        augmented = self.augmentation(image=item)
        return torch.tensor(augmented['image'], dtype=torch.float32)

## Dataloader