Loading and encoding all the data takes about 90 minutes on a CPU. Let's start our investigation by loading up all the file names and picking some species that are well represented to examine more closely.

In [1]:
import arrow
import glob
import os
import pandas as pd

PATHNAME = '/kaggle/input/birdclef-2024/train_audio/*/*.ogg'

files_df = pd.DataFrame(data=list(glob.glob(pathname=PATHNAME, recursive=True)), columns=['file'])
files_df['short name'] = files_df['file'].apply(func=os.path.basename)
files_df['species'] = files_df['file'].apply(func=lambda x: x.split('/')[-2])
print('file count: {}'.format(len(files_df)))
time_start = arrow.now()
print('{} done'.format(arrow.now() - time_start))

file count: 24459
0:00:00.000160 done


In [2]:
files_df['species'].nunique()

182

We have a lot of classes and they are not all equally well represented, so let's look at the top classes.

In [3]:
from plotly import express
express.histogram(data_frame=files_df['species'].value_counts().to_frame().head(n=30).reset_index(), x='species', y='count')

In [4]:
files_df['species'].value_counts().head(n=3).index

Index(['lirplo', 'eaywag1', 'grnsan'], dtype='object', name='species')

Let's start with the top three: 

In [5]:
import arrow
import librosa
import os
import pandas as pd
from glob import glob

LENGTH = 4096
PATHNAME = '/kaggle/input/birdclef-2024/train_audio/*/*.ogg'
RATE = 22050
SPECIES = ['lirplo', 'eaywag1', 'grnsan']


time_start = arrow.now()
data = []
count = 0
last_folder = ''
for input_file in glob(pathname=PATHNAME, recursive=True):
    folder = input_file.split('/')[-2]
    if folder in SPECIES:
        name = os.path.basename(input_file)
        audio, _ = librosa.load(input_file, sr=RATE)
        features = librosa.feature.mfcc(y=audio, sr=RATE, n_mfcc=1)
        count += 1
        if folder != last_folder:
            print('{} {} {} {}'.format(arrow.now() - time_start, count, last_folder, name))
            last_folder = folder
        data.append(pd.Series(data=[folder, name, features[0][:4096]], index=['species', 'file', 'data']))
    
df = pd.DataFrame(data=data)
df['length'] = df['data'].apply(func=len)

print('{} done'.format(arrow.now() - time_start))

0:00:15.863145 1  XC842850.ogg
0:01:20.836790 501 grnsan XC702609.ogg
0:02:34.197205 1001 eaywag1 XC541214.ogg
0:03:52.877237 done


In [6]:
from plotly import express

express.histogram(data_frame=df, x='length', color='species', facet_col='species')

We may need to do something about the fact that our lengths have outliers.

In [7]:
# https://wandb.ai/mostafaibrahim17/ml-articles/reports/An-Introduction-to-Audio-Classification-with-Keras--Vmlldzo0MDQzNDUy

In [8]:
import arrow
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

time_start = arrow.now()
encoder = LabelEncoder()
labels = to_categorical(encoder.fit_transform(X=df['species']))

X_train, X_test, y_train, y_test = train_test_split(df['data'], labels, test_size=0.2, random_state=2024, stratify=labels)
print('{}: done.'.format(arrow.now() - time_start))

2024-04-05 15:11:23.422718: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-05 15:11:23.422860: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-05 15:11:23.584879: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


0:00:00.013476: done.


In [9]:
df['length'].max()

4096

In [10]:
from keras.layers import Activation
from keras.layers import Conv1D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Input
from keras.layers import Flatten
from keras.layers import MaxPooling1D
from keras.models import Sequential


model = Sequential()
model.add(Input(shape=(df['length'].max(), 1)))
model.add(Conv1D(64, 3, padding='same', activation='relu', ))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Conv1D(128, 3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(encoder.classes_), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()