In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import torch
import torchaudio
from torch.utils.data import Dataset
from torchvision.transforms import Resize

import numpy as np
from scipy import signal

import matplotlib.pyplot as plt

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file,
# skipping any file whose name ends in 'ogg'.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        if name.endswith('ogg'):
            continue
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the CSV tables shipped with the BirdCLEF 2022 competition data.
train_metadata = pd.read_csv('/kaggle/input/birdclef-2022/train_metadata.csv')
test = pd.read_csv('/kaggle/input/birdclef-2022/test.csv')
taxonomy = pd.read_csv('/kaggle/input/birdclef-2022/eBird_Taxonomy_v2021.csv')
sample_submission = pd.read_csv('/kaggle/input/birdclef-2022/sample_submission.csv')
# scored_birds.json is not loaded; the disabled attempt is kept for reference.
#scored_birds = pd.read_csv('/kaggle/input/birdclef-2022/scored_birds.json')

In [None]:
# Preview the first five rows of the training metadata (head() defaults to 5).
train_metadata.head()

# Birds species in danger

If we assume that the number of recordings of each bird species in this dataset reflects how common that species is in the wild, we can get a rough idea of which species are endangered (few recordings) and which are not.

In [None]:
import plotly.express as px
import geopandas as gpd  # no longer needed by this cell; import kept in case other cells use `gpd`

# Plot every recording at its latitude/longitude, coloured by species.
# Everything that is plotted comes from train_metadata, so it is passed as the
# data frame and its columns are referenced by name. Loading the
# 'naturalearth_cities' sample through gpd.datasets is avoided on purpose:
# it was only a placeholder frame, and geopandas 1.0 removed that module.
#px.set_mapbox_access_token(open(".mapbox_token").read())
fig = px.scatter_geo(train_metadata,
                     lat='latitude',
                     lon='longitude',
                     color='primary_label',
                     hover_name='primary_label')
fig.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Number of recordings per species.
count = train_metadata['primary_label'].value_counts()
has_many_recordings = count.values > 100

# First figure: species with more than 100 recordings.
plt.figure(figsize=(20, 10), dpi=400)
count[has_many_recordings].plot(kind='barh', title='birds not in danger of extinction')

# Second figure: species with 100 recordings or fewer.
plt.figure(figsize=(20, 20), dpi=400)
count[~has_many_recordings].plot(kind='barh', title='birds in danger of extinction')

#count = count.plot(kind = 'barh')
#sns.countplot(train_metadata['primary_label'])

# Time plot of a bird's chirp

In [None]:
# Pick one example recording (fixed index) and plot its waveform
file_index = 290

sound = r'/kaggle/input/birdclef-2022/train_audio/'+ train_metadata['filename'][file_index]
wf, sr = torchaudio.load(sound)
print(wf.shape)   # shape of the tensor returned by torchaudio.load
wft = wf.t().numpy() # transpose, then convert from tensor to numpy
print(wft.shape)  # shape of the transposed numpy array (wf.shape used to be printed twice)
print('sample rate:', sr//1000, 'khz')
plt.figure(figsize=(18,5))
plt.plot(wft)

# Mel Spectrogram with torchaudio

To get the Mel spectrogram, we first apply the MelSpectrogram transform from torchaudio, then convert the result to decibels with the AmplitudeToDB transform.

In [None]:
# Mel spectrogram with torchaudio, converted to decibels.
# sample_rate is passed explicitly: MelSpectrogram otherwise assumes 16 kHz,
# which misplaces the mel bands whenever the file was recorded at another rate.
specgram = torchaudio.transforms.MelSpectrogram(sample_rate=sr, n_fft=4096)(wf)
specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
plt.pcolormesh(specgram[0])

# Spectrogram with SciPy

In [None]:
# Log-scaled spectrogram with SciPy (linear frequency axis, no mel filterbank)
def spectrogram(xs, fs=None):
    """Compute a log2-scaled spectrogram of a 1-D signal.

    Parameters
    ----------
    xs : array-like
        Samples of the signal.
    fs : float, optional
        Sampling frequency of ``xs``. When omitted, the notebook-level
        variable ``sr`` (set when the audio file was loaded) is used, which
        keeps existing ``spectrogram(xs)`` calls working.

    Returns
    -------
    f : ndarray
        Sample frequencies.
    t : ndarray
        Segment times.
    wave_sxx_graph : ndarray
        ``log2`` of the spectrogram magnitude, with one row per entry of
        ``f`` and one column per entry of ``t``.
    """
    if fs is None:
        fs = sr  # fall back to the sample rate of the most recently loaded file
    f, t, spec = signal.spectrogram(xs, fs=fs, nfft=256, window=('hann'))
    magnitude = np.absolute(spec)
    # Silent bins are exactly 0 and log2(0) is -inf, which breaks the colour
    # scale of the plot. Clamp to the smallest positive normal float so only
    # those bins are affected.
    floor = np.finfo(magnitude.dtype).tiny
    wave_sxx_graph = np.log2(np.maximum(magnitude, floor))
    return f,t,wave_sxx_graph

In [None]:
# Compute the log-scaled spectrogram of the first channel and plot it.
# spectrogram() returns (frequencies, times, values) in that order; the names
# below follow that order instead of being swapped.
f, t, wave_sxx_graph = spectrogram(wft[:,0])
print(wave_sxx_graph.shape)
print(f.shape)  # number of frequency bins
print(t.shape)  # number of time segments
# Time on the x axis, frequency on the y axis.
plt.pcolormesh(t, f, wave_sxx_graph)

# Encode labels

In [None]:
# Table of unique species codes: after reset_index() the frame has an 'index'
# column (the integer class id) and a 'bird' column (the species code).
labels = (
    pd.Series(train_metadata['primary_label'].unique(), name='bird')
    .reset_index()
)
num = labels.index.size  # number of distinct classes
display(labels)

In [None]:
from torch.nn.functional import one_hot

# One-hot encode every class id and store each encoded row as a plain Python
# list in the 'onehot_code' column.
class_ids = torch.Tensor(labels['index'].values).to(torch.int64)
onehot_matrix = one_hot(class_ids, num_classes = num)
labels['onehot_code'] = onehot_matrix.numpy().tolist()

#### labels.head(10)

# DataLoader and transforms Pytorch

This custom dataset loads the samples by index, computes the Mel spectrogram of each sample, and then resizes that spectrogram to a constant size of (128, 128). The resizing is done with the Resize transform from torchvision, which downsamples or interpolates the input depending on its shape.

In [None]:
annotation_file = r'/kaggle/input/birdclef-2022/train_metadata.csv'
chirp_dir = r'/kaggle/input/birdclef-2022/train_audio'
class CustomChirpDataset(Dataset):
    """Dataset of bird recordings listed in a metadata CSV.

    Each item is a ``(sample, label)`` pair:

    * ``sample`` is the 128x128 dB-scaled mel spectrogram when ``transform``
      is truthy, otherwise the waveform exactly as returned by
      ``torchaudio.load``.
    * ``label`` is the one-hot code (a Python list) looked up in the
      notebook-level ``labels`` table when ``target_transform`` is truthy,
      otherwise the relative filename from the CSV.

    Parameters
    ----------
    annotations_file : str
        Path of the CSV file; it must contain a ``filename`` column.
    chirp_dir : str
        Directory that the ``filename`` entries are relative to.
    transform : bool, optional
        Whether to convert each waveform to a mel spectrogram.
    target_transform : bool, optional
        Whether to convert each label to its one-hot code.
    """

    def __init__(self, annotations_file, chirp_dir, transform=None, target_transform=False):
        self.metadata = pd.read_csv(annotations_file)
        # Relative audio paths; encode_label() takes the species code from
        # their directory part.
        self.chirp_labels = self.metadata['filename']
        self.chirp_dir = chirp_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of recordings listed in the metadata."""
        return len(self.chirp_labels)

    def __getitem__(self, idx):
        """Load the recording at position ``idx`` and return ``(sample, label)``."""
        filename = self.chirp_labels.iloc[idx]
        chirp_path = os.path.join(self.chirp_dir, filename)
        chirp, sample_rate = torchaudio.load(chirp_path)
        # Fall back to the raw waveform when no transform is requested; the
        # result used to be left unassigned in that case, so the return
        # statement raised UnboundLocalError.
        if self.transform:
            sample = self.melspectrogram(chirp, sample_rate)
        else:
            sample = chirp
        label = filename
        if self.target_transform:
            label = self.encode_label(label)
        return sample, label

    def encode_label(self, label):
        """Return the one-hot code of the species that ``label`` belongs to.

        ``label`` is a relative path whose directory part is the species code.
        The code is looked up in the notebook-level ``labels`` DataFrame
        (columns ``bird`` and ``onehot_code``), which must exist before this
        method is called. An unknown species raises ``IndexError``.
        """
        species, _ = os.path.split(label)
        onehot_label = labels[labels['bird']==species]['onehot_code'].values[0]
        return onehot_label

    def melspectrogram(self, chirp, sample_rate=16000):
        """Return a 128x128 dB-scaled mel spectrogram of ``chirp``.

        ``sample_rate`` should be the rate the waveform was loaded at, so the
        mel bands cover the right frequencies. The default of 16000 equals
        torchaudio's own default, so a call without it behaves as before.
        """
        spectrum = torchaudio.transforms.MelSpectrogram(
            sample_rate=sample_rate, n_fft = 1000, n_mels=128)(chirp)
        spectrum = torchaudio.transforms.AmplitudeToDB()(spectrum)
        spectrum = torch.unsqueeze(spectrum, dim=0)
        # resize so that all spectrograms have the same (128,128) shape
        spectrum = Resize((128, 128))(spectrum)
        # Drop the dimension added above and keep the first entry of the next
        # one (presumably the first audio channel; confirm for stereo files).
        return spectrum[0,0,:,:]

## Test dataloader

In [None]:
# Build the dataset with both the spectrogram and the label transform enabled
dataset = CustomChirpDataset(annotation_file, chirp_dir, transform=True, target_transform=True)

# Draw the spectrogram of samples 0, 1000, ..., 5000, one figure per sample
for sample_idx in range(0, 6000, 1000):
    plt.figure(figsize = (18,6))
    spec_image, label = dataset[sample_idx]
    print(spec_image.shape)
    plt.pcolormesh(spec_image)
    plt.show()