In [None]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

from glob import glob
import gc

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

from tqdm import tqdm, tqdm_notebook
tqdm.pandas()

from sklearn.model_selection import train_test_split
import librosa
import librosa.display
from IPython.display import Audio
import wave
from scipy.io import wavfile

import plotly.express as px #Plotly Express

from plotly.offline import iplot
#to link plotly to pandas
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline = False, world_readable = True)

plt.rcParams["figure.figsize"] = (12, 8)
plt.rcParams['axes.titlesize'] = 16
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this installation provides.
grid_style = 'seaborn-whitegrid'
if grid_style not in plt.style.available:
    grid_style = 'seaborn-v0_8-whitegrid'
plt.style.use(grid_style)
sns.set_palette('Set3')

import tensorflow as tf

import os
print(os.listdir('../input/birdclef-2021/'))

import warnings
warnings.simplefilter('ignore')

In [None]:
# Root folder of the BirdCLEF 2021 input data; the dataset paths in later cells are built from it.
base_dir = '../input/birdclef-2021/'

In [None]:
# Load the training metadata table, report its dimensions and preview the first rows.
meta_csv = f"{base_dir}train_metadata.csv"
train_meta = pd.read_csv(meta_csv)
print(train_meta.shape)
train_meta.head()

In [None]:
# Load the soundscape label table, report its dimensions and preview the first rows.
labels_csv = f"{base_dir}train_soundscape_labels.csv"
train_labels = pd.read_csv(labels_csv)
print(train_labels.shape)
train_labels.head()

In [None]:
# Number of soundscape label rows per value of the 'site' column.
sns.countplot(data = train_labels, x = 'site');

In [None]:
# Number of metadata rows per value of the 'rating' column.
sns.countplot(data = train_meta, x = 'rating');

In [None]:
# Frequency of each distinct value in the 'birds' label column, most common first.
train_labels['birds'].value_counts()

- Some audio segments contain more than one bird species.

__Loading an Audio file__

In [None]:
sample_audio = f"{base_dir}train_short_audio/rucwar/XC133150.ogg"

# No `sr` argument is passed here, so the returned sample rate is whatever librosa picks.
signal, sr = librosa.load(sample_audio)
n_samples = len(signal)

print(f"Sample rate  : {sr}")
print(f"Signal Length: {n_samples}")
print(f"Duration     : {n_samples / sr} secs")

In [None]:
# Reload the first 15 seconds at a fixed rate. Every plot below is told this rate explicitly;
# without it librosa falls back to its own default and mislabels the time/frequency axes.
plot_sr = 44100
signal, _ = librosa.load(sample_audio, sr = plot_sr, duration = 15)

# `waveplot` was renamed `waveshow` in newer librosa releases; use whichever one exists.
draw_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

plt.figure(figsize = (20, 5))
draw_wave(signal, sr = plot_sr)
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.show()

# Mel spectrogram; the display call gets the same sr/fmax used to compute it so the axes agree.
plt.figure(figsize = (20, 5))
mels = librosa.feature.melspectrogram(y = signal, sr = plot_sr, n_mels = 256, fmax = 8000)
librosa.display.specshow(librosa.power_to_db(mels, ref = np.max), sr = plot_sr, fmax = 8000,
                         x_axis = 'time', y_axis = 'mel')
plt.title('Melspectrogram')
plt.colorbar()
plt.show()

#Short-term Fourier Transform
plt.figure(figsize = (20, 5))
stft = librosa.stft(y = signal)
# The STFT is complex; take the magnitude explicitly before converting to decibels.
stft_db = librosa.amplitude_to_db(np.abs(stft))
librosa.display.specshow(stft_db, sr = plot_sr, x_axis = 'time', y_axis = 'hz')
plt.title('Spectrogram - STFT')
plt.colorbar()
plt.show()

#Log Frequency Axis
plt.figure(figsize = (20, 5))
librosa.display.specshow(stft_db, sr = plot_sr, x_axis = 'time', y_axis = 'log')
plt.colorbar()
plt.title('Log Frequency Axis')
plt.show()

Audio(sample_audio, rate = plot_sr)

__Zero Crossings by Librosa__

In [None]:
# Sample window (start, stop) used to zoom into the waveform.
n0, n1 = 10000, 10200

fig, ax = plt.subplots(figsize = (20, 5))
ax.plot(signal[n0: n1])
ax.set_title('Zooming in the Signal')
# Argument-less grid() toggles the grid state, exactly as the pyplot call does.
ax.grid()
plt.show()

In [None]:
# Flag the sign changes inside the zoomed window and count them.
zoomed = signal[n0: n1]
zero_crossings = librosa.zero_crossings(zoomed, pad = False)
print(zero_crossings.shape)
print(f"Number of Zero crossings: {zero_crossings.sum()}")

__Recordings Count by Year__

In [None]:
# Split each 'date' string on '-' once, then store the three pieces as string columns.
date_parts = train_meta['date'].apply(lambda stamp: stamp.split('-'))
for position, column in enumerate(['year', 'month', 'day']):
    train_meta[column] = date_parts.apply(lambda parts, position = position: parts[position])
#train_meta.head(2)

- Some records have invalid year values (0000, 0201, 0199 and 0202); we fix them below.

In [None]:
# Years that do not start with '19' or '20' are treated as invalid and replaced by the
# most frequent valid year.
valid_year = train_meta['year'].str[:2].isin(['19', '20'])
most_common_year = train_meta.loc[valid_year, 'year'].value_counts().index[0]
# Assign the result back to the column. Calling fillna(inplace=True) on a column selection
# updates a temporary under pandas copy-on-write and leaves the DataFrame unchanged.
train_meta['year'] = train_meta['year'].where(valid_year, most_common_year)

In [None]:
# Recordings per year; bars follow value_counts order (most frequent year first).
temp = train_meta['year'].value_counts()
year_chart = dict(title = 'Number of Recordings by Year',
                  labels = {'x': 'Year', 'y': 'Count'})
px.bar(x = temp.index, y = temp.values, **year_chart)

In [None]:
# Recordings per month, counted through the non-null 'primary_label' entries of each group.
temp = train_meta.groupby('month')['primary_label'].count()
month_chart = dict(title = 'Number of Recordings by Month',
                   labels = {'x': 'Months', 'y': 'Count'})
px.bar(x = temp.index, y = temp.values, **month_chart)

In [None]:
# Species x month table holding the count of non-null 'secondary_labels' entries per cell.
temp = train_meta.pivot_table(
    index = 'primary_label',
    columns = 'month',
    values = 'secondary_labels',
    aggfunc = 'count',
)

In [None]:
# Monthly counts (months as rows) for the first ten species columns of the pivot table.
t = temp.T.iloc[:, :10].fillna(0)
px.line(t, 
       title = 'Bird Recordings by Month',
       # Label keys must match the plotted names: the x axis is the 'month' index, not 'months'.
       labels = {'month': 'Months', 'value': 'Num of Recordings'}, 
    )

In [None]:
# Monthly counts (months as rows) for species columns 11 through 20 of the pivot table.
t = temp.T.iloc[:, 11:21].fillna(0)
px.line(t, 
       title = 'Bird Recordings by Month',
       # Label keys must match the plotted names: the x axis is the 'month' index, not 'months'.
       labels = {'month': 'Months', 'value': 'Num of Recordings'}, 
    )

In [None]:
# Monthly counts (months as rows) for species columns 300 through 320 of the pivot table.
t = temp.T.iloc[:, 300:321].fillna(0)
px.line(t, 
       title = 'Bird Recordings by Month',
       # Label keys must match the plotted names: the x axis is the 'month' index, not 'months'.
       labels = {'month': 'Months', 'value': 'Num of Recordings'}, 
    )

In [None]:
# Monthly counts (months as rows) for species columns 322 through 350 of the pivot table.
t = temp.T.iloc[:, 322:351].fillna(0)
px.line(t, 
       title = 'Bird Recordings by Month',
       # Label keys must match the plotted names: the x axis is the 'month' index, not 'months'.
       labels = {'month': 'Months', 'value': 'Num of Recordings'}, 
    )

In [None]:
# Entry counts of the two training audio folders.
short_audio_entries = os.listdir(base_dir + 'train_short_audio/')
print(f"Number of birds in train_short_audio: {len(short_audio_entries)}")
soundscape_entries = os.listdir(base_dir + 'train_soundscapes/')
print(f"Number of audio files in train_soundscapes: {len(soundscape_entries)}")

- Let's check the birds and their associated audio files

In [None]:
# Count the entries inside every per-bird folder and chart them, largest folder first.
audio_path = base_dir + 'train_short_audio/'
birds_audio = {bird: len(os.listdir(audio_path + bird)) for bird in os.listdir(audio_path)}
birds_df = (
    pd.DataFrame(list(birds_audio.items()), columns = ['Birds', 'Num_Audio'])
      .sort_values(by = 'Num_Audio', ascending = False)
)
px.bar(birds_df, x = 'Birds', y = 'Num_Audio')

# Bird Recording Location on World Map
- Click the location dots to see the bird name

In [None]:
import folium
import branca
import branca.colormap as cm

bird_map = folium.Map(prefer_canvas = True, zoom_start = 10)

# Draw one randomly chosen bird per marker colour. Each draw is independent, so the same
# bird can come up more than once.
for marker_color in ['blue', 'red', 'green', 'yellow']:
    bird = np.random.choice(birds_df['Birds'], 1)[0]
    # Markers need finite coordinates, so recordings without a position are left out.
    temp = train_meta.loc[train_meta['primary_label'] == bird, ['latitude', 'longitude']].dropna()
    for lat, long in zip(temp['latitude'], temp['longitude']):
        folium.CircleMarker(location = [lat, long],
                            radius = 1,
                            color = marker_color,
                            popup = bird,
                            weight = 5).add_to(bird_map)

# Zoom the map so that every marker added above is visible.
bird_map.fit_bounds(bird_map.get_bounds())
#bird_map.add_child(folium.LatLngPopup())

bird_map

# Migratory Pattern of Top Birds

In [None]:
# Second most frequent entry of the soundscape 'birds' column (position 0 is skipped).
bird = train_labels['birds'].value_counts().index[1]
temp = train_meta[['latitude', 'longitude', 'month']][train_meta['primary_label'] == bird] 

bird_map = folium.Map(prefer_canvas = True, zoom_start = 10)

# Month -> colour scale. `index` needs one entry per colour; it is omitted here so the three
# colours are spread evenly between vmin and vmax instead of being misaligned by a shorter list.
colormap = cm.LinearColormap(colors = ['red','lightblue', 'blue'], 
                             vmin = 1, vmax = 12)
colormap.caption = bird.upper()
colormap.add_to(bird_map)
                             
# One marker per recording, coloured by the month it was recorded in.
for mon in temp['month'].unique():
    longlat = temp[temp['month'] == mon][['latitude', 'longitude']]
    for loc in zip(longlat['latitude'], longlat['longitude']): 
        folium.CircleMarker(location = loc, 
                       radius = 1, 
                        color = colormap(int(mon)),
                            popup = mon, 
                                weight = 5).add_to(bird_map)
bird_map

In [None]:
# Third most frequent entry of the soundscape 'birds' column.
bird = train_labels['birds'].value_counts().index[2]
temp = train_meta[['latitude', 'longitude', 'month']][train_meta['primary_label'] == bird] 

bird_map = folium.Map(prefer_canvas = True, zoom_start = 10)

# Month -> colour scale. `index` needs one entry per colour; it is omitted here so the three
# colours are spread evenly between vmin and vmax instead of being misaligned by a shorter list.
colormap = cm.LinearColormap(colors = ['red','lightblue', 'blue'], 
                             vmin = 1, vmax = 12)
colormap.caption = bird.upper()
colormap.add_to(bird_map)
                             
# One marker per recording, coloured by the month it was recorded in.
for mon in temp['month'].unique():
    longlat = temp[temp['month'] == mon][['latitude', 'longitude']]
    for loc in zip(longlat['latitude'], longlat['longitude']): 
        folium.CircleMarker(location = loc, 
                       radius = 1, 
                        color = colormap(int(mon)),
                            popup = mon, 
                                weight = 5).add_to(bird_map)
bird_map

In [None]:
# Fourth most frequent entry of the soundscape 'birds' column.
bird = train_labels['birds'].value_counts().index[3]
temp = train_meta[['latitude', 'longitude', 'month']][train_meta['primary_label'] == bird] 

bird_map = folium.Map(prefer_canvas = True, zoom_start = 10)

# Month -> colour scale. `index` needs one entry per colour; it is omitted here so the three
# colours are spread evenly between vmin and vmax instead of being misaligned by a shorter list.
colormap = cm.LinearColormap(colors = ['red','lightblue', 'blue'], 
                             vmin = 1, vmax = 12)
colormap.caption = bird.upper()
colormap.add_to(bird_map)
                             
# One marker per recording, coloured by the month it was recorded in.
for mon in temp['month'].unique():
    longlat = temp[temp['month'] == mon][['latitude', 'longitude']]
    for loc in zip(longlat['latitude'], longlat['longitude']): 
        folium.CircleMarker(location = loc, 
                       radius = 1, 
                        color = colormap(int(mon)),
                            popup = mon, 
                                weight = 5).add_to(bird_map)
bird_map

- Let's first consider only quality data - rating > 3.5

In [None]:
# Keep only the recordings whose rating is strictly above 3.5.
is_high_quality = train_meta['rating'] > 3.5
train = train_meta.loc[is_high_quality]
train.shape

In [None]:
# Restrict the data to labels that have more than 75 recordings left.
label_counts = train['primary_label'].value_counts()
top_birds = label_counts[label_counts.values > 75].index
keep_rows = train['primary_label'].isin(top_birds)
train = train[keep_rows]
train.shape

__Create a dataframe with primary_label, filename and filepath__

In [None]:
base = base_dir + 'train_short_audio/'

# Shuffle with a fixed seed so the row order is reproducible across runs. The seeded
# train/validation split made from `df` later depends on this order.
df = train[['primary_label', 'filename']].sample(frac = 1, random_state = 2021).reset_index(drop = True)
# Audio files live at <base>/<primary_label>/<filename>.
df['filepath'] = base + df['primary_label'].astype(str) + '/' + df['filename'].astype(str)
df.head(2)

In [None]:
# Number of distinct primary labels left in `df` after the rating and frequency filters.
print(f"Number of classes in sample df: {df['primary_label'].nunique()}")

__One-hot encode the primary label (one indicator column per bird)__

In [None]:
# Append one indicator column per distinct primary label.
one_hot = pd.get_dummies(df['primary_label'])
df = pd.concat([df, one_hot], axis = 1)
print(df.shape)
df.head(2)

In [None]:
# Distinct labels in order of first appearance; they double as the indicator column names.
target_birds = df['primary_label'].unique()
len(target_birds)

__Extract Spectrogram using Librosa__

In [None]:
# Number of clips per batch handed to the data generators.
BATCH_SIZE = 64
# Sample rate used when loading audio for feature extraction (rebinds the `sr` set by the
# earlier sample-audio cell).
sr = 32000
# Fixed clip length in samples that the pad/chop helpers work towards: two seconds at `sr`.
length = sr * 2
print(f"Sampling Rate: {sr}\nLength: {length}")

In [None]:
def pad_audio(data): #for length less than sr * 2
    """Left-pad `data` with zeros up to the module-level `length`.

    Input that already has `length` samples or more is returned unchanged
    (the same object, not a copy).
    """
    missing = length - len(data)
    if missing <= 0:
        return data
    # All padding goes in front of the signal; nothing is appended after it.
    return np.pad(data, pad_width = (missing, 0), mode = 'constant', constant_values = (0, 0))

def chop_audio(samples): #for length greater than sr * 2
    """Return a random contiguous window of the module-level `length` samples.

    The start offset is drawn uniformly from every position that leaves a full
    window, including the one ending on the last sample. Input that is not
    longer than `length` is returned unchanged instead of raising.
    """
    spare = len(samples) - length
    if spare <= 0:
        return samples
    # randint's upper bound is exclusive, so +1 keeps the final window reachable.
    offset = np.random.randint(0, spare + 1)
    return samples[offset: offset + length]

def load_mels_spec(audio):
    """Load an audio file and return a fixed-length log-mel spectrogram.

    At most the first 15 seconds are read at the module-level sample rate `sr`
    and passed through `librosa.effects.trim`. The signal is then zero-padded
    by `pad_audio`, or randomly cropped by `chop_audio`, towards the
    module-level `length`.

    Parameters
    ----------
    audio : path of the audio file, passed straight to `librosa.load`.

    Returns
    -------
    2-D array with 256 mel bands, in decibels relative to the clip's own maximum.
    """
    signal, _ = librosa.load(audio, sr = sr, duration = 15)
    signal, _ = librosa.effects.trim(signal)
    signal = pad_audio(signal)
    if len(signal) > length:
        signal = chop_audio(signal)
    mels = librosa.feature.melspectrogram(y = signal, sr = sr, n_mels = 256, fmin = 20, fmax = sr / 2.0)
    # Return the decibel-scaled spectrogram; the conversion used to be computed and then
    # thrown away, with the raw power values returned instead.
    return librosa.power_to_db(mels, ref = np.max)

In [None]:
class AudioDataGen(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of mel spectrograms with their label rows.

    Parameters
    ----------
    data : DataFrame with a 'filepath' column and one column per entry of the
        module-level `target_birds`.
    batch_size : number of rows per batch; the last batch may be smaller.
    shuffle : when True, the batch order is reshuffled in `on_epoch_end`
        (which also runs once at construction).
    """
    def __init__(self, data, batch_size, shuffle = False):
        # Let the Keras base class initialise its own state; newer Keras releases expect this call.
        super().__init__()
        self.data  = data
        self.labels = self.data[target_birds]
        self.shuffle  = shuffle
        self.batch_size = batch_size
        # Index labels of `data`; positions in `self.indices` point into this array.
        self.list_idx = self.data.index.values
        self.on_epoch_end()
        
    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(float(len(self.data)) / float(self.batch_size)))
    
    def __getitem__(self, index):
        """Return `(Data, Target)` for batch number `index`.

        `Data` stacks the spectrograms and gains a trailing channel axis;
        `Target` stacks the matching rows of the label columns.
        """
        batch_idx = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        
        # Translate shuffled positions into the DataFrame's own index labels.
        idx = [self.list_idx[k] for k in batch_idx]
        
        # `.at` does a single label-based lookup instead of chained indexing.
        Data = [load_mels_spec(self.data.at[k, 'filepath']) for k in idx]
        Target = [self.labels.loc[k].values for k in idx]
            
        Data = np.expand_dims(np.array(Data), -1)
        Target = np.array(Target)
            
        return Data, Target
    
    def on_epoch_end(self):
        """Rebuild the position list and shuffle it when `shuffle` is enabled."""
        self.indices = np.arange(len(self.list_idx))
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
traingen = AudioDataGen(data = df, batch_size = BATCH_SIZE)

# Smoke test: pull the first batch only and show the feature and label shapes.
for d, l in traingen:
    print(d.shape, l.shape, sep = '\n')
    break
    
# Drop the throw-away generator before building the model.
del traingen
gc.collect()

In [None]:
print(tf.__version__)
import tensorflow.keras.layers as L
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

__Let's build a simple TensorFlow model__

In [None]:
def get_2d_model(input_shape = (256, 126, 1), learning_rate = 0.001):
    """Build and compile the 2-D convolutional classifier.

    Architecture, in order:
      * four stages of Conv2D (4x10 kernel, 'same' padding) -> BatchNorm -> ReLU -> MaxPool
        with 96, 64, 48 and 32 filters;
      * Flatten and Dropout(0.5);
      * two Dense(80) -> BatchNorm -> ReLU blocks;
      * a softmax layer with one unit per entry of the module-level `target_birds`.

    The model is compiled with Adam at `learning_rate`, categorical cross-entropy
    loss and the accuracy metric.
    """
    inp = L.Input(shape = input_shape)

    # Convolutional feature extractor.
    x = inp
    for n_filters in (96, 64, 48, 32):
        x = L.Conv2D(n_filters, (4,10), padding = "same")(x)
        x = L.BatchNormalization()(x)
        x = L.Activation("relu")(x)
        x = L.MaxPool2D()(x)

    x = L.Flatten()(x)
    x = L.Dropout(0.5)(x)

    # Fully connected head.
    for _ in range(2):
        x = L.Dense(80)(x)
        x = L.BatchNormalization()(x)
        x = L.Activation("relu")(x)

    out = L.Dense(len(target_birds), activation = 'softmax')(x)

    model = Model(inputs = inp, outputs = out)
    model.compile(optimizer = Adam(learning_rate),
                  loss = tf.keras.losses.CategoricalCrossentropy(),
                  metrics = ['accuracy'])
    return model

In [None]:
# All three callbacks react to the validation loss.
callbacks_api = tf.keras.callbacks
# Halve the learning rate after 2 epochs without improvement.
reduce = callbacks_api.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 2, verbose = 1)
# Stop training after 5 epochs without improvement.
early = callbacks_api.EarlyStopping(monitor = 'val_loss', patience = 5, verbose = 1)
# Keep only the best model seen so far on disk.
check = callbacks_api.ModelCheckpoint(filepath = 'clef_model.h5', monitor = 'val_loss',
                                      save_best_only = True, verbose = 0)

In [None]:
# Build the network with its default input shape and learning rate, then print the layer summary.
model = get_2d_model()
model.summary()

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state repeats the split for a given `df`.
train_df, valid_df = train_test_split(df, test_size = 0.2, random_state = 2021)
print(train_df.shape, valid_df.shape)

In [None]:
# Whole batches per epoch; a trailing partial training batch is not counted.
STEPS_PER_EPOCH = len(train_df) // BATCH_SIZE

# Reshuffle the training rows every epoch so batches differ between epochs;
# the validation generator keeps a fixed order.
traingen = AudioDataGen(data = train_df, batch_size = BATCH_SIZE, shuffle = True)
validgen = AudioDataGen(data = valid_df, batch_size = BATCH_SIZE)

# The generators hold their own references to the frames; only the names are dropped here.
del train_df, valid_df
gc.collect()

In [None]:
# Train for three epochs on the training generator, validating on the held-out generator.
fit_options = dict(
    epochs = 3,
    verbose = 1,
    callbacks = [check, reduce, early],
    steps_per_epoch = STEPS_PER_EPOCH,
    validation_data = validgen,
)
history = model.fit(traingen, **fit_options)

# WIP