In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import soundfile as sf
import librosa
import librosa.display
import IPython.display as display
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
import os
from tqdm import tqdm
import sklearn
import seaborn as sns
import plotly.express as px


import geopandas as gpd
from shapely.geometry import Point, Polygon

from sklearn.model_selection import train_test_split

from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPool1D, BatchNormalization
from keras.optimizers import RMSprop,Adam
from keras.applications import VGG19, VGG16, ResNet50

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Root folder of the competition data; the string ends with '/' because later
# cells build file paths by plain string concatenation (path + name).
path = '/kaggle/input/birdclef-2021/'
# List the entries found directly under the data folder.
os.listdir(path)

In [None]:
def read_ogg_file(path, file):
    """Read an audio file with soundfile.

    Parameters
    ----------
    path : str
        Prefix that is concatenated (not joined) with ``file``.
    file : str
        File name appended to ``path``.

    Returns
    -------
    tuple
        ``(data, samplerate)`` exactly as produced by ``sf.read``.
    """
    full_name = path + file
    audio, rate = sf.read(full_name)
    return audio, rate


def plot_audio_file(data, samplerate):
    """Plot an audio signal as amplitude over time.

    Parameters
    ----------
    data : array-like
        Audio samples, e.g. the first value returned by ``read_ogg_file``.
    samplerate : int
        Sampling rate in Hz; used to convert sample indices to seconds.
    """
    plt.figure(figsize=(8, 4))
    # Express the x-axis in seconds so the sampling rate is actually used.
    seconds = np.arange(len(data)) / samplerate
    # Draw the curve once and label it: the legend call below needs at least
    # one labelled artist, otherwise matplotlib only emits a warning.
    plt.plot(seconds, data, color='red', label='waveform')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.legend(loc='upper center')
    plt.grid()
    
    
def plot_spectrogram(data, samplerate):
    """Plot a mel-scaled spectrogram of an audio signal in decibels.

    Parameters
    ----------
    data : array-like
        Audio samples.
    samplerate : int
        Sampling rate in Hz, forwarded to librosa for the mel filter bank and
        for the axis scaling of the plot.
    """
    # The audio is passed by keyword: recent librosa releases no longer accept
    # it positionally, and the keyword form also works on older releases.
    mel_power = librosa.feature.melspectrogram(y=data, sr=samplerate)
    # Convert power to dB relative to the loudest bin.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    librosa.display.specshow(mel_db, sr=samplerate, x_axis='time', y_axis='mel')

In [None]:
# Load the four competition tables in one pass; the order of the file names
# matches the order of the target variables on the left.
train_labels, train_meta, test_data, samp_subm = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        'train_soundscape_labels.csv',
        'train_metadata.csv',
        'test.csv',
        'sample_submission.csv',
    )
)

In [None]:
# Report the size of every table / folder, one line per item.
for _caption, _count in (
    ('Number train label samples:', len(train_labels)),
    ('Number train meta samples:', len(train_meta)),
    ('Number train short folder:', len(os.listdir(path+'train_short_audio'))),
    ('Number train audios:', len(os.listdir(path+'train_soundscapes'))),
    ('Number test samples:', len(test_data)),
):
    print(_caption, _count)

In [None]:
# Peek at the first two entries of one species folder ('caltow').
os.listdir(path + 'train_short_audio/caltow')[:2]

In [None]:
# First rows of the soundscape label table.
train_labels.head()

In [None]:
# First rows of the short-audio metadata table.
train_meta.head()

Data Size

In [None]:
# (rows, columns) of the metadata table and of the soundscape label table.
print(f"Training Dataset Shape: {train_meta.shape}")
print(f"Training Dataset Labels Shape: {train_labels.shape}")

Column-wise unique values

In [None]:
# Print the number of distinct values per column for both tables.
# nunique(dropna=False) counts a missing value as its own category, which is
# what len(Series.unique()) does as well.
for _heading, _frame in (("Data: train", train_meta),
                         ("\nData: train_labels", train_labels)):
    print(_heading)
    print("-----------")
    for col in _frame.columns:
        print(col + ":" + str(_frame[col].nunique(dropna=False)))

Time of the Recording

In [None]:
# Split the 'date' string on '-' and keep the first two fields as year and
# month (assumes a YYYY-MM-DD layout - TODO confirm against the CSV).
# The vectorised .str accessor leaves missing dates as NaN instead of raising.
date_parts = train_meta['date'].str.split("-")
train_meta['year'] = date_parts.str[0]
train_meta['month'] = date_parts.str[1]

plt.figure(figsize=(16, 6))
# Pass the values through the x= keyword: seaborn treats a positional first
# argument as `data`, which warns on older releases and misbehaves on newer ones.
ax = sns.countplot(x=train_meta['year'].sort_values(ascending=False), palette="hls")

plt.title("Audio Files Registration per Year Made", fontsize=16)
plt.xticks(rotation=70, fontsize=13)
plt.yticks(fontsize=13)
plt.ylabel("Frequency", fontsize=14)
plt.xlabel("");

In [None]:
# Number of recordings per calendar month (uses the 'month' column derived
# from 'date' in the previous cell).
plt.figure(figsize=(16, 6))
# Values go through x= because seaborn interprets a positional first argument
# as `data`.
ax = sns.countplot(x=train_meta['month'].sort_values(ascending=False), palette="hls")

plt.title("Audio Files Registration per Month Made", fontsize=16)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.ylabel("Frequency", fontsize=14)
plt.xlabel("");

In [None]:
# Positional index of the example recording inspected in the following cells.
row = 1000
train_meta.iloc[row]

In [None]:
# Species code and audio file name of the selected example.
# NOTE(review): .loc is label-based while the previous cell used .iloc; both
# address the same record only while the index is the default RangeIndex.
label = train_meta.loc[row, 'primary_label']
filename = train_meta.loc[row, 'filename']

# Check if the file is in the folder
filename in os.listdir(path+'train_short_audio/' + label)

Visualizing the audio

In [None]:
# Build the full path from the metadata table rather than from the current
# value of `filename`; otherwise every re-run of this cell would prepend the
# directory prefix once more and produce an invalid path.
filename = f"../input/birdclef-2021/train_short_audio/{label}/{train_meta.loc[row, 'filename']}"
filename

In [None]:
plt.figure(figsize=(18, 5))

# by default librosa.load resamples to a sample rate of 22050 Hz
# librosa also converts the input to mono, hence `data` is always 1-D here
data, sample_rate = librosa.load(filename)
# NOTE(review): waveplot exists only in older librosa releases; newer ones
# provide librosa.display.waveshow instead - confirm the installed version.
librosa.display.waveplot(data, sr=sample_rate)
print("Sample Rate: ", sample_rate)
# Inline audio player for the same file.
ipd.Audio(filename)

## Spectrogram
A spectrogram is a visual representation of the spectrum of frequencies of a signal as it varies with time.

In [None]:
# Short-time Fourier transform of the example, shown as a dB-scaled magnitude
# spectrogram on a linear frequency axis.
X = librosa.stft(data)
Xdb = librosa.amplitude_to_db(np.abs(X))

plt.figure(figsize=(18, 5))
librosa.display.specshow(
    Xdb,
    sr=sample_rate,
    x_axis='time',
    y_axis='hz',
)
plt.colorbar();

## Spectral Centroid
The spectral centroid is a measure used in digital signal processing to characterise a spectrum. It indicates where the center of mass of the spectrum is located. 

In [None]:
# Audio is passed as y=...: recent librosa releases reject positional audio,
# and the keyword form is accepted by older releases too.
spectral_centroids = librosa.feature.spectral_centroid(y=data, sr=sample_rate)[0]
plt.figure(figsize=(25, 9))
frames = range(len(spectral_centroids))
# Give frames_to_time the real sampling rate instead of relying on its default,
# so the curve stays aligned with the waveform whatever rate was loaded.
t = librosa.frames_to_time(frames, sr=sample_rate)

# Normalising the spectral centroid for visualisation
def normalize(x, axis=0):
    """Rescale ``x`` to the [0, 1] range along ``axis`` (min-max scaling)."""
    return sklearn.preprocessing.minmax_scale(x, axis=axis)

#Plotting the Spectral Centroid along the waveform
librosa.display.waveplot(data, sr=sample_rate, alpha=0.4)
plt.plot(t, normalize(spectral_centroids), color='b');

## Spectral Rolloff
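It is a measure of the shape of the signal. It represents the frequency at which high frequencies decline to 0; more precisely, it is the frequency below which a chosen fraction of the total spectral energy lies (85% by default in librosa).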

In [None]:
plt.figure(figsize=(25, 9))
# Audio is passed as y=... (positional audio is rejected by recent librosa).
# NOTE(review): the +0.01 offset is kept from the original; presumably it
# avoids all-zero frames - confirm it is still wanted.
spectral_rolloff = librosa.feature.spectral_rolloff(y=data+0.01, sr=sample_rate)[0]
librosa.display.waveplot(data, sr=sample_rate, alpha=0.4)
# `t` and `normalize` come from the spectral-centroid cell above.
plt.plot(t, normalize(spectral_rolloff), color='r');

## Spectral bandwidth
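In optics, the spectral bandwidth is defined as the width of the band of light at one-half the peak maximum (full width at half maximum, FWHM), often written $\lambda_{SB}$ on the wavelength axis. For audio, librosa computes the order-$p$ spectral bandwidth of each frame as $\left(\sum_k S(k)\,|f(k) - f_c|^p\right)^{1/p}$, where $S(k)$ is the normalised spectral magnitude at frequency bin $f(k)$ and $f_c$ is the spectral centroid.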

In [None]:
# Spectral bandwidth of order p = 2, 3, 4. Audio is passed as y=... because
# recent librosa releases reject positional audio.
spectral_bandwidth_2 = librosa.feature.spectral_bandwidth(y=data+0.01, sr=sample_rate)[0]
spectral_bandwidth_3 = librosa.feature.spectral_bandwidth(y=data+0.01, sr=sample_rate, p=3)[0]
spectral_bandwidth_4 = librosa.feature.spectral_bandwidth(y=data+0.01, sr=sample_rate, p=4)[0]
plt.figure(figsize=(25, 9))
librosa.display.waveplot(data, sr=sample_rate, alpha=0.4)
# Label every curve explicitly; a positional legend(('p = 2', ...)) binds the
# labels by artist order and can end up labelling the waveform instead.
# `t` and `normalize` come from the spectral-centroid cell above.
plt.plot(t, normalize(spectral_bandwidth_2), color='r', label='p = 2')
plt.plot(t, normalize(spectral_bandwidth_3), color='g', label='p = 3')
plt.plot(t, normalize(spectral_bandwidth_4), color='y', label='p = 4')
plt.legend();  # p: order of spectral bandwidth

## Zero-Crossing Rate
The zero-crossing rate (ZCR) is the rate at which a signal changes from positive to zero to negative or from negative to zero to positive.

In [None]:
# Plot a short excerpt of the signal so individual zero crossings are visible.
plt.figure(figsize=(25, 9))
# librosa.display.waveplot(data, sr=sample_rate)
# Zooming in: sample indices n0 (inclusive) to n1 (exclusive), i.e. 100 samples
n0 = 9000
n1 = 9100

plt.plot(data[n0:n1])
plt.grid()

In [None]:
# Boolean mask marking the samples of the excerpt where the sign changes;
# the number of True entries is the zero-crossing count.
zero_crossings = librosa.zero_crossings(data[n0:n1], pad=False)
print(zero_crossings.sum())

## Mel-Frequency Cepstral Coefficients (MFCCs)
The Mel frequency cepstral coefficients (MFCCs) of a signal are a small set of features (usually about 10–20) which concisely describe the overall shape of a spectral envelope.

In [None]:
# Audio is passed as y=...: recent librosa releases reject positional audio,
# and the keyword form is accepted by older releases too.
mfccs = librosa.feature.mfcc(y=data, sr=sample_rate)

#Displaying  the MFCCs:
plt.figure(figsize=(15, 7))
librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
plt.colorbar();

## Chroma features
A chroma feature or vector is typically a 12-element feature vector indicating how much energy of each pitch class, {C, C#, D, D#, E, …, B}, is present in the signal.

In [None]:
hop_length=512
# Audio is passed as y=... (positional audio is rejected by recent librosa).
chromagram = librosa.feature.chroma_stft(y=data, sr=sample_rate, hop_length=hop_length)
plt.figure(figsize=(20, 8))
# Forward the sampling rate as well so the time axis is computed from the
# actual rate rather than from specshow's default.
librosa.display.specshow(chromagram, sr=sample_rate, x_axis='time', y_axis='chroma',
                         hop_length=hop_length, cmap='coolwarm')
plt.colorbar();

In [None]:
# Number of distinct species codes in the metadata.
print("There are total {} species".format(train_meta['primary_label'].nunique()))

## Top 100

In [None]:
def plotbar(series, pal):
    """Draw a bar chart of a Series: index on the x-axis, values as bar heights.

    Parameters
    ----------
    series : pandas.Series
        Typically the result of ``value_counts()``.
    pal : str
        Seaborn palette name used to colour the bars.
    """
    plt.figure(figsize=(20, 9))
    chart = sns.barplot(
        x=series.index,
        y=series.values,
        palette=pal,
        edgecolor=(0, 0, 0),
        linewidth=2,
    )
    # Tilt the category labels so long names do not overlap.
    tick_labels = chart.get_xticklabels()
    chart.set_xticklabels(tick_labels, rotation=45)


# The 100 most frequent species codes.
species = train_meta['primary_label'].value_counts().head(100)
plotbar(species, "Blues_r") # series, palette

In [None]:
# Set the default figure size. sns.set changes global plotting settings, so
# this also affects every later figure that does not set its own size.
sns.set(rc={'figure.figsize':(20,6)})
# Number of recordings per rating value.
sns.countplot(x='rating', data=train_meta, edgecolor=(0,0,0), linewidth=2, palette=('cubehelix'));

There are only a few files with low ratings.

In [None]:
# The ten authors with the most recordings.
authors = train_meta['author'].value_counts()[:10]
plotbar(authors, "YlOrBr_r") # series, palette

Richard E. Webster is the author with the largest number of recordings.

## Top 100 training samples per species

In [None]:
print("Common Name")
# The 100 most frequent common names.
common = train_meta['common_name'].value_counts()[:100]
# Plot the series computed above; the original call passed `authors` here and
# therefore repeated the author chart instead of showing common names.
plotbar(common, "light:b_r") # series, palette

In [None]:
print("Scientific Name- Top-50")
# The 50 most frequent scientific names.
scien = train_meta['scientific_name'].value_counts()[:50]
plotbar(scien, "Greens_r") # series, palette

In [None]:
plt.figure(figsize=(18, 5))

# by default librosa.load resamples to a sample rate of 22050 Hz
# librosa also converts the input to mono, hence `sig` is always 1-D here
sig, sample_rate = librosa.load(filename)
# Plot the signal that was just loaded; the original plotted the older `data`
# variable, which only matched by coincidence of earlier cells.
librosa.display.waveplot(sig, sr=sample_rate)
print("Sample Rate: ", sample_rate)
ipd.Audio(filename)

In the past, convolutional neural networks (CNNs) have been shown to perform particularly well for sound classification. But CNNs need 2D inputs. Luckily, we can transform an audio signal into a 2D representation: a so-called spectrogram.

In [None]:
# First, compute the spectrogram using the "short-time Fourier transform" (stft)
spec = librosa.stft(sig)

# Scale the amplitudes according to the decibel scale.
# stft returns complex values, so take the magnitude first; amplitude_to_db
# expects a real-valued amplitude spectrogram.
spec_db = librosa.amplitude_to_db(np.abs(spec), ref=np.max)

# Plot the spectrogram.
# The axis scaling must use the rate the signal was actually loaded with;
# a hard-coded 32000 would mislabel the frequency and time axes.
plt.figure(figsize=(15, 5))
librosa.display.specshow(spec_db, 
                         sr=sample_rate, 
                         x_axis='time', 
                         y_axis='hz', 
                         cmap=plt.get_cmap('viridis'));

In [None]:
# Shape of the dB spectrogram array computed in the previous cell.
print('SPEC SHAPE:', spec_db.shape)

## Spectrums

In [None]:
import os
import warnings
import shutil
warnings.filterwarnings(action='ignore')

import math
import pandas as pd
import librosa
import librosa.display
import numpy as np
import seaborn as sns; sns.set(style='whitegrid')
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from PIL import Image
from tqdm import tqdm,tnrange,tqdm_notebook
import tensorflow as tf
from tqdm.keras import TqdmCallback
from keras.callbacks import ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras import applications as app
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten,AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense,BatchNormalization,Dropout 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.applications import EfficientNetB4, ResNet50,ResNet101, VGG16, MobileNet, InceptionV3

In [None]:
# Global Coefficients that can be modified
class coefs:
    """Namespace of tunable settings read as class attributes (never instantiated here)."""
    
    # Generate Subset
    rat_id = 4 # minimum rating kept by the subset filter (rating >= rat_id)
    recs = 200 # each species must have at least this many recordings
    max_files = 1500 # upper limit on metadata rows kept after shuffling
    thresh = 0.25 # a prediction scoring at or below this becomes 'nocall'
    submission = True # For Submission Only (Less Inference Output)
    
    # Global vars
    seed = 1337       # random seed for shuffling and TensorFlow
    sr = 32000        # librosa sample rate input
    sl = 5 # segment length in seconds
    sshape = (48,128) # height x width
    fmin = 500      # spectrum min frequency
    fmax = 12500    # spectrum max frequency
    n_epoch = 100   # training epochs
    cutoff = 15     # seconds of audio loaded per file (3 segments); set to 600 by the inference code

# NOTE(review): path_switch is not read anywhere in the visible notebook.
path_switch = False

# Helper Functions Stored Below

In [None]:
# Plot Keras Training History
def HistPlot(hist=None):
    """Plot training and validation accuracy / loss per epoch, side by side.

    Parameters
    ----------
    hist : object with a ``history`` dict, optional
        Result of ``model.fit``. When omitted, the notebook-level variable
        ``history`` is used, which keeps the original zero-argument call working.
    """
    if hist is None:
        hist = history  # notebook global created by the model.fit cell
    metrics = hist.history

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    sns.despine(top=True, left=True, bottom=True)

    # Both panels share the same layout, so draw them in one loop. Each panel
    # gets its own legend (the original only labelled the loss panel) and the
    # second curve is named for what it is: validation data.
    for axis, key in zip(ax, ('accuracy', 'loss')):
        axis.plot(metrics[key])
        axis.plot(metrics['val_' + key])
        axis.set_title('model ' + key)
        axis.set_ylabel(key)
        axis.set_xlabel('epoch')
        axis.legend(['train', 'validation'], loc='upper left')
        axis.grid(True, linestyle='--', alpha=0.5)
    plt.show()

# Split the Input signal into segments
def split_signal(sig, segment_len=None):
    """Cut a signal into consecutive, non-overlapping, equal-length segments.

    A trailing remainder shorter than one segment is dropped.

    Parameters
    ----------
    sig : sequence
        Audio samples (anything supporting ``len`` and slicing).
    segment_len : int, optional
        Segment length in samples. Defaults to ``int(coefs.sl * coefs.sr)``,
        i.e. the globally configured segment duration.

    Returns
    -------
    list
        Slices of ``sig``, each exactly ``segment_len`` samples long.

    Raises
    ------
    ValueError
        If the segment length is not positive.
    """
    if segment_len is None:
        segment_len = int(coefs.sl * coefs.sr)
    if segment_len <= 0:
        raise ValueError('segment_len must be a positive number of samples')

    # Only complete segments are kept, so integer division gives their count.
    n_full = len(sig) // segment_len
    return [sig[k * segment_len:(k + 1) * segment_len] for k in range(n_full)]

# extracts spectrograms and saves them in a working directory
def get_spectrograms(filepath, primary_label, output_dir):
    """Cut an audio file into segments and save each one as a mel-spectrogram PNG.

    Parameters
    ----------
    filepath : str
        Audio file to read.
    primary_label : str
        Name of the sub-folder of ``output_dir`` that receives the images.
    output_dir : str
        Root folder for the generated images.

    Returns
    -------
    list of str
        Paths of the PNG files written, in segment order. Empty when the
        loaded audio is shorter than one segment.
    """
    # Only the first coefs.cutoff seconds are read, resampled to coefs.sr.
    # offset is given as 0.0 (start of file) rather than None.
    sig, _ = librosa.load(filepath, sr=coefs.sr, offset=0.0, duration=coefs.cutoff)
    sig_splits = split_signal(sig) # split the signal into parts
    if not sig_splits:
        return []

    # Values that do not change between segments are computed once.
    # hop_length is derived from the target image width coefs.sshape[1].
    hop_length = int(coefs.sl * coefs.sr / (coefs.sshape[1] - 1))
    save_dir = os.path.join(output_dir, primary_label)
    os.makedirs(save_dir, exist_ok=True)
    file_stem = os.path.basename(filepath).rsplit('.', 1)[0]

    saved_samples = []
    for s_cnt, chunk in enumerate(sig_splits):
        mel_spec = librosa.feature.melspectrogram(y=chunk,
                                                  sr=coefs.sr,
                                                  n_fft=1024,
                                                  hop_length=hop_length,
                                                  n_mels=coefs.sshape[0],
                                                  fmin=coefs.fmin,
                                                  fmax=coefs.fmax)

        # NOTE(review): melspectrogram is squared again before the dB
        # conversion; kept as in the original pipeline - confirm this is intended.
        mel_spec = librosa.power_to_db(mel_spec**2, ref=np.max)

        # Normalize to [0, 1]. A constant (e.g. silent) segment has a peak of 0
        # after the shift; skip the division there to avoid 0/0 = NaN pixels.
        mel_spec -= mel_spec.min()
        peak = mel_spec.max()
        if peak > 0:
            mel_spec /= peak

        # Save as 8-bit greyscale image named <audio file stem>_<segment index>.png
        save_path = os.path.join(save_dir, file_stem + '_' + str(s_cnt) + '.png')
        im = Image.fromarray(mel_spec * 255.0).convert("L")
        im.save(save_path)

        saved_samples.append(save_path)

    return saved_samples

# https://stackoverflow.com/questions/1524126/how-to-print-a-list-more-nicely
def list_columns(obj, cols=4, columnwise=True, gap=4):
    """Print the items of ``obj`` as a left-aligned text table.

    Parameters
    ----------
    obj : iterable
        Items to print; each is converted with ``str``.
    cols : int
        Requested number of columns (clamped to ``1 .. len(obj)``).
    columnwise : bool
        If True, consecutive items run down each column; otherwise they run
        across each row.
    gap : int
        Number of spaces added to the width of the longest item.

    Nothing is printed for an empty ``obj``.
    """
    sobj = [str(item) for item in obj]
    if not sobj:
        # max() below would raise on an empty sequence.
        return

    # Keep the column count in a usable range (0 would break range() below).
    cols = max(1, min(cols, len(sobj)))
    max_len = max(len(item) for item in sobj)

    if columnwise:
        # From here on `cols` is the length of each chunk, i.e. the number of
        # printed rows once the chunks are transposed.
        cols = int(math.ceil(len(sobj) / cols))
    plist = [sobj[i:i + cols] for i in range(0, len(sobj), cols)]
    if columnwise:
        # Pad the last chunk so zip() does not drop trailing items.
        plist[-1].extend([''] * (cols - len(plist[-1])))
        plist = zip(*plist)

    printer = '\n'.join(
        ''.join(c.ljust(max_len + gap) for c in p)
        for p in plist)
    print(printer)

In [None]:
''' CREATE A SUBSET OF THE DATA '''
print('STEP 1) CREATING A SUBSET OF DATASET:\n')

lpath = '../input/birdclef-2021/train_metadata.csv'
train = pd.read_csv(lpath)
print(f"[DATASET]: {train.values.shape} : LABELS {len(train.primary_label.value_counts())}")

# subset filter 1 (rating): keep recordings rated at least coefs.rat_id
train = train.query('rating>='+str(coefs.rat_id))
print('\nRATING LIMITER APPLIED:')
print(f'[SUBSET]: {train.values.shape} : LABELS {len(train.primary_label.value_counts())}')

# subset filter 2 (number of recordings per species)
# Take label and count from the same value_counts() result. The original
# zipped unique() (order of appearance) with groupby().count() (sorted by
# label), which pairs a species with another species' count whenever the
# table is not already sorted by label.
birds_count = train.primary_label.value_counts().to_dict()
# Keep the species in order of first appearance, as before.
to_model_spec = [bird_species for bird_species in train.primary_label.unique()
                 if birds_count[bird_species] >= coefs.recs]

print(f'\n {coefs.recs}+ RECORDINGS ONLY BIRDS LIMITED:')
TRAIN = train.query('primary_label in @to_model_spec')
LABELS = sorted(TRAIN.primary_label.unique())
print(f'[SUBSET]: {TRAIN.values.shape} : LABELS {len(LABELS)}')

print('\n BIRD LABELS AVAILABLE AFTER FILTER:')
list_columns(to_model_spec, cols=4, columnwise=True, gap=4)

# subset filter 3 (max audio files)

# Shuffle the training data and limit the number of audio files to max_files
print('\nLIMITING AUDIO FILES ...')
TRAIN = shuffle(TRAIN, random_state=coefs.seed)[:coefs.max_files]
# Recompute the label list: truncation may have removed a species entirely.
LABELS = sorted(TRAIN.primary_label.unique())
print(f'[SUBSET]: {TRAIN.values.shape} : LABELS {len(LABELS)}')

In [None]:
''' CREATE & OUTPUT SPECTOGRAMS FOR TRAINING'''
# The classifier is a CNN, so every audio segment is turned into an image.

# Where the short recordings live and where the spectrogram images are written
input_dir = '../input/birdclef-2021/train_short_audio/'
output_dir = './working/melspectrogram_dataset/'

# Collect the paths of all images produced, one metadata row at a time.
samples = []
with tqdm_notebook(total=len(TRAIN)) as pbar:
    for idx, row in TRAIN.iterrows():
        pbar.update(1)

        # Rows of species outside the modelled set produce no images.
        if row.primary_label not in to_model_spec:
            continue
        audio_file_path = os.path.join(input_dir, row.primary_label, row.filename)
        samples.extend(get_spectrograms(audio_file_path, row.primary_label, output_dir))

TRAIN_SPECS = shuffle(samples, random_state=coefs.seed)
print('SUCCESSFULLY EXTRACTED {} SPECTROGRAMS'.format(len(TRAIN_SPECS)))

In [None]:
# Display one generated spectrogram image (hard-coded example file).
Image.open(f'./working/melspectrogram_dataset/rewbla/XC168632_0.png')

In [None]:
from PIL import Image
# Square the pixel intensities of every saved spectrogram, overwriting the
# files in place.
# NOTE: because the files are overwritten, running this cell a second time
# squares the already-squared images again.
mels_dir = './working/melspectrogram_dataset'
all_dirs = os.listdir(mels_dir)
for i in tqdm(all_dirs):
    photos = os.listdir(f'{mels_dir}/{i}')
    for k in photos:
        img_path = f'{mels_dir}/{i}/{k}'
        with Image.open(img_path) as img:
            # Work on floats in [0, 1]: squaring the raw 8-bit integers wraps
            # around modulo 256 (e.g. 16**2 -> 0) and scrambles the image.
            pixels = np.asarray(img, dtype=np.float32) / 255.0
        squared = np.clip(pixels ** 2 * 255.0, 0, 255).astype(np.uint8)
        img = Image.fromarray(squared)
        img.save(img_path)

In [None]:
# Display the same example file again, after the in-place squaring step above.
Image.open(f'./working/melspectrogram_dataset/rewbla/XC168632_0.png')

In [None]:
''' DATALOADERS '''
# Keras generators that read the spectrogram images from disk; the image
# folder must therefore still exist while training runs.

train_folder = './working/melspectrogram_dataset/'

# Arguments shared by the training and the validation iterator.
shared_flow_args = dict(
    target_size=(coefs.sshape[0], coefs.sshape[1]),  # target size
    batch_size=32,
    seed=42,
    class_mode='categorical',
)

# Validation images are only rescaled ...
valid_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
# ... training images are additionally sheared.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   validation_split=0.2,
                                   shear_range=10,
                                   fill_mode='nearest')

train_generator = train_datagen.flow_from_directory(
    train_folder, subset="training", **shared_flow_args)

validation_generator = valid_datagen.flow_from_directory(
    train_folder, subset="validation", **shared_flow_args)

In [None]:
# Display a second example spectrogram (hard-coded file).
Image.open('./working/melspectrogram_dataset/rewbla/XC488344_2.png')

In [None]:
from PIL import Image
img = Image.open('./working/melspectrogram_dataset/rewbla/XC488344_2.png')
# Square the intensities on a [0, 1] float scale; squaring the raw 8-bit
# integers would wrap around modulo 256 and scramble the picture.
img = np.asarray(img, dtype=np.float32) / 255.0
img = np.clip(img ** 2 * 255.0, 0, 255).astype(np.uint8)
import cv2 as cv
Image.fromarray(img)

In [None]:
# Draw one batch and show a single sample together with ITS one-hot label
# (the original displayed image 3 but printed the label of image 2).
sample_idx = 3
imgs, labels = next(iter(train_generator))
plt.imshow(imgs[sample_idx])
print(labels[sample_idx])

In [None]:
### tf.random.set_seed(coefs.seed)
# Four identical Conv -> BatchNorm -> MaxPool stages with growing filter count.
conv_stack = []
for n_filters in (16, 32, 64, 128):
    conv_kwargs = {'activation': 'relu'}
    if not conv_stack:
        # Only the very first layer declares the input shape (H, W, 3 channels).
        conv_kwargs['input_shape'] = (coefs.sshape[0], coefs.sshape[1], 3)
    conv_stack += [
        tf.keras.layers.Conv2D(n_filters, (3, 3), **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
    ]

model = tf.keras.Sequential(conv_stack + [
    # Global pooling instead of flatten()
    tf.keras.layers.GlobalAveragePooling2D(),

    # Dense block
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),

    # Classification layer: one softmax output per label
    tf.keras.layers.Dense(len(LABELS), activation='softmax'),
])

In [None]:
tf.random.set_seed(coefs.seed)
def pretrained_model(head_id):
    """Build a classifier from a Keras application backbone plus a dense tail.

    Parameters
    ----------
    head_id : str
        One of 'vgg', 'resnet', 'mobilenet', 'inception', 'efficientnet'.

    Returns
    -------
    Sequential model ending in a softmax over ``len(LABELS)`` classes.

    Raises
    ------
    ValueError
        If ``head_id`` is not one of the supported names (the original silently
        returned a model without any backbone in that case).
    """
    input_shape = (coefs.sshape[0], coefs.sshape[1], 3)

    # Strings are compared with ==; `is` tests object identity and only works
    # by accident of interning.
    # Every backbone is created WITHOUT its own pooling, because the tail below
    # applies GlobalAveragePooling2D, which needs the 4-D feature map.
    if head_id == 'vgg':
        head = VGG16(input_shape=input_shape,
                     pooling=None,
                     include_top=False,
                     weights='imagenet')

    elif head_id == 'resnet':
        head = ResNet101(include_top=False,
                         input_tensor=None,
                         input_shape=input_shape,
                         pooling=None,
                         weights=None)

    elif head_id == 'mobilenet':
        head = MobileNet(alpha=1.0,
                         depth_multiplier=1,
                         dropout=0.001,
                         include_top=False,
                         weights=None,
                         input_tensor=None,
                         input_shape=input_shape,
                         pooling=None)

    elif head_id == 'inception':
        # NOTE(review): the original comment says "75x75", presumably the
        # minimum input size of InceptionV3 - confirm coefs.sshape satisfies it.
        head = InceptionV3(input_shape=input_shape,
                           include_top=False,
                           weights='imagenet')

    elif head_id == 'efficientnet':
        head = EfficientNetB4(input_shape=input_shape,
                              include_top=False,
                              weights='imagenet')

    else:
        raise ValueError(
            "unknown head_id {!r}; expected 'vgg', 'resnet', 'mobilenet', "
            "'inception' or 'efficientnet'".format(head_id))

    model = Sequential()
    model.add(head)

    # Tail: pooled features -> dense layer -> softmax over the labels
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.01))
    model.add(Dense(len(LABELS), activation='softmax'))

    # To freeze the backbone weights, set: model.layers[0].trainable = False
    model.summary()

    return model

# Select & Comment out above cell if used
# model = pretrained_model('mobilenet') # define the model
# model = tf.keras.models.load_model('../input/keras-pretrained-models/MobileNet_NoTop_ImageNet.h5') # Reload your model 

In [None]:
# Compile the model and specify optimizer, loss and metric.
# `learning_rate` is the supported keyword; the short form `lr` is deprecated
# and rejected by newer Keras releases.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0008),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.01),
              metrics=['accuracy'])

# Add callbacks to reduce the learning rate if needed, early stopping, and checkpoint saving
callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5),
             EarlyStopping(monitor='val_loss', verbose=1, patience=10),
             # Keeps only the weights of the epoch with the lowest val_loss.
             ModelCheckpoint(filepath='bird_resnet101_best.h5', monitor='val_loss', verbose=0, save_best_only=True),
             TqdmCallback(verbose=0)
            ]
model.summary()

In [None]:
# Train for up to coefs.n_epoch epochs; the EarlyStopping callback defined in
# the previous cell may stop earlier. `history` is read by HistPlot().
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    verbose=1,
                    callbacks=callbacks,
                    epochs=coefs.n_epoch)

In [None]:
# Plot training curves from the global `history` produced by model.fit
HistPlot() # plot accuracy metric & loss function

In [None]:
''' I. HELPER FUNCTIONS'''
# Define Path for Training & Test Soundscape Data
# If competition reruns notebook, test data folder will be loaded

# Print Available Soundscape Files
def print_available(verbose=False,
                    test_dir='../input/birdclef-2021/test_soundscapes',
                    fallback_dir='../input/birdclef-2021/train_soundscapes'):
    """Return the paths of the soundscape .ogg files to run inference on.

    Files are taken from ``test_dir``; if that folder is missing or holds no
    .ogg file, ``fallback_dir`` is used instead.

    Parameters
    ----------
    verbose : bool
        If True, print the number of files and an indexed listing.
    test_dir, fallback_dir : str
        Folders to search; the defaults are the competition folders.

    Returns
    -------
    list of str
        Sorted file paths (possibly empty).
    """
    def list_files(path):
        # A missing folder is treated like an empty one so the fallback can
        # take over instead of os.listdir raising.
        if not os.path.isdir(path):
            return []
        # Sorted so the processing order does not depend on the file system.
        return sorted(os.path.join(path, f) for f in os.listdir(path)
                      if f.rsplit('.', 1)[-1] in ['ogg'])

    test_audio = list_files(test_dir)
    if len(test_audio) == 0:
        test_audio = list_files(fallback_dir)

    if verbose:
        print('AVAILABLE SOUNDSCAPES:')
        print('{} FILES IN TEST SET.'.format(len(test_audio)))
        print('')

        for ii, i in enumerate(test_audio):
            print(ii, i)

    return test_audio

# Get the labels that will be used to train the model
def print_label_species():
    """Print the global ``to_model_spec`` list as a column table via ``list_columns``."""
    list_columns(to_model_spec, cols=4, columnwise=True, gap=2)

In [None]:
# Persist the model in its current (end-of-training) state and load it back.
# NOTE(review): this is not the best-val_loss checkpoint written by
# ModelCheckpoint ('bird_resnet101_best.h5') - confirm which one is wanted.
model.save('bird_resnet101.h5')
model = tf.keras.models.load_model('./bird_resnet101.h5')
model.summary()

In [None]:
pd.set_option('display.max_rows', None)
# Function to evaluate inference on given recording
def soundscape_records(path,model,submission=False):
    """Predict one label per 5-second segment of a soundscape recording.

    Parameters
    ----------
    path : str
        Soundscape audio file.
    model : Keras model
        Classifier whose output columns follow the global ``LABELS`` order.
    submission : bool
        True  -> return a DataFrame with columns ``row_id`` and ``birds``.
        False -> compare with the training soundscape labels, print a summary
        and return 0.

    Notes
    -----
    Each prediction is matched to its segment through the index embedded in
    the image file name. The original relied on the generator order, which is
    shuffled by default and alphabetical otherwise (``_10`` sorts before
    ``_2``), so labels ended up on the wrong time windows.
    """
    soundscape_folder = './/working/mel_soundscape/'
    evaluate = not submission
    label_col = 'prediction' if evaluate else 'birds'

    # before prediction clear read folder if it exists
    if os.path.exists(soundscape_folder):
        shutil.rmtree(soundscape_folder)

    print('*** READING NEW FILE & STARTING PREDICTION... ***')
    print(f'Reading File: {path}')

    ''' 1. CREATE SOUNDSCAPE FILES AND SAVE THEM '''
    # get_spectrograms cuts the soundscape into 5-second segments. The cutoff
    # is raised to 600 s for this call only (120 segments for a 10-minute
    # file) and restored afterwards so the training setting is not lost.
    previous_cutoff = coefs.cutoff
    coefs.cutoff = 600
    try:
        saved = get_spectrograms(path, 'soundscape', soundscape_folder)
    finally:
        coefs.cutoff = previous_cutoff

    if not saved:
        # Recording shorter than one segment: nothing to predict.
        print('No full segment found in this recording.')
        if evaluate:
            return 0
        return pd.DataFrame({'row_id': [], 'birds': []}, columns=['row_id', 'birds'])

    ''' 2. LOAD IMAGE FILES & DATALOADER '''
    gen_datagen = ImageDataGenerator(rescale=1./255)
    gen_test = gen_datagen.flow_from_directory(soundscape_folder,
                        target_size=(coefs.sshape[0],coefs.sshape[1]),
                        batch_size=32,
                        class_mode='categorical',
                        shuffle=False)  # keep predictions aligned with gen_test.filenames

    ''' 3. MAKE MODEL PREDICTION '''
    # one row of class probabilities per image, in gen_test.filenames order
    scores = model.predict(gen_test, verbose=1)

    # row_id prefix: file name up to its last underscore
    file_prefix = path.split(os.sep)[-1].rsplit('_', 1)[0]

    rows = []
    for image_name, probs in zip(gen_test.filenames, scores):
        # Image files are named <stem>_<segment index>.png
        stem = os.path.basename(image_name).rsplit('.', 1)[0]
        segment_idx = int(stem.rsplit('_', 1)[-1])
        time_id = (segment_idx + 1) * 5   # end time of the segment in seconds

        idx = probs.argmax()      # possibly not best choice
        score = probs[idx]
        # Only keep the species if its probability exceeds the threshold
        label = LABELS[idx] if score > coefs.thresh else 'nocall'
        rows.append((segment_idx, file_prefix + '_' + str(time_id), label, score))

    rows.sort(key=lambda item: item[0])   # chronological order

    data = {'row_id': [item[1] for item in rows],
            label_col: [item[2] for item in rows]}
    if evaluate:
        data['score'] = [item[3] for item in rows]  # confidence of the top class

    # COMBINE & SHOW RESULTS
    if not evaluate:
        return pd.DataFrame(data, columns = ['row_id', 'birds'])

    results = pd.DataFrame(data, columns = ['row_id', 'prediction', 'score'])
    gt = pd.read_csv('../input/birdclef-2021/train_soundscape_labels.csv')
    results = pd.merge(gt, results, on='row_id') # merge only at available rows
    results['outcome'] = results.birds == results.prediction
    intersection_set = list(set(LABELS) & set(results.birds.to_list()))

    print('1A. Before Prediction:')
    list_columns(LABELS, cols=8, columnwise=True, gap=4)
    print('1B. Birds Present')
    present = results.birds.unique()
    if len(present):
        list_columns(present)
    print(f'bird overlap: {len(intersection_set)}/{len(present)} are even present')

    print('\n 2. All Predictions:')
    print(results.outcome.value_counts())
    print('')

    print('3. Bird Predictions Only:')
    df_bird = results[results.birds!='nocall']
    print(df_bird.outcome.value_counts())
    print('\n\n')
    return 0
    
''' MAIN INFERENCE OPTIONS '''
# Use model to evaluate on soundscape segments in one or many files

# A. Get Pathways to Soundscapes
test_audio = print_available()

# B. Inference on all Soundscape, tlist stores all soundscape individual results
tlist = []
for soundscape_path in test_audio:
#     model = tf.keras.models.load_model('best_model.h5') # load external model
    df_infer = soundscape_records(soundscape_path, model, coefs.submission)
    if coefs.submission:
        tlist.append(df_infer)

# combine all soundscape inference results
if coefs.submission:
    if tlist:
        df_allres = pd.concat(tlist, ignore_index=True)
    else:
        # pd.concat raises on an empty list; still write a valid, empty file.
        df_allres = pd.DataFrame(columns=['row_id', 'birds'])
    df_allres.to_csv("submission.csv", index=False)
else:
    # Keep the name defined so the display cell that follows does not fail.
    df_allres = None

# C. Inference for one soundscape
# model = tf.keras.models.load_model('best_model.h5')
# soundscape_records(test_audio[0],model,coefs.submission)

# Remove Training Spectrums to not show them in output
# (ignore_errors lets the cell be re-run after the folder is already gone)
shutil.rmtree('.//working/melspectrogram_dataset/', ignore_errors=True)
os.listdir('.//working//')

In [None]:
# Combined per-segment predictions of all soundscapes.
# NOTE(review): the original inference cell only creates this variable when
# coefs.submission is True.
df_allres