<a href="https://colab.research.google.com/github/msaintfelix/BirdCLEF2022_Competition/blob/main/BirdCLEF2022_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [None]:
import json
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
np.__version__

'1.21.5'

In [None]:
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
CLASSES_PATH = '/content/drive/MyDrive/Datasets/birdclef-2022/scored_birds.json'
SOURCE_PATH = '/content/drive/MyDrive/Datasets/birdclef-2022/train_audio/'
JSON_PATH = '/content/drive/MyDrive/Datasets/birdclef-2022/data.json'

# Sampling rate.
sr = 32000

# Let's make sure all files have the same amount of samples
TOTAL_SAMPLES = 8 * sr

# X amount of slices => X times more training examples.
NUM_SLICES = 5
SAMPLES_PER_SLICE = int(TOTAL_SAMPLES / NUM_SLICES)

In [None]:
# Let's define the 21 targets for this classification problem
with open(CLASSES_PATH, 'r') as f:
    classes = json.load(f)
f.close()

In [None]:
classes

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']

In [None]:
def preprocess_data(source_path, json_path):
    
    # Let's create a dictionary of labels and processed data.
    mydict = {
        "labels": [],
        "mfcc": []
        }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(source_path)):
        if os.path.basename(os.path.normpath(dirpath)) in classes:
            for file in filenames:
              if i<13:
                birdcall, sr = librosa.load(os.path.join(dirpath, file), sr=32000, duration=8, mono=True)
                mfcc = librosa.feature.mfcc(y=birdcall, sr=32000, n_mfcc=13)
                mfcc = mfcc.T
                mydict["labels"].append(os.path.basename(os.path.normpath(dirpath)))
                mydict["mfcc"].append(mfcc.tolist())
                        
    # Let's write the dictionary in a json file called 'data'.    
    with open(json_path, 'w') as f:
        json.dump(mydict, f)
    f.close()

In [None]:
def preprocess_data_unit(source_path, json_path):
    # Let's create a dictionary of labels and processed data.
    mydict = {
        "labels": [],
        "mfcc": []
        }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(source_path)):
        if os.path.basename(os.path.normpath(dirpath)) in classes:
            for file in filenames:
              if i<13:
                birdcall, sr = librosa.load(os.path.join(dirpath, file), sr=32000, duration=8, mono=True)
                print(birdcall.shape)
                mfcc = librosa.feature.mfcc(y=birdcall, sr=32000, n_mfcc=13)
                mfcc = mfcc.T
                print(mfcc.shape)
                mydict["labels"].append(os.path.basename(os.path.normpath(dirpath)))
                mydict["mfcc"].append(mfcc.tolist())
                        
    return mydict

In [None]:
def load_data(json_path):

    with open(json_path, 'r') as f:
        data = json.load(f)
    f.close()

    # Let's load our data into numpy arrays for TensorFlow compatibility.
    X = np.concatenate(np.array(data["mfcc"]))
    y = np.array(data["labels"])

    return X, y

In [None]:
def prepare_datasets(inputs, targets, split_size):
    
    # Creating a validation set and a test set.
    inputs_train, inputs_val, targets_train, targets_val = train_test_split(inputs, targets, test_size=split_size)
    inputs_train, inputs_test, targets_train, targets_test = train_test_split(inputs_train, targets_train, test_size=split_size)
    
    # Our CNN model expects 3D input shape.
    inputs_train = inputs_train[..., np.newaxis]
    inputs_val = inputs_val[..., np.newaxis]
    inputs_test = inputs_test[..., np.newaxis]
    
    return inputs_train, inputs_val, inputs_test, targets_train, targets_val, targets_test

In [None]:
def design_model(input_shape):

    # Let's design the model architecture.
    model = tf.keras.models.Sequential([
        
        tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        
        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        
        tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'), 
        tf.keras.layers.Dense(21, activation='softmax')
    ])

    return model

In [None]:
def make_prediction(model, X, y, idx):
    
    species_dict = {
        0 : 'akiapo',
        1 : 'aniani',
        2 : 'apapan',
        3 : 'barpet',
        4 : 'crehon',
        5 : 'elepai',
        6 : 'ercfra',
        7 : 'hawama',
        8 : 'hawcre',
        9 : 'hawgoo',
        10 : 'hawhaw',
        11 : 'hawpet1',
        12 : 'houfin',
        13 : 'iiwi',
        14 : 'jabwar',
        15 : 'maupar',
        16 : 'omao',
        17 : 'puaioh',
        18 : 'skylar',
        19 : 'warwhe1',
        20 : 'yefcan'
        }
        
    predictions = model.predict(X)
    species = np.argmax(predictions[idx])
    
    print("\n---Now testing the model for one file---\nThe model predicts: {}, and ground truth is: {}.\n".format(species_dict[species], species_dict[y[idx]]))

In [None]:
def plot_performance(hist):
    
    acc = hist.history['acc']
    val_acc = hist.history['val_acc']
    loss = hist.history['loss']
    val_loss = hist.history['val_loss']

    epochs = range(len(acc))

    plt.plot(epochs, acc, 'r', label='Training accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.legend()
    plt.figure()

    plt.plot(epochs, loss, 'r', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()

In [None]:
if __name__ == "__main__":

    preprocess_data(source_path=SOURCE_PATH, json_path=JSON_PATH)
    
    inputs, targets = load_data(json_path=JSON_PATH)
    
    Xtrain, Xval, Xtest, ytrain, yval, ytest = prepare_datasets(inputs, targets, 0.2)

    input_shape = (Xtrain.shape[1], Xtrain.shape[2], 1)
    model = design_model(input_shape)

    # Selection of the optimizer, loss type and metrics for performance evaluation.
    model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=0.001),
                     loss='sparse_categorical_crossentropy',
                     metrics = ['acc']
                     )

    model.summary()

    #Training the model.
    history = model.fit(Xtrain, ytrain,
                        validation_data=(Xval, yval),
                        epochs=1,
                        batch_size=32
                        )

    plot_performance(history)

    # Testing the model on never seen before data.
    make_prediction(model, Xtest, ytest, 24)

In [None]:
 inputs, targets = load_data(json_path=JSON_PATH)

  


In [None]:
dic = preprocess_data_unit(source_path=SOURCE_PATH, json_path=JSON_PATH)

(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(256000,)
(501, 13)
(165504,)
(324, 13)


In [None]:
arr = np.array(dic["mfcc"], dtype=object)

In [None]:
arr.shape

(23,)

In [None]:
dic["mfcc"]

[[[-718.0913696289062,
   5.944169998168945,
   -43.10540771484375,
   31.131284713745117,
   21.198104858398438,
   -6.921740531921387,
   8.987348556518555,
   21.184738159179688,
   -2.4506630897521973,
   -3.8917970657348633,
   9.215142250061035,
   3.0857162475585938,
   -12.553171157836914],
  [-589.701416015625,
   27.318111419677734,
   -58.510353088378906,
   47.28880310058594,
   20.518014907836914,
   1.7436752319335938,
   2.447183609008789,
   29.00727081298828,
   0.19244137406349182,
   7.283350467681885,
   6.843181610107422,
   4.692173004150391,
   -14.283044815063477],
  [-520.9197387695312,
   36.65459442138672,
   -56.500999450683594,
   41.056724548339844,
   33.34832000732422,
   -4.252947807312012,
   9.51169204711914,
   22.039596557617188,
   2.7257676124572754,
   14.578210830688477,
   7.718958854675293,
   0.9289432764053345,
   -3.330151081085205],
  [-495.62872314453125,
   37.704559326171875,
   -52.97560119628906,
   40.090431213378906,
   40.291320800

In [None]:
!python --version

Python 3.7.12
