In [12]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [13]:
import json
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [15]:
# Dataset locations, given as relative paths (resolved against the working directory).
CLASSES_PATH = 'Dataset/birdclef-2022/scored_birds.json'  # JSON file holding the list of class names
SOURCE_PATH = 'Dataset/birdclef-2022/train_audio/'  # presumably the root folder of the training audio -- confirm
JSON_PATH = 'Dataset/birdclef-2022/data.json'  # presumably where the extracted features are cached -- confirm

# Presumably the audio sample rate in Hz -- TODO confirm against the dataset.
sr = 32000

# Five times `sr`, i.e. the sample count of a 5-second clip if `sr` is samples per second.
TOTAL_SAMPLES = 5 * sr

In [16]:
# Let's define the 21 classes for this classification problem.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(CLASSES_PATH, 'r') as f:
    classes = json.load(f)

In [17]:
# Display the class names loaded from CLASSES_PATH.
classes

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']

In [65]:
def preprocess_data(SOURCE_PATH, JSON_PATH, class_names=None,
                    sample_rate=22050, duration=10, n_mfcc=13):
    """Extract fixed-length MFCC features for every class folder and save them as JSON.

    Walks ``SOURCE_PATH`` and, for each directory whose name appears in
    ``class_names``, loads every file in it, computes its MFCCs and stores them
    together with an integer label.

    Parameters
    ----------
    SOURCE_PATH : str
        Root directory containing one sub-directory of audio files per class.
    JSON_PATH : str
        Destination file; it is overwritten with ``{"labels": [...], "mfcc": [...]}``.
    class_names : sequence of str, optional
        Ordered class names. A file's label is the position of its directory
        name in this sequence. Defaults to the notebook-level ``classes`` list.
    sample_rate : int or None, optional
        Rate the audio is resampled to when loading. ``22050`` is librosa's own
        default; ``None`` keeps each file's native rate.
    duration : float or None, optional
        Number of seconds read from the start of each file. Shorter clips are
        zero-padded to this length so that every MFCC matrix has the same
        number of frames. ``None`` loads whole files and disables the padding.
    n_mfcc : int, optional
        Number of MFCC coefficients per frame.

    Returns
    -------
    None
    """
    if class_names is None:
        # Fall back to the list loaded from CLASSES_PATH earlier in the notebook.
        class_names = classes

    # The label is the position in the class list, NOT the os.walk visit index:
    # the walk order is OS-dependent and also counts folders that are not classes.
    label_of = {name: index for index, name in enumerate(class_names)}

    # Let's create a dictionary of labels and processed data.
    mydict = {
        "labels": [],
        "mfcc": []
        }

    for dirpath, dirnames, filenames in os.walk(SOURCE_PATH):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()

        species = os.path.basename(os.path.normpath(dirpath))
        if species not in label_of:
            continue

        for file in sorted(filenames):
            birdcall, rate = librosa.load(os.path.join(dirpath, file),
                                          sr=sample_rate, duration=duration)

            if duration is not None:
                # Force a fixed clip length so the stacked features form a
                # regular array instead of a ragged list.
                target_len = int(round(duration * rate))
                if len(birdcall) < target_len:
                    birdcall = np.pad(birdcall, (0, target_len - len(birdcall)))
                else:
                    birdcall = birdcall[:target_len]

            mfcc = librosa.feature.mfcc(y=birdcall, sr=rate, n_mfcc=n_mfcc)
            mydict["labels"].append(label_of[species])
            # Transpose to (frames, coefficients) before serialising.
            mydict["mfcc"].append(mfcc.T.tolist())

    # Let's write the dictionary in a json file called data.
    # The `with` block closes the file, so no explicit close() is needed.
    with open(JSON_PATH, 'w') as f:
        json.dump(mydict, f)

In [None]:
def load_data(JSON_PATH):
    """Load the cached features and labels from a JSON file.

    Parameters
    ----------
    JSON_PATH : str
        Path to a JSON file containing the keys ``"mfcc"`` and ``"labels"``.

    Returns
    -------
    X : numpy.ndarray
        Array built from the ``"mfcc"`` entry.
    y : numpy.ndarray
        Array built from the ``"labels"`` entry.
    """
    # The `with` block closes the file on exit; no explicit close() is needed.
    with open(JSON_PATH, 'r') as f:
        data = json.load(f)

    # Let's load our data into numpy arrays for TensorFlow compatibility.
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    return X, y

In [None]:
def prepare_datasets(inputs, targets, split_size, random_state=None):
    """Split the data into training, validation and test sets and add a channel axis.

    Parameters
    ----------
    inputs : numpy.ndarray
        Feature array whose first axis indexes the samples.
    targets : numpy.ndarray
        Label array aligned with ``inputs``.
    split_size : float or int
        Passed as ``test_size`` to both splits: the validation set is taken
        from the full data first, then the test set is taken from what remains.
    random_state : int or None, optional
        Seed forwarded to both splits. Pass an integer for a reproducible
        partition; ``None`` (the default) shuffles differently on every call.

    Returns
    -------
    tuple
        ``(inputs_train, inputs_val, inputs_test,
        targets_train, targets_val, targets_test)``.
    """
    # Creating a validation set and a test set.
    inputs_train, inputs_val, targets_train, targets_val = train_test_split(
        inputs, targets, test_size=split_size, random_state=random_state)
    inputs_train, inputs_test, targets_train, targets_test = train_test_split(
        inputs_train, targets_train, test_size=split_size, random_state=random_state)

    # Conv2D layers need a trailing channel axis, so append one of size 1
    # to every sample.
    inputs_train = inputs_train[..., np.newaxis]
    inputs_val = inputs_val[..., np.newaxis]
    inputs_test = inputs_test[..., np.newaxis]

    return inputs_train, inputs_val, inputs_test, targets_train, targets_val, targets_test

In [None]:
def design_model(input_shape, num_classes=None):
    """Build the (uncompiled) convolutional classifier.

    The network stacks three Conv2D -> MaxPooling2D -> BatchNormalization
    stages, applies dropout, flattens, and ends with a 64-unit dense layer
    followed by a softmax output layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first Conv2D layer.
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to the length of
        the notebook-level ``classes`` list, so the function no longer depends
        on a global ``targets`` array having been created beforehand.

    Returns
    -------
    tf.keras.models.Sequential
        The assembled model; compiling and fitting are left to the caller.
    """
    if num_classes is None:
        num_classes = len(classes)

    # Let's design the model architecture.
    model = tf.keras.models.Sequential([

        tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),

        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),

        tf.keras.layers.Conv2D(32, (2,2), activation='relu'),
        tf.keras.layers.MaxPooling2D((3,3), strides=(2,2), padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    return model

In [None]:
def make_prediction(model, X, y, idx):
    """Print the predicted and the true class name for one sample.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)`` that returns one score vector per sample.
    X : array-like
        Inputs handed to ``model.predict``.
    y : array-like
        Integer class labels aligned with ``X``.
    idx : int
        Position of the sample to report.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Integer label -> class name.
    genre_dict = {
        0 : 'akiapo',
        1 : 'aniani',
        2 : 'apapan',
        3 : 'barpet',
        4 : 'crehon',
        5 : 'elepai',
        6 : 'ercfra',
        7 : 'hawama',
        8 : 'hawcre',
        9 : 'hawgoo',
        10 : 'hawhaw',
        11 : 'hawpet1',
        12 : 'houfin',
        13 : 'iiwi',
        14 : 'jabwar',
        15 : 'maupar',
        16 : 'omao',
        17 : 'puaioh',
        18 : 'skylar',
        19 : 'warwhe1',
        20 : 'yefcan'
        }

    predictions = model.predict(X)
    # Cast to plain ints so the dictionary lookups do not depend on numpy scalar types.
    genre = int(np.argmax(predictions[idx]))
    truth = int(y[idx])

    print("\n---Now testing the model for one audio file---\nThe model predicts: {}, and ground truth is: {}.\n".format(genre_dict[genre], genre_dict[truth]))

In [66]:
# Display the class names again for reference.
classes

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']