In [1]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm

import tensorflow as tf
# Set logging level to avoid unnecessary messages
tf.get_logger().setLevel('ERROR')
# Set autograph verbosity to avoid unnecessary messages
tf.autograph.set_verbosity(0)

import keras
import tensorflow_io as tfio
import tensorflow_probability as tfp
import tensorflow_extra as tfe

import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio

from pathlib import Path
import sys
import os
import time
import warnings
# suppress all warnings
warnings.filterwarnings("ignore")

XC_ROOTDIR = '../../data/' # directory to save data in
XC_DIR = 'test_dataset10' # subdirectory name of dataset

2024-06-24 22:36:50.912895: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-24 22:36:50.912948: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-24 22:36:50.914066: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-24 22:36:50.921270: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


2024-06-24 22:36:53.433840: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.470351: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.470712: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


### Load dataset

In [3]:
df = pd.read_csv("../../data/dataset10.csv")
df["fullfilename"] = "../" + df["fullfilename"]

### Configurations

In [4]:
class cfg:
    seed = 42

    # audio clip settings
    sr = 22050 # the target sampling rate
    duration = 15 # the duration of the clips
    n_samples = duration*sr
    hop_length = 2048 # "stepsize" of the fft for the melspectrograms
    nfft = 4096 # windowsize of the fft for the melspectrograms
    n_mels = 128 # number of mel frequency bins
    fmax = sr/2 # maximum frequency in the melspectrograms
    input_dim = (int(duration*sr/hop_length + 1), n_mels)

    # class labels/names
    names = list(np.unique(df.en))
    n_classes = len(names)
    labels = list(range(n_classes))
    label2name = dict(zip(labels, names))
    name2label = {v:k for k,v in label2name.items()}

tf.keras.utils.set_random_seed(cfg.seed)

## Data generator

In [5]:
# Generates random integer # from https://www.kaggle.com/code/wengsilu/birdclef24pretraining
def random_int(shape=[], minval=0, maxval=1):
    return tf.random.uniform(shape=shape, minval=minval, maxval=maxval, dtype=tf.int32)

# Generats random float
def random_float(shape=[], minval=0.0, maxval=1.0):
    rnd = tf.random.uniform(shape=shape, minval=minval, maxval=maxval, dtype=tf.float32)
    return rnd

In [6]:
 def decode(filepath):
        # read audio
        #try:
        audio = tfio.audio.AudioIOTensor(filepath, dtype = tf.float32) # lazy load the file
        #except Exception as e:
        #    print(f"Error loading audio file {filepath} with TensorFlow I/O: {e}")
        #    print("Proceeding to slow load file")
        #    return slow_load(filepath)
        #if audio.shape[0] == 0:
        #    print(f"Failed to load audio file {filepath.numpy} with TensorFlow I/O: shape[0] = 0")
        #    print("Proceeding to slow load file")
        #    return slow_load(filepath)
        
        rate = audio.rate
        # cut out clip of specified duration at random position
        num_samples = cfg.duration*rate
        length = tf.cast(audio.shape[0], tf.int32)
        if num_samples < length:
            rdm = random_int(maxval = length - num_samples)
            audio = audio[rdm:rdm+num_samples]
        else:
            audio = audio.to_tensor()
        audio = tf.cast(audio, tf.float32)
        # resample if necessary
        audio = tfio.audio.resample(audio, tf.cast(rate, tf.int64), cfg.sr) if rate != cfg.sr else audio
        # remove noise (tfio.audio.split() or tfio.audio.trim()?)# can't do this when the clip is already cut
        # stereo to mono
        audio = tf.reduce_mean(audio, axis=-1) if tf.shape(audio)[-1] == 2 else tf.squeeze(audio, axis = -1)
        # pad if necessary
        if tf.size(audio) < cfg.desired_length:
            missing = cfg.desired_length - tf.size(audio)
            rdm = random_int(maxval = missing)
            audio = tf.pad(audio, [[rdm, missing-rdm]]) # pad rdm zeros left and missing-rdm zeros rigth
        audio = tf.reshape(audio, [cfg.sr*cfg.duration])
        return audio

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=16, 
                 dim=cfg.input_dim,
                 n_channels =  1,
                 n_classes=cfg.n_classes, shuffle=True):
        'Initialization'
        self.dim = dim
        self.n_channels = n_channels
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = tf.reshape(load_audio(df.iloc[ID].fullfilename), [*self.dim, self.n_channels])
            #load_audio(df.iloc[ID].fullfilename).reshape(*self.dim, self.n_channels)
            
            # Store class
            y[i] = cfg.name2label[df.iloc[ID].en]
        X = X.reshape(len(X), *self.dim, self.n_channels)
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

## Model

In [7]:
from keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# can't get this to work
#melspec_layer = tfe.layers.MelSpectrogram(n_fft=cfg.nfft, 
#                                          hop_length=cfg.hop_length, 
#                                          sr=cfg.sr, 
#                                          fmin=0,
#                                          fmax=cfg.fmax,
#                                          )

norm_layer = tfe.layers.ZScoreMinMax()

def build_model():
    model = Sequential()
    #model.add(melspec_layer)
    model.add(norm_layer)
    model.add(Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu', input_shape=(int(cfg.duration*cfg.sr/cfg.hop_length + 1), cfg.n_mels, 1)))
    model.add(Conv2D(64, kernel_size=(3, 3), padding='valid', activation='relu'))
    model.add(Conv2D(128, kernel_size=(3, 3), padding='valid', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), padding="valid"))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    #model.summary()
    return model

In [8]:
model = build_model()

2024-06-24 22:36:53.600913: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.601253: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.601488: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.772125: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-24 22:36:53.772954: I external/local_xla/xla/stream_executor

In [9]:
from sklearn.model_selection import train_test_split

id_train, id_val, y_train, y_val = train_test_split(range(300), df["en"].to_list(), test_size = 0.3, random_state = cfg.seed)

training_generator = DataGenerator(id_train, y_train)
validation_generator = DataGenerator(id_val, y_val)

In [None]:
hist = model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True, verbose = 2, epochs = 15)

2024-06-24 22:36:54.350528: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
2024-06-24 22:36:54.355080: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
2024-06-24 22:36:55.132198: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-06-24 22:37:03.464237: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2024-06-24 22:37:03.603140: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-06-24 22:37:04.886531: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.55GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that the

Epoch 1/15


2024-06-24 22:37:05.258013: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.55GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-06-24 22:37:06.605587: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2024-06-24 22:37:06.766513: F external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:156] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2024-06-24 22:37:06.801992: F external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:156] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
2024-06-24 22:37:06.863948: F external/local_xla/xla/stream_executor/cuda/cuda_driver.cc: