In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from brian2 import *
from brian2hears import *
import librosa
import librosa.display
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split
from pydub import AudioSegment

Hyperparameters

In [None]:
# --- Data -------------------------------------------------------------------
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
featuresdf = pd.read_pickle('approxGT_224_test.pkl')

# Input geometry is read from the first stored feature. `.iloc[0]` is
# positional, so it works even if the frame's index does not contain label 0.
img_height, img_width = np.shape(featuresdf['feature'].iloc[0])
# Number of distinct labels present in the data set.
num_classes = len(np.unique(featuresdf['class_label']))
# Features are 2-D arrays; a single channel axis is appended before training.
num_channels = 1

# --- Training ---------------------------------------------------------------
batch_size = 50
normalize_toggle = False   # when True, features are standardised via normalize_data()
global_pooling = False     # not referenced in the visible cells
learning_rate = 0.001
num_epochs = 10000
learn_rate_gamma = 0.9999            # not referenced in the visible cells
learn_rate_step = 100 * batch_size   # not referenced in the visible cells
l2_regularization = 0      # passed to the optimizer as its weight_decay
# One-pixel padding on both spatial axes of a 4-D tensor (first and last axes
# untouched); not referenced in the visible cells.
paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])

# --- Architecture -----------------------------------------------------------
# Output channels of each convolutional block; 0 disables the block.
conv1_out = 32
conv2_out = 32
conv3_out = 0
conv4_out = 0

dropout_p = 0.5            # not referenced in the visible cells

conv_channels = [conv1_out, conv2_out, conv3_out, conv4_out]
# Only blocks with a non-zero width are built.
active_channels = [width for width in conv_channels if width != 0]
num_layers = len(active_channels)
# Channel count leaving the convolutional stack. With no active block the
# image reaches the classifier head unchanged, so fall back to the input
# channel count instead of raising IndexError on an empty list.
last_layer_channels = active_channels[-1] if active_channels else num_channels

Helper Functions

In [None]:
def normalize_data(train_df, test_df):
    """Standardise the 'feature' arrays pixel-wise using training-set statistics.

    The per-pixel mean and standard deviation are computed over the training
    features only and then applied to both frames, so no information from the
    test set leaks into the normalisation.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames with the columns 'feature' (equally shaped arrays),
        'class_label' and 'fold'. Neither frame is modified.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        New train and test frames with the columns
        ['feature', 'class_label', 'fold'] and a fresh 0..n-1 index.
    """
    # Stack every frame once: shape (n_samples, *feature_shape).
    train_stack = np.stack(train_df['feature'])
    test_stack = np.stack(test_df['feature'])

    # Pixel-wise statistics over the sample axis of the training set.
    mean = train_stack.mean(axis=0)
    std = train_stack.std(axis=0)
    # A pixel that is constant across the training set has std == 0; dividing
    # by it would yield NaN/inf. Use a unit divisor there so such pixels are
    # only mean-centred.
    std = np.where(std == 0, 1.0, std)

    def _rebuild(stack, source_df):
        # Assemble a frame with normalised features and the original metadata.
        return pd.DataFrame({
            'feature': list((stack - mean) / std),
            'class_label': source_df['class_label'].to_numpy(),
            'fold': source_df['fold'].to_numpy(),
        })

    return _rebuild(train_stack, train_df), _rebuild(test_stack, test_df)

Model Construction - Architecture

In [None]:
#fold_k = 7

# Random 80/20 split; the fixed random_state keeps the split reproducible.
train_df, test_df = train_test_split(featuresdf, test_size=0.2, random_state=25)

# Alternative: hold out one predefined fold instead of splitting randomly.
#train_df = featuresdf[featuresdf.fold != fold_k]
#test_df = featuresdf[featuresdf.fold == fold_k]

if normalize_toggle:
    # normalize_data returns new frames rather than modifying its arguments,
    # so the result has to be re-bound or the normalisation is silently lost.
    train_df, test_df = normalize_data(train_df, test_df)

# Stack the per-sample arrays into one (n_samples, height, width) array, then
# append a trailing channel axis at position 3 for the convolutional layers.
train_images = tf.expand_dims(tf.convert_to_tensor(np.stack(train_df['feature'])), 3)
train_labels = train_df['class_label']

test_images = tf.expand_dims(tf.convert_to_tensor(np.stack(test_df['feature'])), 3)
test_labels = test_df['class_label']

# Drop the references to the split frames; the tensors and label series above
# are all that later cells use.
train_df = None
test_df = None

In [None]:
# Release any model left over from a previous run of this cell before
# building a new one.
model = None
model = tf.keras.Sequential()

# INPUT LAYER
model.add(keras.Input(shape=(img_height, img_width, num_channels)))

# Augmentation: random shift along the width axis by up to 50% of the image
# width, with no vertical shift.
model.add(keras.layers.RandomTranslation(height_factor=0, width_factor=0.5))

# CONVOLUTIONAL BLOCKS
# One Conv -> BatchNorm -> ReLU block per non-zero entry of conv_channels.
# Iterating over active_channels (instead of indexing conv1_out..conv4_out by
# position) keeps the stack valid even when a zero entry precedes a non-zero
# one, and builds no block at all when every entry is zero.
for block_filters in active_channels:
    model.add(layers.Conv2D(block_filters, kernel_size=(3, 3), padding='valid', strides=(1, 1)))
    model.add(layers.BatchNormalization())
    model.add(layers.ReLU())

# Width of the hidden dense layer: the height of the feature map that leaves
# the convolutional stack. It is read from the model itself rather than from
# a hard-coded layer index, so it stays valid for any number of conv blocks
# (for the two-block configuration it equals the previous layers[5] value).
fc_inputlayer_size = model.output_shape[1]

model.add(layers.Flatten())

model.add(layers.Dense(fc_inputlayer_size))
model.add(layers.ReLU())

# Output layer: one raw logit per class found in the data set.
model.add(layers.Dense(num_classes))

model.summary()


In [None]:
# AdamW applies decoupled weight decay; with l2_regularization == 0 no decay
# is applied.
# NOTE(review): learn_rate_gamma / learn_rate_step are defined in the
# hyperparameter cell but no learning-rate schedule is wired in here -
# confirm whether a decay schedule was intended.
optimizer = tfa.optimizers.AdamW(weight_decay=l2_regularization, learning_rate=learning_rate)

# The final Dense layer emits raw logits and the labels are passed as a
# single label column, hence the sparse loss with from_logits=True.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# batch_size is passed explicitly; without it Keras falls back to its default
# of 32 and the configured hyperparameter is ignored.
# The held-out split doubles as validation data, so the reported validation
# metrics are measured on the test set.
history = model.fit(train_images, train_labels,
                    batch_size=batch_size,
                    epochs=num_epochs,
                    validation_data=(test_images, test_labels))