In [None]:
import os
from google.colab import drive

#drive.mount('/content/drive')

# Prefix for files kept on Google Drive: used when a `drive` entry exists in
# the current working directory, otherwise paths stay relative to it.
directory = 'drive/MyDrive/UrbanSound/' if 'drive' in os.listdir() else ''

# Download and settings

In [None]:
# Download the UrbanSound8K archive from Zenodo (-c resumes a partial download).
!wget -c https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz

In [None]:
# Unpack the archive into the current working directory.
!tar -xf UrbanSound8K.tar.gz

In [None]:
# The archive file is no longer needed once extracted.
!rm UrbanSound8K.tar.gz

In [None]:
!pip install librosa

# Preprocessing

In [None]:
#Just some metadata EDA
import pandas as pd

# Load the per-clip metadata table shipped with the dataset and preview its last rows.
metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')
metadata.tail()

In [None]:
# True if at least one clip has no class label.
metadata['class'].isna().any()

In [None]:
# True if at least one row has no file name.
metadata['slice_file_name'].isna().any()

In [None]:
# Index the metadata by clip file name so a row can be fetched with
# `metadata.loc[<file name>]`. The guard makes re-running this cell a no-op
# instead of a KeyError once the column has already become the index.
if metadata.index.name != 'slice_file_name':
  metadata = metadata.set_index('slice_file_name')

In [None]:
import matplotlib.pyplot as plt

# Pie chart of the number of clips per class; written to a PDF, then displayed.
metadata['class'].value_counts().plot.pie()
plt.savefig('class-pie.pdf',bbox_inches='tight')
plt.show()

In [None]:
# Sampling rate handed to librosa (as `sr`) when loading clips and computing features.
sample_rate = 44100

In [None]:
from os import listdir
import librosa

# Decode every clip of the ten folds into memory.
# Each entry of `raw_sounds` is (file name, waveform, fold, classID).
raw_sounds = []

for fold in range(1,11):
  path = f'UrbanSound8K/audio/fold{fold}/'
  # Sorted listing: os.listdir returns entries in arbitrary order, and the row
  # order of everything derived from `raw_sounds` should be reproducible.
  for file in sorted(listdir(path)):
    # Match the suffix; a substring test would also accept e.g. 'x.wav.bak'.
    if not file.endswith('.wav'):
      continue
    sound_file, sr = librosa.load(path + file, sr = sample_rate) #load raw sound from sound files
    # Requires `metadata` to be indexed by slice_file_name.
    row = metadata.loc[file]
    raw_sounds.append((file,sound_file,row['fold'],row['classID']))

In [None]:
#SOUND FEATURE EXTRACTION
import numpy as np

# Frame parameters shared by the zero-crossing-rate and RMS features.
hop_length = 256
frame_length = 512

# One row per clip: name, 20 mean MFCCs, mean chroma bins, mean ZCR,
# four RMS statistics, fold and classID.
features = []

for name,sound,fold,classid in raw_sounds:
  # Average each coefficient over time (frames) to get a fixed-length vector.
  mfcc_coefficients = np.mean(librosa.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=20).T,axis=0)
  stft = np.abs(librosa.stft(sound))
  chromas = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
  # The waveform is passed by keyword: librosa >= 0.10 made the arguments of
  # feature.rms keyword-only, so a positional waveform raises TypeError there.
  zcr = np.mean(librosa.feature.zero_crossing_rate(y=sound,hop_length=hop_length,frame_length=frame_length))
  rms = librosa.feature.rms(y=sound, frame_length=frame_length, hop_length=hop_length, center=True)[0]
  rms_mean = np.mean(rms)
  rms_std = np.std(rms)
  rms_min = np.min(rms)
  rms_max = np.max(rms)
  # NOTE: stacking the string `name` with numbers makes the whole row a string
  # array; numeric dtypes are only recovered once the table is re-read from CSV.
  features.append(np.hstack(([name],mfcc_coefficients,chromas,[zcr],[rms_mean,rms_std,rms_min,rms_max],[fold,classid])))

# Column names are derived from the vectors of the last processed clip, so
# `raw_sounds` must contain at least one entry.
dfsound = pd.DataFrame(features, columns= ['slice_file_name'] \
                          + [f'mfcc_{i}' for i in range(len(mfcc_coefficients))] \
                          + [f'chroma_{i}' for i in range(len(chromas))] \
                          + ['zcr'] \
                          + ['rms_mean','rms_std','rms_min','rms_max'] + ['fold','classID'])

In [None]:
import pandas as pd
#dfsound contains slice_file_name, the sound features, fold and classID
# Write and read the SAME file: saving to the working directory while loading
# from `directory` would reload a different (possibly stale) copy whenever
# `directory` is not empty.
dfsound.to_csv(directory+'urbansound_features.csv',index=False)
dfsound = pd.read_csv(directory+'urbansound_features.csv')

In [None]:
# Preview the last rows of the reloaded feature table.
dfsound.tail()

In [None]:
# Fold-level hold-out split: models are fitted on folds 1, 2, 3, 4 and 6
# and scored on folds 5, 7, 8, 9 and 10.
train_folds = [1, 2, 3, 4, 6]
test_folds = [5, 7, 8, 9, 10]

In [None]:
#SPECTROGRAM CONSTRUCTION
import os
import math
import librosa
import matplotlib.pyplot as plt

# Render one spectrogram image per clip into UrbanSound8K/img_data/fold<k>/.
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(8,8))
count = 0
sample_rate = 44100

for fold in range(1,11):
  path = f'UrbanSound8K/audio/fold{fold}/'
  out_dir = f'UrbanSound8K/img_data/fold{fold}'
  # Create the output folder (and its parent) once per fold; exist_ok makes
  # re-running the cell safe without a try/except around every os.mkdir.
  os.makedirs(out_dir, exist_ok=True)
  for file in os.listdir(path):
    # Match the suffix; a substring test would also accept e.g. 'x.wav.bak'.
    if not file.endswith('.wav'):
      continue
    y, sr = librosa.load(path + file, sr = sample_rate)
    plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB')
    plt.axis('off')

    # Same base name as the clip, with the 4-character '.wav' suffix replaced.
    plt.savefig(f'{out_dir}/{file[:-4]}.png',transparent=True, pad_inches=0.0)
    # Clear the shared figure so the next clip starts from an empty canvas.
    plt.clf()

    count += 1
    if count % 1000 == 0:
      print("Processed ", count, " files")

In [None]:
#NORMALIZATION
from sklearn.preprocessing import StandardScaler

# Split the feature table by fold.
train = dfsound[dfsound['fold'].isin(train_folds)]
test = dfsound[dfsound['fold'].isin(test_folds)]

# Identifier, fold and label columns are not model inputs.
data_train = train.drop(['slice_file_name','fold','classID'],axis=1)
data_test = test.drop(['slice_file_name','fold','classID'],axis=1)

# Learn mean/variance on the training folds ONLY and reuse them for the test
# folds. Fitting again on the test data would leak test statistics and put
# train and test features on different scales.
scaler = StandardScaler()
X_train = scaler.fit_transform(data_train)
X_test = scaler.transform(data_test)
y_train = train['classID']
y_test = test['classID']

In [None]:
#PER-FOLD TEST SETS
# One (features, labels) pair per test fold, in the order of `test_folds`.
test_per_fold = [test[test['fold']==f] for f in test_folds]
data_test_per_fold = [df.drop(['slice_file_name','fold','classID'],axis=1) for df in test_per_fold]
# Every fold is scaled with statistics learned from the training folds. The
# scaler is fitted on `data_train` right here so that this cell does not depend
# on what the shared `scaler` object was last fitted on, and so that no fold is
# standardised with its own mean/variance.
train_scaler = StandardScaler().fit(data_train)
X_test_per_fold = [train_scaler.transform(data) for data in data_test_per_fold]
y_test_per_fold = [df['classID'] for df in test_per_fold]

In [None]:
import numpy as np

# (features, labels) pair passed to Keras as `validation_data`; built from the test folds.
validation_data = (X_test,y_test.values)

# Audio Player utility

In [None]:
"""
import IPython.display as ipd
numTrack = 0 #number of track in dfsound dataset
nameTrack = dfsound['slice_file_name'][numTrack]
fold = dfsound[dfsound['slice_file_name']==nameTrack]['fold'].values[0]
fpath = f'UrbanSound8K/audio/fold{fold}/{nameTrack}'
ipd.Audio(fpath)
"""

# Classification

In [None]:
#Some imports and plot settings
%matplotlib inline

import math
import matplotlib.pyplot as plt
import numpy as np

import os, re, math, json, shutil, pprint
import tensorflow as tf
from matplotlib import pyplot as plt
import math
from google.colab import output
from IPython.display import clear_output

print("Tensorflow version " + tf.__version__)

# Abbreviate arrays when they are printed.
np.set_printoptions(precision=3, threshold=11)

# Matplotlib config
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and the
# old names were removed later, so use whichever name this installation has.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Matplotlib fonts
MATPLOTLIB_FONT_DIR = os.path.join(os.path.dirname(plt.__file__),
                                   "mpl-data/fonts/ttf")

## FFNN

In [None]:
# Number of distinct class ids in the feature table = size of the output layer.
num_class = dfsound['classID'].unique().size

In [None]:
from keras.models import Sequential
from keras import layers

# Training settings shared by the fit/evaluate calls of this notebook.
NUM_EPOCH = 100  # upper bound on epochs; early stopping may end a run sooner
BATCH_SIZE = 128  # mini-batch size used by fit and evaluate
SAMPLE_SIZE = len(dfsound)  # number of rows in the feature table

In [None]:
import tensorflow as tf

# Stop a run once the monitored 'accuracy' metric has gone three consecutive
# epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3,
    monitor='accuracy',
)

### Zero hidden layers

In [None]:
# Keras History objects of the FFNN runs, appended in training order.
history_ffnn = list()
# Legend labels, one per FFNN variant, in the same order.
ffnn_labels = ['0hidden'] + [f'{depth}-hidden' for depth in (1, 2, 3)]

In [None]:
# Baseline without hidden layers: the inputs feed the softmax layer directly.
ffnn_0hidden = Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_0hidden.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

ffnn_0hidden.summary()

In [None]:
# Train the baseline. No `validation_steps` cap is passed: with in-memory
# arrays a cap of 1 scores a single batch of BATCH_SIZE samples per epoch,
# whereas omitting it validates on the whole validation set.
history = ffnn_0hidden.fit(X_train,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data,
                    callbacks = [early_stopping]
                    )
history_ffnn.append(history)

In [None]:
# Position within `test_folds` -> fold number as a string (bar-chart tick label).
ITOFOLD = {position: str(fold_id) for position, fold_id in enumerate(test_folds)}

In [None]:
def evaluate_on_test_folds(model,X_test_per_fold,y_test_per_fold):
  """
    Score `model` separately on every test fold.

    model:            object whose `evaluate(X, y, batch_size=...)` returns a
                      (loss, accuracy) pair.
    X_test_per_fold:  sequence with the inputs of each fold.
    y_test_per_fold:  sequence with the labels of each fold, in the same order.

    Returns the list of accuracies, one per fold, in input order.
  """
  def _fold_accuracy(fold_X, fold_y):
    # Only the accuracy is kept; the loss is discarded.
    _loss, fold_acc = model.evaluate(fold_X, fold_y, batch_size=BATCH_SIZE)
    return fold_acc

  return [_fold_accuracy(fold_X, fold_y)
          for fold_X, fold_y in zip(X_test_per_fold, y_test_per_fold)]

In [None]:
# Accuracy of the zero-hidden-layer network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_0hidden, X_test_per_fold, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnn_0hidden_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

### One hidden layer

In [None]:
# lr decay function
def lr_decay(epoch):
  """Learning rate for `epoch`: 0.01 multiplied by 0.95 once per elapsed epoch."""
  initial_lr, decay_rate = 0.01, 0.95
  return initial_lr * math.pow(decay_rate, epoch)

# lr schedule callback: hands `lr_decay` to Keras' LearningRateScheduler;
# verbose=True asks the scheduler to report its updates.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(lr_decay, verbose=True)

def plot_learning_rate(lr_func, epochs):
  """
    Plot a learning-rate schedule as a step curve, save it to 'lr-decay.pdf'
    and display the figure.

    lr_func: callable mapping an epoch index to a learning rate.
    epochs:  number of training epochs; the x axis runs from 0 to `epochs`,
             so `epochs` must be at least 1.
  """
  # Builtin float: the `np.float` alias was removed in NumPy 1.24.
  xx = np.arange(epochs+1, dtype=float)
  # Evaluate the schedule that was passed in rather than the global lr_decay.
  y = [lr_func(x) for x in xx]
  fig, ax = plt.subplots(figsize=(9, 6))
  ax.set_xlabel('epochs')
  # y[-2] is the rate of the final epoch (index epochs-1); y[-1] only closes
  # the last step of the curve.
  ax.set_title('Learning rate\ndecays from {:0.3g} to {:0.3g}'.format(y[0],
                                                                      y[-2]))
  ax.minorticks_on()
  ax.grid(True, which='major', axis='both', linestyle='-', linewidth=1)
  ax.grid(True, which='minor', axis='both', linestyle=':', linewidth=0.5)
  ax.step(xx,y, linewidth=3, where='post')
  plt.savefig('lr-decay.pdf',bbox_inches='tight')
  display(fig)

plot_learning_rate(lr_decay, NUM_EPOCH)


In [None]:
#One hidden-layer
# 32 ReLU units with 25% dropout, followed by the softmax classifier.
ffnn_1hidden = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_1hidden.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_1hidden.summary()

In [None]:
# Train with the decaying learning rate plus early stopping. No
# `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores a
# single batch per epoch instead of the whole validation set.
history = ffnn_1hidden.fit(X_train,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data,
                    callbacks=[lr_decay_callback,early_stopping])

history_ffnn.append(history)

In [None]:
# Accuracy of the one-hidden-layer network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_1hidden, X_test_per_fold, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnn1hidden_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

### Two hidden layers

In [None]:
#two hidden-layer
# 32 -> 24 ReLU units, each followed by 25% dropout, then the softmax classifier.
ffnn_2hidden = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(24, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_2hidden.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_2hidden.summary()

In [None]:
# Train with the decaying learning rate plus early stopping. No
# `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores a
# single batch per epoch instead of the whole validation set.
history = ffnn_2hidden.fit(X_train,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data,
                    callbacks = [lr_decay_callback,early_stopping]
                    )

history_ffnn.append(history)

In [None]:
# Accuracy of the two-hidden-layer network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_2hidden, X_test_per_fold, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnn2hidden_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

### Three hidden layers

In [None]:
#three hidden-layer
# 32 -> 24 -> 16 ReLU units, each followed by 25% dropout, then the softmax classifier.
ffnn_3hidden = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(24, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(16, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_3hidden.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_3hidden.summary()

In [None]:
# Train with the decaying learning rate plus early stopping. No
# `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores a
# single batch per epoch instead of the whole validation set.
history = ffnn_3hidden.fit(X_train,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data,
                    callbacks = [lr_decay_callback,early_stopping]
                    )

history_ffnn.append(history)

In [None]:
# Accuracy of the three-hidden-layer network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_3hidden, X_test_per_fold, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnn3hidden_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

In [None]:
# Validation accuracy per epoch of the 1-, 2- and 3-hidden-layer networks.
plt.figure(figsize=(15,5))
for i in range(1,4):
    # Bind each label to its own curve so legend entries cannot drift out of order.
    plt.plot(history_ffnn[i].history['val_accuracy'], label=ffnn_labels[i])
# Title, axis labels and legend are set once rather than on every iteration.
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.savefig('ffnn_val_acc.pdf')
plt.show()


### Dimensionality reduction

#### Boruta

In [None]:
!pip install boruta

In [None]:
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy
from multiprocessing import cpu_count

# Boruta feature selection wrapped around a shallow, class-balanced random
# forest that uses one job per CPU core.
boruta_selector = BorutaPy(
        RandomForestClassifier(max_depth=5, class_weight='balanced', n_jobs=cpu_count()),
        n_estimators='auto',
        alpha=0.05, # p_value
        max_iter=100,
        random_state=42,
        verbose=2
)

# Training features restricted to the columns Boruta kept.
boruta_result = boruta_selector.fit_transform(X_train, y_train)

#### PCA

##### One for all

In [None]:
from sklearn.decomposition import PCA

In [None]:
#PCA them all!
# Fit ONE projection on the training features and apply it to every other
# split. A PCA fitted separately per dataset gives each split its own axes, so
# component k would not mean the same thing at training and evaluation time.
pca_all = PCA(n_components=20).fit(X_train)

X_train_pca = pca_all.transform(X_train)
validation_data_pca = (pca_all.transform(validation_data[0]),validation_data[1])
X_test_per_fold_pca = [pca_all.transform(X) for X in X_test_per_fold]

In [None]:
# Two-hidden-layer network fed with the PCA-reduced features.
ffnn_pca = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train_pca.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(24, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_pca.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_pca.summary()

In [None]:
# Train on the PCA features. No `validation_steps` cap is passed: with
# in-memory arrays a cap of 1 scores a single batch per epoch instead of the
# whole validation set.
history = ffnn_pca.fit(X_train_pca,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data_pca,
                    callbacks = [lr_decay_callback,early_stopping]
                    )

In [None]:
# Accuracy of the PCA-input network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_pca, X_test_per_fold_pca, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnnpca_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

##### Different PCAs for different features

In [None]:
#One PCA for mfcc from 20 to 10
#One PCA for chroma from 12 to 5
#Concatenate mfcc, chroma, other features(zcr + rmse)

def _fit_pca_schema(X,num_mfcc = 20, num_chroma = 12):
  """Fit the MFCC and chroma PCAs on X; returns the pair (pca_mfcc, pca_chroma)."""
  pca_mfcc = PCA(n_components=10).fit(X[:,:num_mfcc])
  pca_chroma = PCA(n_components=5).fit(X[:,num_mfcc:(num_mfcc + num_chroma)])
  return pca_mfcc, pca_chroma

def pcaSchema(X,num_mfcc = 20, num_chroma = 12, pcas = None):
  """
    Reduce the MFCC block to 10 components and the chroma block to 5, and keep
    the remaining columns unchanged.

    X:          2-D array whose first `num_mfcc` columns are the MFCCs, followed
                by `num_chroma` chroma columns and then the other features.
    num_mfcc:   number of MFCC columns.
    num_chroma: number of chroma columns.
    pcas:       optional (pca_mfcc, pca_chroma) pair of already fitted PCAs.
                When omitted, both PCAs are fitted on X itself.

    Returns the array [10 MFCC components | 5 chroma components | other columns].
  """
  if pcas is None:
    pcas = _fit_pca_schema(X, num_mfcc, num_chroma)
  pca_mfcc, pca_chroma = pcas
  X_pca_mfcc = pca_mfcc.transform(X[:,:num_mfcc])
  X_chroma_pca = pca_chroma.transform(X[:,num_mfcc:(num_mfcc + num_chroma)])
  return np.hstack((X_pca_mfcc,X_chroma_pca,X[:,(num_mfcc+num_chroma):]))

# Fit both projections on the training features only and reuse them for the
# validation data and every test fold, so that all splits share the same axes.
pcas_train = _fit_pca_schema(X_train)
X_train_pcaSchema= pcaSchema(X_train, pcas=pcas_train)
validation_data_pcaSchema = (pcaSchema(validation_data[0], pcas=pcas_train),validation_data[1])
X_test_per_fold_pcaSchema = [pcaSchema(X, pcas=pcas_train) for X in X_test_per_fold]

In [None]:
# Two-hidden-layer network fed with the block-wise PCA features.
ffnn_pcaSchema = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train_pcaSchema.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(24, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_pcaSchema.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_pcaSchema.summary()

In [None]:
# Train on the block-wise PCA features. No `validation_steps` cap is passed:
# with in-memory arrays a cap of 1 scores a single batch per epoch instead of
# the whole validation set.
history = ffnn_pcaSchema.fit(X_train_pcaSchema,
                    y_train,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data_pcaSchema,
                    callbacks = [lr_decay_callback,early_stopping]
                    )

In [None]:
# Accuracy of the block-wise-PCA network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(ffnn_pcaSchema, X_test_per_fold_pcaSchema, y_test_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('ffnnpcaSchema_acc_test_folds.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

## CNN

In [None]:
import pandas as pd

# Reload the saved feature table; the CNN section reads file names and class ids from it.
dfsound = pd.read_csv(directory+'urbansound_features.csv')
dfsound.head()

In [None]:
#!unzip drive/MyDrive/UrbanSound/UrbanSound_img.zip

In [None]:
from tensorflow.keras.preprocessing.image import load_img,img_to_array,array_to_img

def getTFDataset(numFold,metadata):
    """
      Load the spectrogram images of one fold together with their class labels.

      numFold:  fold number; images are read from
                'UrbanSound8K/img_data/fold<numFold>/'.
      metadata: DataFrame with a 'slice_file_name' column (the clip's .wav
                name) and a 'classID' column.

      Returns (img, labels): the list of 64x64 image arrays and the list of the
      matching classID values. Entries follow the row order of `metadata`
      rather than the arbitrary order of os.listdir, so the result is
      deterministic and lines up with other data derived from the same table.

      Raises KeyError when an image has no row in `metadata`.
    """
    pathFold = f'UrbanSound8K/img_data/fold{numFold}/'

    clip_names = metadata['slice_file_name'].to_numpy()
    class_ids = metadata['classID'].to_numpy()
    # Clip name -> position of its first row: one pass over the table instead
    # of a full scan per image.
    row_of = {}
    for position, clip in enumerate(clip_names):
        row_of.setdefault(clip, position)

    found = []
    for file in os.listdir(pathFold):
        # Match the suffix; a substring test would also accept e.g. 'x.png.tmp'.
        if not file.endswith('.png'): continue
        wav = os.path.splitext(file)[0] + '.wav'
        if wav not in row_of:
            raise KeyError(f'no metadata row for {wav} (image {pathFold+file})')
        found.append((row_of[wav], file))
    # Order the images like the rows of `metadata`.
    found.sort()

    img = []
    labels = []
    for position, file in found:
        image = load_img(pathFold+file,target_size=(64,64))
        img.append(img_to_array(image))
        labels.append(class_ids[position])

    return img,labels

In [None]:
#Specgram train set construction
import numpy as np

# Images and labels of all training folds, concatenated in `train_folds` order.
train_img, train_labels = [], []
for numFold in train_folds:
  fold_img, fold_labels = getTFDataset(numFold, dfsound)
  train_img.extend(fold_img)
  train_labels.extend(fold_labels)

train_img = np.asarray(train_img)
train_labels = np.asarray(train_labels)

In [None]:
#Specgram test set construction
# Kept separate per fold so that accuracy can be reported fold by fold.
test_img_per_fold, test_labels_per_fold = [],[]

for numFold in test_folds:
  fold_img, fold_labels = getTFDataset(numFold,dfsound)
  test_img_per_fold.append(np.asarray(fold_img))
  test_labels_per_fold.append(np.asarray(fold_labels))

In [None]:
# Flatten the per-fold test sets into one (images, labels) validation pair.
validation_img, validation_labels = [], []
for fold_imgs, fold_labels in zip(test_img_per_fold, test_labels_per_fold):
  validation_img.extend(fold_imgs)
  validation_labels.extend(fold_labels)
validation_data_img = (np.asarray(validation_img),np.asarray(validation_labels))

In [None]:
from keras.models import Sequential
from keras import layers

#ARCHITECTURES: 1 / 2 / 3 Conv2D Layers with 16 / 32 / 64 neurons + 32 / 64 / 128 Dense hidden neurons

num_conv2d_layers = [1,2,3]
num_conv2d_neurons = [16,32,64]
num_dense_neurons = [32,64,128]

cnns_labels = []
cnns = []

# The i-th conv layer takes its filter count from num_conv2d_neurons[i], so the
# deepest architecture must not need more entries than that list provides.
assert max(num_conv2d_layers) <= len(num_conv2d_neurons)

# One compiled model per (number of conv blocks, dense width) combination;
# `cnns` and `cnns_labels` are filled in the same order.
for num_layers in num_conv2d_layers:
  for num_dense in num_dense_neurons:
    cnns_labels.append(f'{num_layers}Conv2D_{num_dense}Dense')
    cnn = Sequential()
    # Scale pixel values by 1/255 inside the model.
    cnn.add(layers.Rescaling(1./255,input_shape=(64,64,3)))
    for i in range(num_layers):
      cnn.add(layers.Conv2D(num_conv2d_neurons[i],3,padding='same',activation='relu'))
      cnn.add(layers.MaxPooling2D())
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(num_dense,activation='relu'))
    cnn.add(layers.Dense(10,activation='softmax'))
    cnn.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    cnns.append(cnn)

# Train every CNN of the grid and keep its History, in the order of `cnns`.
# No `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores
# a single batch per epoch instead of the whole validation set.
history_cnn = []
for i in range(len(num_conv2d_layers)*len(num_dense_neurons)):
  print(f'{cnns_labels[i]} START')
  history = cnns[i].fit(train_img,
                    train_labels,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data = validation_data_img,
                    callbacks = [early_stopping]
                    )
  print(f'{cnns_labels[i]} END')
  print()
  history_cnn.append(history)

In [None]:
# Per-fold test accuracy of every CNN: one bar chart and one summary per model.
x = [*ITOFOLD.values()]
n_models = len(num_conv2d_layers)*len(num_dense_neurons)
for i in range(n_models):
  label = cnns_labels[i]
  accuracies = evaluate_on_test_folds(cnns[i], test_img_per_fold, test_labels_per_fold)
  plt.barh(y=x, width=accuracies, color='#ff7f0e')
  plt.xlabel('accuracy')
  plt.ylabel('test fold')
  plt.savefig(f'{label}.pdf', bbox_inches='tight')
  plt.show()
  print(f'{label}')
  print(f'Avg accuracy: {np.mean(accuracies)}')
  print(f'Std accuracy: {np.std(accuracies)}')
  print()

In [None]:
# Validation accuracy per epoch of the three 3-conv-layer CNNs (grid entries 6..8).
plt.figure(figsize=(15,5))
for i in range(6,9):
    # Bind each label to its own curve so legend entries cannot drift out of order.
    plt.plot(history_cnn[i].history['val_accuracy'], label=cnns_labels[i])
# Title, axis labels and legend are set once rather than on every iteration.
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.savefig('CNN_3Conv2D_val_acc.pdf')
plt.show()

### Choose kernel size

In [None]:
# Grid search over the convolution kernel size for the 3-conv / 128-dense CNN.
# `acc_ks` collects the mean per-fold test accuracy of each kernel size.
x = list(ITOFOLD.values())
acc_ks = []
for ks in range(1,6): #grid search on kernel size in range [1,5]
  print(f'KS = {ks} start')
  cnn = Sequential()
  cnn.add(layers.Rescaling(1./255,input_shape=(64,64,3)))
  cnn.add(layers.Conv2D(16,ks,padding='same',activation='relu'))
  cnn.add(layers.MaxPooling2D())
  cnn.add(layers.Conv2D(32,ks,padding='same',activation='relu'))
  cnn.add(layers.MaxPooling2D())
  cnn.add(layers.Conv2D(64,ks,padding='same',activation='relu'))
  cnn.add(layers.MaxPooling2D())
  cnn.add(layers.Flatten())
  cnn.add(layers.Dense(128,activation='relu'))
  cnn.add(layers.Dense(10,activation='softmax'))

  cnn.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

  # No `validation_steps` cap is passed: with in-memory arrays a cap of 1
  # scores a single batch per epoch instead of the whole validation set.
  history = cnn.fit(train_img,
                    train_labels,
                    epochs=NUM_EPOCH,
                    batch_size=BATCH_SIZE,
                    validation_data = validation_data_img,
                    callbacks = [early_stopping]
                    )

  accuracies = evaluate_on_test_folds(cnn,test_img_per_fold,test_labels_per_fold)
  plt.barh(x,accuracies,color='#ff7f0e')
  plt.xlabel('accuracy')
  plt.ylabel('test fold')
  plt.savefig(f'cnn_ks{ks}.pdf',bbox_inches='tight')
  plt.show()
  print(f'kernel-size: {ks}')
  print(f'Avg accuracy: {np.mean(accuracies)}')
  print(f'Std accuracy: {np.std(accuracies)}')
  acc_ks.append(np.mean(accuracies))
  print()
  print(f'KS = {ks} END')
  print()

In [None]:
# Mean test accuracy as a function of the kernel size tried above (1..5).
plt.plot(list(range(1, 6)), acc_ks)
plt.xlabel('kernel-size')
plt.ylabel('accuracy')
plt.savefig('CNN_kernel-size_acc.pdf', bbox_inches='tight')
plt.show()

## MMNN

In [None]:
from tensorflow.keras import Model

#FFNN
# Tabular branch of the multimodal network: 32 -> 24 ReLU units with 25% dropout.
# `shape` has to be a tuple: (n) without a trailing comma is just the integer
# n, which newer Keras releases reject as a shape.
input_ffnn =  layers.Input(shape=(X_train.shape[1],), name="features_data")
hidden = layers.Dense(32,activation='relu')(input_ffnn)
dropout = layers.Dropout(0.25)(hidden)
hidden = layers.Dense(24, activation='relu')(dropout)
dropout = layers.Dropout(0.25)(hidden)
# Output of this branch, concatenated with the image branch later on.
last_hidden_ffnn = dropout


#CNN
# Image branch of the multimodal network: 1/255 pixel rescaling, three
# Conv2D + MaxPooling2D stages (16, 32, 64 filters, 3x3 kernels), then a
# 128-unit dense layer. `conv2d` and `pooling` are rebound at every stage,
# so the statement order defines the graph.

input_cnn = layers.Input(shape=(64,64,3), name = 'specgram_data')
rescaling = layers.Rescaling(1./255)(input_cnn)
conv2d = layers.Conv2D(16,3,padding='same',activation='relu')(rescaling)
pooling = layers.MaxPooling2D()(conv2d)
conv2d = layers.Conv2D(32,3,padding='same',activation='relu')(pooling)
pooling = layers.MaxPooling2D()(conv2d)
conv2d = layers.Conv2D(64,3,padding='same',activation='relu')(pooling)
pooling = layers.MaxPooling2D()(conv2d)
flatten = layers.Flatten()(pooling)
# Output of this branch, concatenated with the tabular branch.
last_hidden_cnn = layers.Dense(128,activation='relu')(flatten)

#MMNN
# Fuse the two branches: concatenate their last hidden activations, add one
# 64-unit dense layer and classify with a 10-way softmax.
concatenate = layers.Concatenate()([last_hidden_ffnn,last_hidden_cnn])
last_hidden_mmnn = layers.Dense(64,activation='relu')(concatenate)
output_mmnn = layers.Dense(10,activation='softmax')(last_hidden_mmnn)

mmnn = Model(inputs=[input_ffnn, input_cnn], outputs=output_mmnn, name="MMNN")
mmnn.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

mmnn.summary()

In [None]:
# The two inputs are matched row by row, so the tabular features and the
# spectrogram images must describe the same clips in the same order. Comparing
# the labels that came with each source is a necessary (not sufficient) check
# that catches a mismatch before the model is trained on mispaired data.
assert len(X_train) == len(train_img), 'feature rows and training images differ in number'
assert np.array_equal(np.asarray(y_train), train_labels), 'training features and images are not row-aligned'
assert np.array_equal(np.asarray(validation_data[1]), validation_data_img[1]), 'validation features and images are not row-aligned'

# Inputs keyed by the names of the model's two Input layers.
X_mmnn = {
    'features_data': X_train,
    'specgram_data': train_img
}

y_mmnn = y_train

validation_mmnn = ({
    'features_data': validation_data[0],
    'specgram_data': validation_data_img[0]
}, validation_data[1])

In [None]:
# Train the multimodal network for a fixed 40 epochs (no callbacks). No
# `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores a
# single batch per epoch instead of the whole validation set.
history = mmnn.fit(X_mmnn,
                    y_mmnn,
                    epochs=40,
                    batch_size=BATCH_SIZE,
                    validation_data = validation_mmnn,
                    callbacks = []
                    )

In [None]:
# Per-fold inputs for the multimodal network: each fold pairs its feature
# matrix with its image array under the model's input names.
test_mmnn_per_fold = [
    {'features_data': fold_features, 'specgram_data': fold_images}
    for fold_features, fold_images in zip(X_test_per_fold, test_img_per_fold)
]
# zip() stops at the shortest sequence, so labels are cut to the same length.
labels_mmnn_per_fold = list(y_test_per_fold)[:len(test_mmnn_per_fold)]

In [None]:
# Accuracy of the multimodal network on each test fold, as horizontal bars.
x = [*ITOFOLD.values()]
accuracies = evaluate_on_test_folds(mmnn, test_mmnn_per_fold, labels_mmnn_per_fold)
plt.barh(y=x, width=accuracies, color='#ff7f0e')
plt.xlabel('accuracy')
plt.ylabel('test fold')
plt.savefig('mmnn_acc.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Mean and standard deviation of the current per-fold `accuracies`.
print(f'Avg accuracy: {np.mean(accuracies)}')
print(f'Std accuracy: {np.std(accuracies)}')

In [None]:
# Fresh (untrained) copy of the two-hidden-layer network for the final comparison.
ffnn_2hidden = Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.25),
    layers.Dense(24, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_class, activation='softmax'),
])
ffnn_2hidden.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

ffnn_2hidden.summary()

In [None]:
# Fixed 40 epochs with the decaying learning rate and no early stopping. No
# `validation_steps` cap is passed: with in-memory arrays a cap of 1 scores a
# single batch per epoch instead of the whole validation set.
history2hidden = ffnn_2hidden.fit(X_train,
                    y_train,
                    epochs=40,
                    batch_size=BATCH_SIZE,
                    validation_data=validation_data,
                    callbacks = [lr_decay_callback]
                    )

In [None]:
# CNN used in the final comparison: three Conv2D + MaxPooling2D stages
# (16, 32, 64 filters, 3x3 kernels) and a 128-unit dense layer.
best_cnn = Sequential([
    layers.Rescaling(1./255, input_shape=(64,64,3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

best_cnn.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
  
# Fixed 40 epochs without callbacks. No `validation_steps` cap is passed: with
# in-memory arrays a cap of 1 scores a single batch per epoch instead of the
# whole validation set.
history_bestcnn = best_cnn.fit(train_img,
                    train_labels,
                    epochs=40,
                    batch_size=BATCH_SIZE,
                    validation_data = validation_data_img,
                    callbacks = []
                    )

In [None]:
# Validation accuracy per epoch of the three final models.
nnlabels = ['MMNN','FFNN','CNN']
plt.figure(figsize=(15,5))
for h, label in zip([history,history2hidden,history_bestcnn], nnlabels):
    # Bind each label to its own curve so legend entries cannot drift out of order.
    plt.plot(h.history['val_accuracy'], label=label)
# Title, axis labels and legend are set once rather than on every iteration.
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.savefig('best_nn_acc.pdf',bbox_inches='tight')
plt.show()