In [1]:
import numpy as np 
import os
import pandas as pd 
from scipy.io import wavfile

import librosa
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers, activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.utils import np_utils, to_categorical

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from datetime import datetime 

from matplotlib import pyplot as plt

In [2]:
# Load the pre-computed dataset from a pickle file in the working directory.
# Later cells read three columns from it: `fold`, `melspectrogram` and `label`.
# NOTE(review): the name suggests UrbanSound8K — confirm the provenance.
# NOTE(review): unpickling can execute arbitrary code; only load a file
# that was produced by a trusted source.
us8k_df = pd.read_pickle("us8k_df.pkl")

In [3]:
def init_model(input_shape=(128, 128, 1), num_classes=10, l2_rate=1e-3):
    """Build and compile the three-conv-layer CNN classifier.

    Args:
        input_shape: shape of a single input sample, handed to the first
            Conv2D layer. Defaults to (128, 128, 1), the default ``X_dim``
            of ``train_test_split``.
        num_classes: number of units in the final softmax layer.
        l2_rate: L2 kernel-regularisation factor used by the first two
            convolutional layers.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model1 = Sequential()

    # layer-1: conv -> max-pool -> ReLU
    # (ReLU after max-pooling gives the same values as ReLU before it,
    # because both operations are monotonic.)
    model1.add(Conv2D(filters=24, kernel_size=5, input_shape=input_shape,
                      kernel_regularizer=regularizers.l2(l2_rate)))
    model1.add(MaxPooling2D(pool_size=(3, 3), strides=3))
    model1.add(Activation(activations.relu))

    # layer-2: conv -> max-pool -> ReLU
    model1.add(Conv2D(filters=36, kernel_size=4, padding='valid',
                      kernel_regularizer=regularizers.l2(l2_rate)))
    model1.add(MaxPooling2D(pool_size=(2, 2), strides=2))
    model1.add(Activation(activations.relu))

    # layer-3: conv -> ReLU
    # NOTE(review): unlike the first two conv layers this one has no L2
    # kernel regularizer — confirm that is intentional.
    model1.add(Conv2D(filters=48, kernel_size=3, padding='valid'))
    model1.add(Activation(activations.relu))

    # Collapse each feature map to a single value before the dense head.
    model1.add(GlobalAveragePooling2D())

    # layer-4 (1st dense layer)
    model1.add(Dense(60, activation='relu'))
    model1.add(Dropout(0.5))

    # layer-5 (2nd dense layer): one softmax unit per class
    model1.add(Dense(num_classes, activation='softmax'))

    # compile
    model1.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    return model1

In [4]:
# Build one model instance only to print its layer-by-layer summary;
# process_fold() builds its own fresh model for every fold it trains.
model = init_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 124, 124, 24)      624       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 41, 41, 24)        0         
_________________________________________________________________
activation (Activation)      (None, 41, 41, 24)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 38, 38, 36)        13860     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 19, 36)        0         
_________________________________________________________________
activation_1 (Activation)    (None, 19, 19, 36)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 17, 17, 48)        1

In [5]:
def train_test_split(fold_k, data, X_dim=(128, 128, 1), num_classes=None):
    """Split `data` into train/test arrays, holding out fold `fold_k`.

    Args:
        fold_k: value of the `fold` column that selects the test rows; every
            other row goes to the training set.
        data: DataFrame with `fold`, `melspectrogram` and `label` columns.
        X_dim: shape each stacked spectrogram is reshaped to.
        num_classes: width of the one-hot label matrices. When None it is
            taken from the whole of `data` (largest label + 1), so the train
            and test matrices always have the same number of columns even if
            one split lacks the highest class.

    Returns:
        Tuple ``(XX_train, XX_test, yy_train, yy_test)``: reshaped feature
        arrays and one-hot encoded labels.

    Raises:
        ValueError: if either the training or the test selection is empty.
    """
    train_rows = data[data.fold != fold_k]
    test_rows = data[data.fold == fold_k]

    # np.stack would fail on an empty selection with an opaque message;
    # report which fold caused the problem instead.
    if len(train_rows) == 0 or len(test_rows) == 0:
        raise ValueError(
            "fold {!r} yields an empty split: {} training rows, {} test rows".format(
                fold_k, len(train_rows), len(test_rows)))

    X_train = np.stack(train_rows.melspectrogram.to_numpy())
    X_test = np.stack(test_rows.melspectrogram.to_numpy())

    y_train = train_rows.label.to_numpy()
    y_test = test_rows.label.to_numpy()

    XX_train = X_train.reshape(X_train.shape[0], *X_dim)
    XX_test = X_test.reshape(X_test.shape[0], *X_dim)

    # Without an explicit num_classes, to_categorical infers the width from
    # the labels it is given, so the two splits could end up with different
    # widths. Fix it once from the full dataset.
    if num_classes is None:
        num_classes = int(data.label.max()) + 1

    yy_train = to_categorical(y_train, num_classes=num_classes)
    yy_test = to_categorical(y_test, num_classes=num_classes)

    return XX_train, XX_test, yy_train, yy_test

In [6]:
def process_fold(fold_k, data, epochs=100, num_batch_size=32):
    """Train a fresh model with fold `fold_k` held out and return its History.

    Args:
        fold_k: fold identifier passed to ``train_test_split``; its rows are
            used for the pre-training evaluation and as validation data.
        data: DataFrame handed to ``train_test_split``.
        epochs: number of training epochs.
        num_batch_size: batch size for both evaluation and training.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Note:
        The held-out fold doubles as validation data, so the validation
        metrics recorded in the History are measured on the test fold.
    """
    # split the data
    X_train, X_test, y_train, y_test = train_test_split(fold_k, data)

    # A new model is built on every call. Release the global Keras state left
    # behind by models from earlier folds so memory does not keep growing.
    tf.keras.backend.clear_session()

    # init model
    model = init_model()

    # pre-training accuracy (score is [loss, accuracy])
    score = model.evaluate(X_test, y_test, batch_size=num_batch_size, verbose=0)
    print("Pre-training accuracy: %.4f%%\n" % (100 * score[1]))

    # train the model, timing the fit call
    start = datetime.now()
    history = model.fit(X_train, y_train, epochs=epochs,
                        validation_data=(X_test, y_test),
                        batch_size=num_batch_size)
    end = datetime.now()
    print("Training completed in time: ", end - start, '\n')

    return history

In [7]:
def show_results(tot_history):
    """Plot train/validation accuracy and loss for every History in `tot_history`.

    For each entry a two-panel figure is shown (accuracy on the left, loss on
    the right), followed by the best validation accuracy and loss of that run.
    """
    # (train key, validation key, y-axis label) for the left and right panels
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )

    for run_no, run in enumerate(tot_history, start=1):
        curves = run.history
        print('\n({})'.format(run_no))

        _, axes = plt.subplots(1, 2, figsize=(15, 5))
        for ax, (train_key, val_key, y_label) in zip(axes, panels):
            ax.plot(curves[train_key])
            ax.plot(curves[val_key])
            ax.grid(linestyle='--')
            ax.set_ylabel(y_label)
            ax.set_xlabel('Epoch')
            ax.legend(['train', 'validation'], loc='upper left')

        plt.show()

        best_val_acc = np.max(curves['val_accuracy']) * 100
        best_val_loss = np.min(curves['val_loss'])
        print('\tMax validation accuracy: %.4f %%' % best_val_acc)
        print('\tMin validation loss: %.5f' % best_val_loss)

In [8]:
# Fold 1 is held out. This cell also (re)creates `history1`, the list that
# collects one Keras History per fold, so re-running it discards earlier runs.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 1
history1 = []

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)

--------------------------------------------------------------------------------

(1)

Pre-training accuracy: 12.7148%

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training completed in time:  0:03:15.241489 



In [9]:
# Fold 2 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 2

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=20)
history1.append(history)

--------------------------------------------------------------------------------

(2)

Pre-training accuracy: 13.5135%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training completed in time:  0:06:30.990396 



In [10]:
# Fold 3 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 3

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=20)
history1.append(history)

--------------------------------------------------------------------------------

(3)

Pre-training accuracy: 12.8649%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training completed in time:  0:06:29.415879 



In [11]:
# Fold 4 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 4

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)

--------------------------------------------------------------------------------

(4)

Pre-training accuracy: 10.1010%

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training completed in time:  0:03:13.796932 



In [12]:
# Fold 5 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 5

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=30)
history1.append(history)

--------------------------------------------------------------------------------

(5)

Pre-training accuracy: 10.6838%

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training completed in time:  0:09:38.833439 



In [13]:
# Fold 6 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 6

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=30)
history1.append(history)

--------------------------------------------------------------------------------

(6)

Pre-training accuracy: 5.5893%

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training completed in time:  0:09:42.139028 



In [14]:
# Fold 7 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 7

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=30)
history1.append(history)

--------------------------------------------------------------------------------

(7)

Pre-training accuracy: 11.9332%

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Training completed in time:  0:11:02.430352 



In [None]:
# Fold 8 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 8

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)

In [None]:
# Fold 9 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 9

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)

In [None]:
# Fold 10 is held out; the resulting History is appended to `history1`.
# NOTE(review): the fold cells use 10, 20 or 30 epochs — confirm the
# per-fold budgets are intentional before comparing results.
FOLD_K = 10

# Banner: an 80-dash rule, then the fold number framed by blank lines.
print('-'*80, "\n({})\n".format(FOLD_K), sep='\n')

history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)