# Background

The urbansounds dataset proved to be very noisy. Many of its classes were either irrelevant to our work or too similar to one another for their spectrograms to be distinguishable.

We decided to narrow the focus of our modelling efforts by selecting a few categories that could be relevant to the task at hand.

As such, we selected a few classes from the FSD project - https://zenodo.org/record/2552860#.XIG9LMtKg5l - and, of course, our own SoundFlux dataset.

As of March 7th, the classes used are as follows:

- Falling Dummy (simulated human falls from Rescue Randy) from SoundFlux
- General noise from SoundFlux
- Telephone - from FSD
- Laughter - from FSD
- Knock - from FSD

Get the data from: https://s3.amazonaws.com/soundflux-urbansounds/curated_4_class_bundle.zip (should be named '5 class bundle' ... will rename at some point)

# Loading the data and training the model

In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Register the session with standalone Keras (imported above) ...
set_session(sess)
# ... and with tf.keras, which is what the model in this notebook is built
# with. Without this call tf.keras creates its own session and the
# allow_growth option above is not applied to it.
tf.keras.backend.set_session(sess)

Using TensorFlow backend.


In [2]:
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import json
from PIL import Image

In [3]:
def generate_generator_multiple(generator,directories, batch_size, img_height,img_width):
    """Yield ``(data, labels)`` batches drawn in turn from several directories.

    One ``generator.flow_from_directory`` iterator is created per entry of
    *directories*. The iterators are then visited round-robin, one batch from
    each per pass. Draining them one after another (as this function used to
    do) never gets past the first directory when the underlying iterator is
    endless, so later directories were silently ignored.

    Args:
        generator: object exposing ``flow_from_directory`` (e.g. the
            ``ImageDataGenerator`` used in this notebook).
        directories: iterable of directory paths to read batches from.
        batch_size: batch size forwarded to ``flow_from_directory``.
        img_height: target image height forwarded as ``target_size[0]``.
        img_width: target image width forwarded as ``target_size[1]``.

    Yields:
        ``(data, labels)`` pairs exactly as produced by the per-directory
        iterators. With a single directory the sequence is the same as before.
        The generator finishes only if every underlying iterator finishes
        (or if *directories* is empty).
    """
    active = [
        iter(generator.flow_from_directory(directory,
                                           target_size=(img_height, img_width),
                                           class_mode='categorical',
                                           batch_size=batch_size,
                                           shuffle=True,
                                           seed=7))
        for directory in directories
    ]

    while active:
        still_active = []
        for flow in active:
            try:
                data, labels = next(flow)
            except StopIteration:
                # A finite iterator ran dry: drop it and keep serving the rest.
                continue
            yield data, labels
            still_active.append(flow)
        active = still_active

In [4]:
def save_plots(history,target_file_acc,target_file_loss):
    """Save the accuracy and loss curves of a training run as two image files.

    Args:
        history: object whose ``history`` dict holds the ``'acc'``,
            ``'val_acc'``, ``'loss'`` and ``'val_loss'`` series (e.g. the
            value returned by ``model.fit_generator``).
        target_file_acc: destination passed to ``plt.savefig`` for the
            accuracy figure.
        target_file_loss: destination passed to ``plt.savefig`` for the
            loss figure.
    """
    # (train series, validation series, title, y-axis label, output file)
    figures = (
        ('acc', 'val_acc', 'model accuracy', 'accuracy', target_file_acc),
        ('loss', 'val_loss', 'model loss', 'loss', target_file_loss),
    )
    for train_key, val_key, title, y_label, target_file in figures:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig(target_file)
        # Close the figure so the next curve starts from a clean canvas.
        plt.close()

In [None]:
def show_plots(history):
    """Display the accuracy and loss curves of a training run, one figure each.

    Args:
        history: object whose ``history`` dict holds the ``'acc'``,
            ``'val_acc'``, ``'loss'`` and ``'val_loss'`` series (e.g. the
            value returned by ``model.fit_generator``).
    """
    # (train series, validation series, title, y-axis label)
    figures = (
        ('acc', 'val_acc', 'model accuracy', 'accuracy'),
        ('loss', 'val_loss', 'model loss', 'loss'),
    )
    for train_key, val_key, title, y_label in figures:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

# Defining relevant parameters

In [None]:
# Shared hyper-parameters for the data generators and the training loop.
batch_size = 32  # images per batch requested from flow_from_directory
img_height=80  # target image height, also the first dimension of the model input
img_width = 256  # target image width, also the second dimension of the model input
approx_fold_size = 1000  # divided by batch_size to get the steps per epoch / validation / evaluation

In [None]:
# Root directories handed to flow_from_directory for training and for
# validation/testing. NOTE: absolute paths on the training machine; adjust locally.
train_folder = "/home/nvidia/bundle/split/train/"
test_folder = "/home/nvidia/bundle/split/test/"

In [None]:
# Image generator: multiplies pixel values by 1/255 and applies random
# width/height shifts, shear and zoom. Rotation and flips are left disabled.
datagen = ImageDataGenerator(rescale=1./255,
                            #rotation_range=10,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            shear_range=0.2,
                            zoom_range=0.3,
                            #horizontal_flip=True,
                            #vertical_flip=True,
                            fill_mode='nearest')

In [None]:
# Model input shape: (height, width, 3 channels).
input_shape = (img_height, img_width,3)
# Number of output classes, i.e. the size of the final softmax layer.
nclass = 5

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers

# Actual Model

In [None]:
# VGG16 convolutional base with ImageNet weights; include_top=False drops the
# original classifier so a custom head can be stacked on top.
base_model = keras.applications.vgg16.VGG16(weights='imagenet', 
                                include_top=False, 
                                input_shape=(img_height, img_width,3))

In [None]:
# Count the layers of the convolutional base (the next cell freezes the first 15).
len(base_model.layers)

19

In [None]:
# Freeze the first 15 layers of the VGG16 base; the remaining base layers stay
# trainable so they are fine-tuned together with the new head.
# (Alternative: `base_model.trainable = False` freezes the whole base.)
for layer in base_model.layers[:15]:
    layer.trainable = False

# Classification head stacked on the convolutional base: global average
# pooling, three shrinking dense layers, dropout, then a softmax over the
# `nclass` categories.
model = keras.models.Sequential([
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(nclass, activation='softmax'),
])

In [None]:
# RMSprop with a small learning rate and learning-rate decay.
opt = optimizers.RMSprop(lr=0.0001,decay=1e-3)
#opt = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', 
              optimizer=opt,
              metrics=['accuracy'])
# Save the freshly initialised weights so they can be reloaded later to reset the model.
model.save_weights('raw_model.h5')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 2, 8, 512)         14714688  
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               262656    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 165       
Total para

# Data Generator

In [None]:
# Training batches: read from the training folder with random augmentation.
train_generator = generate_generator_multiple(generator=datagen,
                                           directories = [train_folder],
                                           batch_size=batch_size,
                                           img_height=img_height,
                                           img_width=img_width)
# Validation batches must reflect the unmodified data, so they are only
# rescaled. Running them through the augmenting `datagen` (random shifts,
# shear, zoom) would distort the validation metrics.
eval_datagen = ImageDataGenerator(rescale=1./255)
test_generator = generate_generator_multiple(generator=eval_datagen,
                                           directories = [test_folder],
                                           batch_size=batch_size,
                                           img_height=img_height,
                                           img_width=img_width)

In [None]:
import math

#RESET WEIGHTS!!
#model.load_weights('raw_model.h5')
#
# Keras expects an integer number of steps; approx_fold_size / batch_size is a
# float (31.25 with the defaults), so round it up to whole batches.
steps = math.ceil(approx_fold_size / batch_size)
history = model.fit_generator(train_generator,
                          steps_per_epoch=steps,
                          validation_data = test_generator,
                          validation_steps = steps,
                          epochs=20,
                          shuffle=True, 
                          verbose=True)

Epoch 1/20
Found 432 images belonging to 5 classes.
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Best so far -> lr = 0.0001, decay=1e-3, 13 epochs. Final val accuracy = 92, loss = 0.4309

# Test on the final model

In [None]:
import math

# Fresh iterator over the test folder. The images are only rescaled (no random
# shifts/shear/zoom) so the reported metrics describe the unmodified test data.
test_generator = generate_generator_multiple(generator=ImageDataGenerator(rescale=1./255),
                                           directories = [test_folder],
                                           batch_size=batch_size,
                                           img_height=img_height,
                                           img_width=img_width)
# `steps` must be an integer; round the float ratio up to whole batches.
model.evaluate_generator(test_generator,
                              steps=math.ceil(approx_fold_size / batch_size),
                              verbose=True)

In [None]:
# Persist the trained weights of this run (partially unfrozen VGG16 base) to an HDF5 file.
model.save_weights('curated_model_v1_unfrozen_layers.h5')

The model weights were saved on the very first run of this model and are available here: 
https://s3.amazonaws.com/soundflux-urbansounds/curated_model_v1.zip