# Background

The urbansounds dataset proved to be very noisy. Many of the classes were either irrelevant to our work or too similar for their spectrograms to be distinguishable.

We decided to narrow the focus of our modelling efforts by selecting a few categories.

This specific model uses two classes collected by the SoundFlux team. These classes (falling_dummy and falling_object) were augmented by overlaying them with random samples of seven different audio clips, ranging from a group talking in the background to the sound of running water in a shower. The idea was to make the model generalize well to unseen data.

As of April 12th, the classes used are as follows:

- Falling Dummy (simulated human falls from Rescue Randy) from SoundFlux plus all the augmented iterations
- Overlay noise from SoundFlux used to augment
- Falling Object (semi-bouncy object falling) plus all the augmented iterations

Get the data from: https://s3.amazonaws.com/soundflux-urbansounds/soundflux_augmented_three_class_dataset.zip

# Loading the data and training the model

In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# TF1-style session configuration: let TensorFlow grow its GPU memory
# allocation on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Create the session with that config and register it as the session used by
# the Keras TensorFlow backend.
sess = tf.Session(config=config)
set_session(sess)

Using TensorFlow backend.


In [2]:
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import json
from PIL import Image, ImageFile
# Tell PIL to load image files that are truncated instead of raising an error
# while reading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [3]:
def save_plots(history, target_file_acc, target_file_loss):
    """Write the accuracy and loss training curves to two image files.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch series ``'acc'``, ``'val_acc'``, ``'loss'`` and
            ``'val_loss'`` (as returned by ``model.fit_generator``).
        target_file_acc: Destination handed to ``plt.savefig`` for the
            accuracy figure.
        target_file_loss: Destination handed to ``plt.savefig`` for the
            loss figure.

    Raises:
        KeyError: If one of the four series is missing from
            ``history.history``.
    """
    # summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    # The 'test' legend entry is the validation series (val_*).
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(target_file_acc)
    # Close the figure so the loss curves start on a clean canvas.
    plt.close()
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(target_file_loss)
    plt.close()

In [4]:
def show_plots(history):
    """Display the training curves held in ``history.history``.

    Two figures are shown in sequence: train/validation accuracy first,
    then train/validation loss. ``history.history`` must provide the
    ``'acc'``, ``'val_acc'``, ``'loss'`` and ``'val_loss'`` series.
    """
    # (train series key, validation series key, figure title, y-axis label)
    panels = (
        ('acc', 'val_acc', 'model accuracy', 'accuracy'),
        ('loss', 'val_loss', 'model loss', 'loss'),
    )
    for train_key, val_key, title, y_label in panels:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        # The 'test' legend entry is the validation series.
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

# Defining relevant parameters

In [None]:
# Size (in pixels) every spectrogram image is resized to by the generators.
img_height, img_width = 80, 256
# Number of images per batch produced by the directory generators.
batch_size = 32
# Nominal number of samples per epoch; divided by batch_size to obtain the
# step counts passed to fit/evaluate.
approx_fold_size = 9000

In [None]:
# Root directories of the train / test spectrogram images, read with
# flow_from_directory (one sub-directory per class).
# NOTE: these absolute paths are specific to the original machine; point them
# at the unpacked dataset before running.
train_folder = "/home/nvidia/Downloads/soundflux_augmented/spectrograms/split/train/"
test_folder = "/home/nvidia/Downloads/soundflux_augmented/spectrograms/split/test/"

In [None]:
# Image generator used for the spectrograms: scales pixel values by 1/255 and
# applies random width/height shifts, shear and zoom, filling newly exposed
# pixels with the nearest existing value.
# Rotation and flips are left disabled.
# NOTE(review): presumably because flipping or rotating a spectrogram changes
# its time/frequency meaning — confirm with the authors.
# NOTE(review): this same augmenting generator is also used for the test
# folder further down.
datagen = ImageDataGenerator(rescale=1./255,
                            #rotation_range=10,
                            width_shift_range=0.2,
                            height_shift_range=0.1,
                            shear_range=0.2,
                            zoom_range=0.3,
                            #horizontal_flip=True,
                            #vertical_flip=True,
                            fill_mode='nearest')

In [None]:
# Network input shape as (height, width, channels) with 3 channels.
# NOTE(review): the VGG16 call below spells this tuple out again instead of
# reusing `input_shape`.
input_shape = (img_height, img_width,3)
# Number of output classes; sets the width of the final softmax layer.
nclass = 3

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers

# Actual Model

In [None]:
# Convolutional base: VGG16 pre-trained on ImageNet, without its fully
# connected classifier head, sized for the spectrogram images.
base_model = keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

In [None]:
# Number of layers in the convolutional base; useful when choosing how many
# leading layers to freeze.
len(base_model.layers)

19

In [None]:
# Freeze only the first 15 layers of the base (rather than the whole base, as
# the disabled line would) so the remaining base layers are fine-tuned
# together with the new head.
#base_model.trainable = False
for layer in base_model.layers[:15]:
    layer.trainable = False

# Adding custom layers: pooled base features -> three ReLU dense layers ->
# dropout -> softmax over the `nclass` classes.
model = keras.models.Sequential([
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(nclass, activation='softmax'),
])

In [None]:
# RMSprop with a small learning rate and learning-rate decay; the Adam
# alternative is kept below, disabled.
opt = optimizers.RMSprop(lr=0.0001,decay=1e-3)
#opt = optimizers.Adam(lr=0.001)
# The generators yield categorical (one-hot) labels, hence categorical
# cross-entropy; accuracy is tracked as the reported metric.
model.compile(loss='categorical_crossentropy', 
              optimizer=opt,
              metrics=['accuracy'])
# needed to reset weights!
# NOTE(review): this comment was garbled in the source; presumably the initial
# weights were saved here (see the disabled load_weights('raw_model.h5') call
# in the training cell) — confirm.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 2, 8, 512)         14714688  
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               262656    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                32832     
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 99        
Total params: 15,012,355

# Data Generator

In [None]:
# Training batches: augmented spectrograms with one-hot labels, shuffled with
# a fixed seed for reproducibility.
train_generator = datagen.flow_from_directory(train_folder,
                                              target_size=(img_height, img_width),
                                              class_mode='categorical',
                                              batch_size=batch_size,
                                              shuffle=True,
                                              seed=7)

# Held-out images must not be randomly shifted, sheared or zoomed, otherwise
# the validation/test metrics are measured on distorted data and vary from run
# to run. Apply only the same 1/255 rescaling used for training.
test_datagen = ImageDataGenerator(rescale=1./255)

# Shuffling stays enabled because validation during training only draws a
# subset of the test batches; an unshuffled directory iterator serves files in
# sorted (class-by-class) order, which would make that subset single-class.
test_generator = test_datagen.flow_from_directory(test_folder,
                                                  target_size=(img_height, img_width),
                                                  class_mode='categorical',
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  seed=7)


Found 15861 images belonging to 3 classes.
Found 4976 images belonging to 3 classes.


In [None]:
# Show the class-name -> label-index mapping the generator derived from the
# training sub-directories.
train_generator.class_indices

{'falling_dummy': 0, 'falling_object': 1, 'overlay_noise': 2}

In [None]:
#RESET WEIGHTS!!
#model.load_weights('raw_model.h5')
#
# Step counts must be whole numbers: plain `/` yields floats (e.g. 281.25),
# which makes the number of batches per epoch ambiguous and is rejected by
# newer Keras releases, so floor division is used instead.
history = model.fit_generator(train_generator,
                              steps_per_epoch=approx_fold_size // batch_size,
                              validation_data=test_generator,
                              # Validate on roughly 1000 samples per epoch.
                              validation_steps=1000 // batch_size,
                              epochs=20,
                              shuffle=True,
                              verbose=True)

Epoch 1/20
Epoch 2/20
 63/281 [=====>........................] - ETA: 3:29 - loss: 0.1850 - acc: 0.9335

# Test on the final model

In [None]:
# NOTE(review): `generate_generator_multiple` is not defined in the visible
# part of this notebook — confirm it is defined or imported before running
# this cell.
test_generator = generate_generator_multiple(generator=datagen,
                                             directories=[test_folder],
                                             batch_size=batch_size,
                                             img_height=img_height,
                                             img_width=img_width)
# Use a whole number of evaluation steps (floor division instead of a float).
# NOTE(review): approx_fold_size is the training fold size; confirm it is the
# intended number of samples to draw from the test folder.
model.evaluate_generator(test_generator,
                         steps=approx_fold_size // batch_size,
                         verbose=True)

In [None]:
# Persist only the trained weights to an .h5 file; the architecture is
# exported separately as JSON in a later cell.
model.save_weights('augmented_model_three_classes_unfrozen_layers_v1.h5')


In [None]:

# Object URL
# https://s3.amazonaws.com/soundflux-urbansounds/augmented_three_classes_vgg16_retrained_model.zip

# Export the model architecture (without weights) as JSON next to the
# weights file.
model_json = model.to_json()
with open("augmented_model_three_classes_unfrozen_layers_v1.json", "w") as json_file:
    json_file.write(model_json)

In [None]:
# Store the class-name -> label-index mapping used during training alongside
# the model files.
with open("augmented_model_three_classes_unfrozen_layers_v1_class_indices.json", "w") as json_file:
    json.dump(train_generator.class_indices, json_file)

The model weights were saved on the very first run of this model and are available here:
Object URL
https://s3.amazonaws.com/soundflux-urbansounds/augmented_three_classes_vgg16_retrained_model.zip