# Training a NN with Urban Sound Challenge Data

**Background:**

In parallel to the actuall fall detection experiment, we'll train a neural network leveraging the urban challenge dataset in order to develop the necessary libraries to map wav files to features (either MFCC or spectogram) and baseline the performance of the different approaches)

## Libraries and File Locations

In [49]:
import matplotlib.pyplot as plt
from python_speech_features import mfcc
import pandas as pd
import os
import numpy as np
import soundfile
from scipy import signal

In [2]:
URBAN_SOUND_DIR = "/media/nvidia/ROMULO'S/urban_sound_challenge/"

## Load sample WAV File

Test that features can be generated in a couple of ways

* MFCC
* Spectrograms

**MFCCs**

In [31]:
samples, sample_rate = soundfile.read(URBAN_SOUND_DIR+"Train/5132.wav")

In [32]:
mfcc_feat = mfcc(samples,sample_rate,nfft=2400)

In [37]:
mfcc_feat.dtype

dtype('float64')

**Spectrogram**

In [6]:
f, t, spectrogram= signal.spectrogram(samples,sample_rate)

  .format(nperseg, input_length))


In [8]:
spectrogram.shape

(16573, 2, 1)

## Training using MFCC Loading the dataset

In [50]:
train_df = pd.read_csv(URBAN_SOUND_DIR+'train.csv')
test_df = pd.read_csv(URBAN_SOUND_DIR+'test.csv')

In [83]:
def parse_mfcc(row,folder,nfft):
    # fun
    #ction to load files and extract features
    file_name = os.path.join(URBAN_SOUND_DIR, folder, str(row.ID) + '.wav')
   # handle exception to check if there isn't a file which is corrupted
    try:
        samples, sample_rate = soundfile.read(file_name)
        # here kaiser_fast is a technique used for faster extraction
        mfcc_feat = mfcc(samples,sample_rate,nfft=nfft)
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return pd.Series([None, None])
    if folder == 'Test':
        return pd.Series([mfcc_feat])
    return pd.Series([mfcc_feat, row.Class])

In [52]:
temp = train_df.apply(parse_mfcc,args=('Train',2400), axis=1)
temp.columns = ['feature', 'label']

In [53]:
temp.dropna(inplace=True)

In [54]:
assert temp.shape == (5435,2)

## Define the classes and encode the training set

In [55]:
classes = list(temp['label'].unique())

In [56]:
temp['label_encoded'] = temp['label'].apply(classes.index)

### Save the dataframe for future models/training sessions

In [57]:
temp.to_json(URBAN_SOUND_DIR+"train_mfcc_transformed.json")

In [59]:
temp = pd.read_json(URBAN_SOUND_DIR+"train_mfcc_transformed.json")

# NEXT TEST THE JSON LOADING FORMAT!!!!

In [46]:
#temp = pd.read_csv(URBAN_SOUND_DIR+"train_mfcc_transformed.csv", dtype={'feature': np.ndarray.dtype} )

In [64]:
temp.columns

Index(['feature', 'label', 'label_encoded'], dtype='object')

## Aggregate MFCC series into a single dimension

In [65]:
X_avg = []
for x in temp.feature.tolist():
    X_avg.append(np.mean(x,axis=0))

In [66]:
X_avg[0].shape

(13,)

In [67]:
X = np.array(X_avg)
y = np.array(temp.label_encoded.tolist())

In [68]:
set([x.shape for x in X]) == set([(13,)])

True

# Setting up a simple model

In [69]:
import tensorflow as tf
from tensorflow import keras

In [70]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(13,)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(70, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

In [71]:
model.compile(optimizer='sgd', 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [76]:
model.fit(X, y, epochs=10,batch_size=10,shuffle=False,validation_split=0.1)

Train on 4891 samples, validate on 544 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f20187550>

In [27]:
## Save the model to JSON

In [77]:
# serialize model to JSON
model_json = model.to_json()
with open("./trained_models/hdf5/urban_sound_model.json", "w") as json_file:
    json_file.write(model_json)

In [78]:
# serialize weights to HDF5
model.save_weights("./trained_models/hdf5/urban_sound_model.h5")
print("Saved model to disk")

Saved model to disk


# Test the MFCC model

In [84]:
test_temp = test_df.apply(parse_mfcc,args=('Test',4800), axis=1)
test_temp.columns = ['feature']

In [85]:
test_temp.to_json(URBAN_SOUND_DIR+"test_mfcc_transformed.json")

In [None]:
#DELETE
temp.head().to_json("./tests/urban_sound_mfcc_sample/mfcc_labeled_samples.json")

# First Submission/ Prediction

In [None]:
X_test_avg = []
for x in temp_test.feature.tolist():
    X_test_avg.append(np.mean(x,axis=0))
X_test = np.array(X_avg)

In [87]:
y_prediction = model.predict(X_test)

NameError: name 'X_test' is not defined

In [None]:
y_prediction

# Simple model using spectrograms

In [1]:
def parser_spec(row):
    # fun
    #ction to load files and extract features
    folder = 'Train'
    file_name = os.path.join(URBAN_SOUND_DIR, folder, str(row.ID) + '.wav')
   # handle exception to check if there isn't a file which is corrupted
    try:
        samples, sample_rate = soundfile.read(file_name)
        # here kaiser_fast is a technique used for faster extraction
        f, t, spectrogram= signal.spectrogram(samples,sample_rate)
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return pd.Series([None, None])
    return pd.Series([spectrogram, row.Class])

In [2]:
temp_spec = train_df.apply(parser_spec, axis=1)
temp_spec.columns = ['feature', 'label']

NameError: name 'train_df' is not defined