# Training a NN with Urban Sound Challenge Data

**Background:**

In parallel to the actuall fall detection experiment, we'll train a neural network leveraging the urban challenge dataset in order to develop the necessary libraries to map wav files to features (either MFCC or spectogram) and baseline the performance of the different approaches)

## Libraries and File Locations

In [1]:
import matplotlib.pyplot as plt
from python_speech_features import mfcc, ssc, logfbank
import pandas as pd
import os
import numpy as np
import soundfile
from scipy import signal

In [2]:
URBAN_SOUND_DIR = "/media/nvidia/ROMULO'S/urban_sound_challenge/"

## Load sample WAV File

Test that features can be generated in a couple of ways

* MFCC
* Spectrograms

In [3]:
def log_specgram(audio, sample_rate, window_size=10, 
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    _, _, spec = signal.spectrogram(audio, fs=sample_rate,
                                    window='hann', nperseg=nperseg, noverlap=noverlap,
                                    detrend=False)
    return np.log(spec.T.astype(np.float32) + eps)


**MFCCs**

In [4]:
samples, sample_rate = soundfile.read(URBAN_SOUND_DIR+"Train/5132.wav")

In [5]:
mfcc_feat = mfcc(samples,sample_rate,nfft=1200)

In [53]:
mfcc_feat.shape

(68, 13)

**Log F Bank**

In [54]:
logfbank_feat = logfbank(samples,sample_rate,nfft=1200)

In [55]:
logfbank_feat.shape

(68, 26)

**Spectrogram**

In [56]:
samples.shape

(16573, 2)

In [58]:
f, t, spectrogram= signal.spectrogram(samples,fs=sample_rate)

In [67]:
spectrogram.shape

(16573, 2, 1)

In [68]:
log_spectrogram = log_specgram(samples,sample_rate,10, 0)
log_spectrogram.shape

(1, 2, 16573)

## Training using MFCC Loading the dataset

In [6]:
train_df = pd.read_csv(URBAN_SOUND_DIR+'train.csv')
test_df = pd.read_csv(URBAN_SOUND_DIR+'test.csv')

In [7]:
def parse_mfcc(row,folder,nfft):
    # fun
    #ction to load files and extract features
    file_name = os.path.join(URBAN_SOUND_DIR, folder, str(row.ID) + '.wav')
   # handle exception to check if there isn't a file which is corrupted
    try:
        samples, sample_rate = soundfile.read(file_name)
        # here kaiser_fast is a technique used for faster extraction
        mfcc_feat = mfcc(samples,sample_rate,nfft=nfft)
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return pd.Series([None, None])
    if folder == 'Test':
        return pd.Series([mfcc_feat])
    return pd.Series([mfcc_feat, row.Class])

In [52]:
temp = train_df.apply(parse_mfcc,args=('Train',2400), axis=1)
temp.columns = ['feature', 'label']

In [53]:
temp.dropna(inplace=True)

In [63]:
assert temp.shape == (5435,2)

NameError: name 'temp' is not defined

## Define the classes and encode the training set

In [8]:
classes = list(temp['label'].unique())

NameError: name 'temp' is not defined

In [10]:
temp['label_encoded'] = temp['label'].apply(classes.index)

NameError: name 'classes' is not defined

### Save the dataframe for future models/training sessions

In [57]:
temp.to_json(URBAN_SOUND_DIR+"train_mfcc_transformed.json")

In [9]:
temp = pd.read_json(URBAN_SOUND_DIR+"train_mfcc_transformed.json")

# NEXT TEST THE JSON LOADING FORMAT!!!!

In [46]:
#temp = pd.read_csv(URBAN_SOUND_DIR+"train_mfcc_transformed.csv", dtype={'feature': np.ndarray.dtype} )

In [None]:
temp.columns

## Aggregate MFCC series into a single dimension

In [65]:
X_avg = []
for x in temp.feature.tolist():
    X_avg.append(np.mean(x,axis=0))

In [66]:
X_avg[0].shape

(13,)

In [67]:
X = np.array(X_avg)

In [14]:
y = np.array(temp.label_encoded.tolist())

In [68]:
set([x.shape for x in X]) == set([(13,)])

True

## Aggregate MFCC to padded array

In [None]:
x_padded = []
for x in temp.feature.tolist():
    pad = np.zeros((801,13))
    pad[:x.shape[0],:] = x
    x_padded.append(pad)
x_padded_array = np.array(x_padded)

## Raw MFCC, different shapes

In [46]:
x_raw = np.array([np.array([np.array(y) for y in x]) for x in temp.feature.tolist()])

In [48]:
x_raw[2].shape

(799, 13)

In [49]:
y.shape

(5435,)

# Setting up a simple model

With convolutions on the temporal space

In [12]:
import tensorflow as tf
from tensorflow import keras

In [50]:
conv_layer = keras.layers.Conv1D(200, (10),
                                 input_shape = (None,13),
                                 strides=1, padding='valid', 
                                 data_format='channels_last', 
                                 dilation_rate=1, 
                                 activation=None, 
                                 use_bias=True, 
                                 kernel_initializer='glorot_uniform', 
                                 bias_initializer='zeros', 
                                 kernel_regularizer=None, 
                                 bias_regularizer=None, 
                                 activity_regularizer=None, 
                                 kernel_constraint=None, bias_constraint=None)

In [51]:
model = keras.Sequential([
    conv_layer,
    #keras.layers.Flatten(input_shape=(13,)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(70, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

In [52]:
model.compile(optimizer='sgd', 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [53]:
model.fit(temp.feature, y, epochs=10,batch_size=10,shuffle=False,validation_split=0.1)

ValueError: Error when checking input: expected conv1d_3_input to have 3 dimensions, but got array with shape (5435, 1)

In [27]:
## Save the model to JSON

In [77]:
# serialize model to JSON
model_json = model.to_json()
with open("./trained_models/hdf5/urban_sound_model.json", "w") as json_file:
    json_file.write(model_json)

In [78]:
# serialize weights to HDF5
model.save_weights("./trained_models/hdf5/urban_sound_model.h5")
print("Saved model to disk")

Saved model to disk


# Test the MFCC model

In [84]:
test_temp = test_df.apply(parse_mfcc,args=('Test',4800), axis=1)
test_temp.columns = ['feature']

In [85]:
test_temp.to_json(URBAN_SOUND_DIR+"test_mfcc_transformed.json")

In [101]:
test_temp.shape

(3297, 1)

# First Submission/ Prediction

In [107]:
X_test_avg = []
for x in test_temp.feature.tolist():
    X_test_avg.append(np.mean(x,axis=0))
X_test = np.array(X_test_avg)

In [108]:
len(X_test)

3297

In [109]:
y_prediction = model.predict(X_test)

In [115]:
test_df['prediction'] = y_prediction.argmax(axis=-1)

In [121]:
def class_for_idx(cl):
    return classes[cl]

In [122]:
class_for_idx(1)

'street_music'

In [127]:
test_df['Class'] = test_df['prediction'].apply(class_for_idx)

In [128]:
test_df.to_csv(URBAN_SOUND_DIR+'test_prediction_baseline.csv')

# Simple model using spectrograms

In [2]:
def parser_spec(row):
    # fun
    #ction to load files and extract features
    folder = 'Train'
    file_name = os.path.join(URBAN_SOUND_DIR, folder, str(row.ID) + '.wav')
   # handle exception to check if there isn't a file which is corrupted
    try:
        samples, sample_rate = soundfile.read(file_name)
        # here kaiser_fast is a technique used for faster extraction
        #f, t, spectrogram= signal.spectrogram(samples,sample_rate)
        spectrogram = log_specgram(samples,sample_rate)
    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        return pd.Series([None, None])
    return pd.Series([spectrogram, row.Class])

In [9]:
train_df.head()

Unnamed: 0,ID,Class
0,0,siren
1,1,street_music
2,2,drilling
3,3,siren
4,4,dog_bark


## Extracting features in chunks

In [3]:
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


In [4]:
chunk = 0
chunk_size = 50
for i in chunker(train_df.head(),chunk_size):
    temp_spec = i.apply(parser_spec, axis=1)
    temp_spec.columns = ['feature', 'label']
    temp_spec.to_json(URBAN_SOUND_DIR+"train_spectrogram_transformed_chunk_{}.json".format(chunk))
    print("Extracted features for chunk # {} of size {}".format(chunk,chunk_size))
    chunk +=1

NameError: name 'train_df' is not defined