#### Convert the data and labels

### Classifying Urban Sounds Using Deep Learning

#### Visualizing Audio

#### Load Dataset

In [4]:
import librosa
import numpy as np

def extract_features(file_name):
    """Load an audio file and summarise it as one mean-MFCC feature vector.

    The clip is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCCs are computed, and the transposed MFCC matrix is averaged along
    axis 0 so that each coefficient is reduced to a single value.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray or None
        The averaged MFCC vector, or ``None`` when the file could not be
        loaded or processed (the failure is reported on stdout).
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberately best-effort: one unreadable clip must not abort a whole
        # extraction run. Report the offending path (previously this printed
        # the undefined name `file`, which raised NameError instead).
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccsscaled

In [5]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset
fulldatasetpath = './model/barkend/audio/'
metadata = pd.read_csv('./model/barkend/metadata/barkend.csv')

# Resolve the dataset root once instead of on every loop iteration
dataset_root = os.path.abspath(fulldatasetpath)

features = []
skipped_files = []

# Iterate through each sound file and extract the features
for index, row in metadata.iterrows():

    # Files live at <root>/<fold>/<slice_file_name>
    file_name = os.path.join(
        dataset_root,
        str(row["fold"]),
        str(row["slice_file_name"])
    )

    class_label = row["class"]
    data = extract_features(file_name)

    # extract_features signals failure with None; keeping such rows would
    # make the feature column ragged and break the later np.array conversion.
    if data is None:
        skipped_files.append(file_name)
        continue

    features.append([data, class_label])

# Convert into a pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature','class'])

print('Finished feature extraction from ', len(featuresdf), ' files')
if skipped_files:
    print('Skipped ', len(skipped_files), ' files that could not be parsed')

Finished feature extraction from  326  files


In [6]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import numpy as np

# Turn the per-file feature vectors and their class names into numpy arrays
X = np.array(featuresdf['feature'].tolist())
class_list = featuresdf['class']
y = np.array(class_list.tolist())

# Map each class name to an integer id, then expand the ids to one-hot rows
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


#### Split the dataset

In [7]:
# split the dataset 
from sklearn.model_selection import train_test_split 

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

### Training and Evaluation

In [8]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# One output unit per one-hot column of the encoded labels
num_labels = yy.shape[1]
# NOTE(review): filter_size is not referenced by any cell visible here
filter_size = 2

# Construct model: two 256-unit ReLU hidden layers with 50% dropout each,
# followed by a softmax output over the classes.
model = Sequential()

layer_stack = [
    # Hidden block 1 (expects 40 input features per sample)
    Dense(256, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),
    # Hidden block 2
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    # Output block
    Dense(num_labels),
    Activation('softmax'),
]
for layer in layer_stack:
    model.add(layer)

#### Compile the model

In [9]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (labels
# are one-hot encoded), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Show the layer-by-layer architecture summary
model.summary()

# Baseline: score the still-untrained network on the held-out split.
# score is [loss, accuracy] because the model was compiled with one metric.
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % (accuracy,))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 256)               10496     
_________________________________________________________________
activation_1 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_2 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 7)                

#### Training

In [11]:
from keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 100
num_batch_size = 32

# Write the model to disk only when the monitored validation metric improves
checkpointer = ModelCheckpoint(
    filepath='model/weights.hdf5',
    verbose=1,
    save_best_only=True,
)

# Time the whole training run
start = datetime.now()

# NOTE(review): validation_data is the test split, so checkpoint selection
# is driven by the same data later reported as "Testing Accuracy".
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)

duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 260 samples, validate on 66 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 18.29457, saving model to model/weights.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 18.29457 to 5.62894, saving model to model/weights.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 5.62894 to 3.53263, saving model to model/weights.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 3.53263 to 2.72551, saving model to model/weights.hdf5
Epoch 5/100

Epoch 00005: val_loss improved from 2.72551 to 1.67575, saving model to model/weights.hdf5
Epoch 6/100

Epoch 00006: val_loss improved from 1.67575 to 0.95528, saving model to model/weights.hdf5
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.95528
Epoch 8/100

Epoch 00008: val_loss improved from 0.95528 to 0.82516, saving model to model/weights.hdf5
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.82516
Epoch 10/100

Epoch 00010: val_loss improved from 0.82516 to 0.63841, saving model to model/weights.h


Epoch 00039: val_loss did not improve from 0.23100
Epoch 40/100

Epoch 00040: val_loss did not improve from 0.23100
Epoch 41/100

Epoch 00041: val_loss did not improve from 0.23100
Epoch 42/100

Epoch 00042: val_loss improved from 0.23100 to 0.22898, saving model to model/weights.hdf5
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.22898
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.22898
Epoch 45/100

Epoch 00045: val_loss did not improve from 0.22898
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.22898
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.22898
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.22898
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.22898
Epoch 50/100

Epoch 00050: val_loss improved from 0.22898 to 0.22523, saving model to model/weights.hdf5
Epoch 51/100

Epoch 00051: val_loss improved from 0.22523 to 0.21332, saving model to model/weights.hdf5
Epoch 52/100

Epoch 00052: val_loss did not impr

Epoch 79/100

Epoch 00079: val_loss did not improve from 0.12512
Epoch 80/100

Epoch 00080: val_loss did not improve from 0.12512
Epoch 81/100

Epoch 00081: val_loss did not improve from 0.12512
Epoch 82/100

Epoch 00082: val_loss did not improve from 0.12512
Epoch 83/100

Epoch 00083: val_loss did not improve from 0.12512
Epoch 84/100

Epoch 00084: val_loss did not improve from 0.12512
Epoch 85/100

Epoch 00085: val_loss did not improve from 0.12512
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.12512
Epoch 87/100

Epoch 00087: val_loss improved from 0.12512 to 0.11550, saving model to model/weights.hdf5
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.11550
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.11550
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.11550
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.11550
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.11550
Epoch 93/100

Epoch 00093: val_loss did not improv

#### Test the model

In [12]:
# Report accuracy on both splits; score[1] is the accuracy metric
for label, split_inputs, split_targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(split_inputs, split_targets, verbose=0)
    print(label, score[1])

Training Accuracy:  0.9653846025466919
Testing Accuracy:  0.9696969985961914


#### Predictions

In [13]:
import librosa
import numpy as np

def extract_feature(file_name):
    """Build a single-sample feature batch for prediction from an audio file.

    The clip is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCCs are computed, and the transposed MFCC matrix is averaged along
    axis 0. The resulting vector is wrapped in an outer array so it can be
    passed to the model as a batch containing one sample.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray or None
        A 2-D array holding the single averaged MFCC vector, or ``None`` when
        the file could not be loaded or processed (reported on stdout).
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Report the offending path (previously this printed the undefined
        # name `file`, which raised NameError inside the handler). A single
        # None is returned so the failure value matches the single-value
        # success return instead of a (None, None) tuple.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return np.array([mfccsscaled])

def print_prediction(file_name):
    """Print the predicted class and per-class probabilities for an audio file.

    Features are extracted with ``extract_feature``, the module-level
    ``model`` produces class probabilities, and the module-level label
    encoder ``le`` maps class indices back to class names.

    Parameters
    ----------
    file_name : str
        Path of the audio file to classify.

    Returns
    -------
    None
        Results are printed; nothing is returned. If no features could be
        extracted, a message is printed and no prediction is attempted.
    """
    prediction_feature = extract_feature(file_name)
    if prediction_feature is None:
        # Nothing to feed the model; avoid an opaque failure inside predict()
        print("No prediction made: could not extract features from", file_name)
        return

    # A single forward pass yields the class probabilities. Taking the argmax
    # replaces predict_classes()/predict_proba(), which newer Keras releases
    # no longer provide, and avoids running inference twice.
    predicted_proba = model.predict(prediction_feature)[0]
    predicted_index = int(np.argmax(predicted_proba))

    predicted_class = le.inverse_transform(np.array([predicted_index]))
    print("The predicted class is:", predicted_class[0], '\n')

    # Resolve every class index to its name in one call, then list them all
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, proba in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(proba, '.32f'))

#### Validation

In [17]:
# Class: Chainsaw (the sample below is chainsaw.wav)

filename = './validation/chainsaw.wav' 
print_prediction(filename)

The predicted class is: chainsaw 

birds_chirping 		 :  0.00000000000018340632289802483168
chainsaw 		 :  0.99999976158142089843750000000000
dog_barking 		 :  0.00000007591842177134822122752666
engine 		 :  0.00000010636663461127682239748538
many_dogs_barking 		 :  0.00000000147808032480156725796405
people_talking 		 :  0.00000000000000992570232954828949
siren 		 :  0.00000000084233253705434663061169


In [15]:
import os

import tensorflow as tf
from tensorflow import keras

# Export the in-memory model for the serving project.
# NOTE(review): this saves the model as it stands after the last epoch; the
# best-val_loss checkpoint in model/weights.hdf5 is not reloaded in the
# cells visible here - confirm that is intended.
model.save(filepath='../barkend-ml-serving/model/barkend.hdf5')