In [None]:
import os
from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from keras.layers import Dropout, Dense, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from keras.callbacks import ModelCheckpoint
import librosa
from tqdm import tqdm
from sklearn.utils import shuffle

In [None]:
# Notebook-wide configuration.

# Input shape handed to the first Conv2D layer: (rows, columns, channels).
img_shape = (128, 32, 1)

# NOTE(review): `power` and `sr` are not referenced in the visible cells;
# presumably preprocessing settings (sr looks like a sample rate in Hz) — confirm.
power = 0.5
sr = 16000

# Learning rate passed to the Adam optimizer.
lr = 2e-3

# Spoken commands to classify; a command's position in this list is its class id.
commands = [
    'go',
    'no',
    'stop',
    'yes',
    'up',
    'down',
    'left',
    'right',
]

In [None]:
def prepare_data(datatype, data_dir='./data/'):
    """Load, label, normalize and shuffle the samples of one data split.

    For every entry of the module-level ``commands`` list the file
    ``<data_dir>/<command>-<datatype>.npy`` is loaded and each of its samples
    is labelled with the command's index in ``commands``. For the ``'train'``
    split, the (sample, label) pairs stored in
    ``<data_dir>/speech-commands-generated-data.npy`` are appended as well.

    Args:
        datatype: Split name used in the file names, e.g. ``'train'`` or
            ``'test'``.
        data_dir: Directory containing the ``.npy`` files. Defaults to the
            location the notebook originally hard-coded.

    Returns:
        The pair ``X, y`` as returned by ``sklearn.utils.shuffle`` with
        ``random_state=0``: ``X`` is the concatenated sample array with a new
        axis inserted at position 3 and divided by its maximum value; ``y`` is
        the one-hot label array with ``len(commands)`` columns.

    Raises:
        ValueError: If ``commands`` is empty, or if the maximum of the loaded
            data is 0 (dividing by it would fill ``X`` with NaN/inf).
    """
    if not commands:
        raise ValueError('`commands` is empty; there is nothing to load.')

    parts = []   # one array per source file, concatenated once at the end
    labels = []  # integer class id for every sample, in the same order
    for label, command in enumerate(commands):
        path = os.path.join(data_dir, command + '-' + datatype + '.npy')
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code. Only load files you trust.
        command_data = np.load(path, allow_pickle=True)
        parts.append(command_data)
        labels.extend([label] * command_data.shape[0])

    # add generated data to training dataset
    if datatype == 'train':
        generated_path = os.path.join(data_dir, 'speech-commands-generated-data.npy')
        generated_data = np.load(generated_path, allow_pickle=True)
        if len(generated_data):
            # Each entry is a (sample, label) pair; split into two sequences.
            X_generated, y_generated = zip(*generated_data)
            parts.append(np.asarray(X_generated))
            labels.extend(y_generated)

    # A single concatenate avoids re-copying the growing array per command.
    X = np.concatenate(parts, axis=0)

    # one hot encode target output
    y = np.asarray(keras.utils.to_categorical(labels, len(commands)))

    # normalize
    # NOTE(review): the scale factor is the maximum of *this* split, so the
    # train and test splits may be scaled differently — consider reusing the
    # training maximum for the test data.
    X = np.expand_dims(X, axis=3)
    peak = np.amax(X)
    if peak == 0:
        raise ValueError(
            'Maximum of the loaded data is 0; cannot normalize by dividing by it.'
        )
    X = X / peak

    return shuffle(X, y, random_state=0)

In [None]:
# Load the training split; for 'train', prepare_data also appends the generated samples.
X_train, y_train = prepare_data('train')

In [None]:
# Load the test split (prepare_data adds generated samples only for 'train').
X_test, y_test = prepare_data('test')

In [None]:
# CNN classifier: three 3x3 convolutions (128 filters each, 'same' padding),
# one 2x2 max-pool, then three 64-unit dense layers with dropout after the
# first two, and a softmax output with one unit per command.
model = Sequential([
    Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same',
           input_shape=img_shape),
    Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same'),
    Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same'),
    MaxPool2D(pool_size=2, strides=2),
    Flatten(),

    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),

    Dense(len(commands), activation='softmax'),
])

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by current Keras releases.
adam = keras.optimizers.Adam(learning_rate=lr)
# Labels are one-hot encoded, hence categorical cross-entropy / categorical accuracy.
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['categorical_accuracy'])

In [None]:
# Train for 50 epochs with batches of 128 samples; validation_split=0.2 asks Keras to
# hold out 20% of the training arrays for validation. The returned History object
# supplies the per-epoch metrics plotted in the following cells.
history = model.fit(X_train, y_train, verbose=2, epochs=50, batch_size=128, validation_split=0.2)

In [None]:
# Per-epoch training vs. validation accuracy recorded by model.fit.
fig, ax = plt.subplots()
for series_key, series_label in (('categorical_accuracy', 'train'),
                                 ('val_categorical_accuracy', 'validation')):
    ax.plot(history.history[series_key], label=series_label)
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Per-epoch training vs. validation loss recorded by model.fit.
fig, ax = plt.subplots()
for series_key, series_label in (('loss', 'train'),
                                 ('val_loss', 'validation')):
    ax.plot(history.history[series_key], label=series_label)
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Run the trained model on the test samples; each row of `predictions` is the
# softmax output of the final Dense layer, one value per command.
predictions = model.predict(X_test)

In [None]:
# Collapse each score / one-hot row to the index of its largest entry (the class id).
predictionList = [np.argmax(score_row) for score_row in predictions]
actualList = [np.argmax(one_hot_row) for one_hot_row in y_test]

In [None]:
# Count the test samples whose predicted class id equals the true class id.
total = len(predictionList)
correct = sum(
    1
    for idx, predicted in enumerate(predictionList)
    if predicted == actualList[idx]
)

In [None]:
# Test accuracy: fraction of test samples whose predicted class matches the true class.
correct/total 

# Recorded result: Train on 14610 samples, validate on 3653 samples: 0.7974394520101117
# NOTE(review): presumably obtained with the 8-command list defined above — confirm.

In [None]:
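# Recorded results when using only the commands: yes, no, stop, go
# (the value after the colon is presumably the resulting test accuracy)
# Train on 4903 samples, validate on 1226 samples: 0.9663892967857726
# Train on 6492 samples, validate on 1624 samples: 0.9738945994452602
# Train on 9703 samples, validate on 2426 samples: 0.968347201827378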