## SRNN on Speech Commands Dataset


Run `fetch_google.sh` to download the Google Speech Commands Dataset, then run `python process_google.py` to create the feature-extracted data used below.

In [1]:
from __future__ import print_function
import sys
import os
import numpy as np
import torch
import h5py

In [2]:
from edgeml_pytorch.graph.rnn import SRNN2
from edgeml_pytorch.trainer.srnnTrainer import SRNNTrainer
import edgeml_pytorch.utils as utils

In [3]:
# Seed the random number generators up front so that a fresh
# "Restart Kernel -> Run All" starts from the same random state every time.
# NOTE(review): some CUDA kernels may still be nondeterministic; see the
# PyTorch reproducibility notes if bit-exact GPU results are required.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Directory holding the train/val/test .h5 files read by the loading cell.
DATA_DIR = './GoogleSpeech/Extracted/'

In [4]:
# Copyright (c) Microsoft Corporation. All rights reserved.
def _read_split(split):
    """Return the 'X' and 'Y' datasets of ``DATA_DIR/<split>.h5`` as NumPy arrays.

    The file is opened read-only inside a ``with`` block so the HDF5 handle
    is closed as soon as both datasets have been copied into memory. Indexing
    with ``[]`` (rather than ``.get``) makes a missing dataset raise
    ``KeyError`` instead of silently producing an array wrapping ``None``.
    """
    with h5py.File(DATA_DIR + split + '.h5', 'r') as h5_file:
        return np.array(h5_file['X']), np.array(h5_file['Y'])


x_train_, y_train = _read_split('train')
x_val_, y_val = _read_split('val')
x_test_, y_test = _read_split('test')

# Mean-var normalize
# Statistics are computed per position of the last axis, over the training
# split only, and then applied unchanged to the validation and test splits.
train_flat = np.reshape(x_train_, [-1, x_train_.shape[-1]])
mean = np.mean(train_flat, axis=0)
std = np.std(train_flat, axis=0)
# Guard against division by (near-)zero for constant features.
std[std < 0.000001] = 1
x_train_ = (x_train_ - mean) / std
x_val_ = (x_val_ - mean) / std
x_test_ = (x_test_ - mean) / std

# Swap the first two axes; the shapes printed below show the result.
x_train = np.swapaxes(x_train_, 0, 1)
x_val = np.swapaxes(x_val_, 0, 1)
x_test = np.swapaxes(x_test_, 0, 1)
print("Train shape", x_train.shape, y_train.shape)
print("Val shape", x_val.shape, y_val.shape)
print("Test shape", x_test.shape, y_test.shape)

Train shape (99, 51088, 32) (51088, 13)
Val shape (99, 6798, 32) (6798, 13)
Test shape (99, 6835, 32) (6835, 13)


In [5]:
# Problem dimensions, read directly from the prepared arrays.
numTimeSteps, numInput = x_train.shape[0], x_train.shape[-1]
numClasses = y_train.shape[1]

# Network Parameters
cellType = 'LSTM'
hiddenDim0, hiddenDim1 = 64, 32
brickSize = 11  # passed as the first argument to trainer.train

# Optimisation settings
learningRate = 0.01
batchSize = 128
epochs = 10

In [6]:
# Instantiate the model from the settings chosen above, then place it on the
# selected device.
srnn2 = SRNN2(numInput, numClasses, hiddenDim0, hiddenDim1, cellType)
srnn2 = srnn2.to(device)

# Wrap the model in its trainer, using the cross-entropy loss option.
trainer = SRNNTrainer(
    srnn2,
    learningRate,
    lossType='xentropy',
    device=device,
)

Using x-entropy loss


In [7]:
# Run training. In the recorded output below, batch metrics appear every 200
# batches (printStep) and validation accuracy after every 5th epoch (valStep).
trainer.train(
    brickSize,
    batchSize,
    epochs,
    x_train,
    x_val,
    y_train,
    y_val,
    printStep=200,
    valStep=5,
)

Epoch 0 batch 0 loss 2.049031 acc 0.632812
Epoch 0 batch 200 loss 0.739568 acc 0.695312
Epoch 1 batch 0 loss 0.536956 acc 0.843750
Epoch 1 batch 200 loss 0.402417 acc 0.882812
Epoch 2 batch 0 loss 0.299402 acc 0.921875
Epoch 2 batch 200 loss 0.316270 acc 0.882812
Epoch 3 batch 0 loss 0.237716 acc 0.929688
Epoch 3 batch 200 loss 0.215562 acc 0.929688
Epoch 4 batch 0 loss 0.235044 acc 0.929688
Epoch 4 batch 200 loss 0.177791 acc 0.945312
Validation accuracy: 0.913504
Epoch 5 batch 0 loss 0.181037 acc 0.945312
Epoch 5 batch 200 loss 0.167289 acc 0.937500
Epoch 6 batch 0 loss 0.201628 acc 0.921875
Epoch 6 batch 200 loss 0.266160 acc 0.914062
Epoch 7 batch 0 loss 0.199887 acc 0.937500
Epoch 7 batch 200 loss 0.154214 acc 0.929688
Epoch 8 batch 0 loss 0.193560 acc 0.945312
Epoch 8 batch 200 loss 0.194838 acc 0.937500
Epoch 9 batch 0 loss 0.205967 acc 0.921875
Epoch 9 batch 200 loss 0.186773 acc 0.937500
Validation accuracy: 0.913063
