In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are re-imported
# before each cell executes, so edits to local .py files take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Input, Conv2D, BatchNormalization
from keras.layers import MaxPool2D, MaxPooling2D, Reshape, Dropout, SeparableConv2D
from keras.models import Model
from keras.utils import to_categorical
import os

# matplotlib for displaying the output
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline
from WavDataLoader import WavDataLoader
from WavDataGenerator import WavDataGenerator

Using TensorFlow backend.


In [8]:
# Height and width of each single-channel input; the model's Input layer is
# declared as (nx, ny, 1) and the same values are handed to the data loader.
nx, ny = 40, 32

# Class names, in the order passed to the data loader.
labels = [
    'bed',
    'bird',
    'cat',
    'dog',
    'down',
    'eight',
    'five',
    'four',
    'go',
    'happy',
    'house',
    'left',
    'marvin',
    'nine',
    'no',
    'off',
    'on',
    'one',
    'right',
    'seven',
    'sheila',
    'silence',
    'six',
    'stop',
    'three',
    'tree',
    'two',
    'up',
    'wow',
    'yes',
    'zero',
]

# Number of output classes; sizes the final softmax layer.
num_labels = len(labels)

In [9]:
# Choose the training-audio directory for the current platform. Both paths are
# machine-specific absolute locations; adjust them when running elsewhere.
# The platform check uses `==`: `is` compares object identity, which only happens
# to work for interned strings and triggers a SyntaxWarning on Python 3.8+.
if os.name == 'nt':
    data_dir = r'C:\Development\kaggle\tensorflow-speech-recognition-challenge\data\train\audio'
else:
    data_dir = r'/home/shaur141/Development/kaggle/tensorflow-speech-recognition-challenge/data/train/audio'

# Generator-based alternative (currently disabled):
# wav_data_generator = WavDataGenerator(data_dir, labels, nx=nx, ny=ny, is_train=True, batch_size=32)

# Build the loader whose X and y attributes are fed to model.fit further down.
wav_data_loader = WavDataLoader(data_dir, labels, nx=nx, ny=ny)


In [10]:
def build_model():
    """Build and compile the convolutional classifier.

    The network accepts a single-channel ``(nx, ny, 1)`` input, applies four
    convolution -> max-pool stages (with dropout after the first three),
    flattens the result, and ends with two dense layers followed by a softmax
    over ``num_labels`` classes. ``nx``, ``ny`` and ``num_labels`` are read
    from the notebook's global namespace.

    Returns:
        A Keras ``Model`` compiled with the Nadam optimizer, categorical
        cross-entropy loss and an accuracy metric.
    """
    inputs = Input(shape=(nx, ny, 1))

    # One row per stage: (layer class, filters, conv strides, dropout after pooling?)
    conv_stages = (
        (Conv2D, 16, (1, 1), True),
        (Conv2D, 32, (2, 2), True),
        (Conv2D, 64, (2, 2), True),
        (SeparableConv2D, 128, (2, 2), False),
    )

    features = inputs
    for conv_layer, n_filters, conv_strides, with_dropout in conv_stages:
        features = conv_layer(n_filters, (3, 3), strides=conv_strides, activation='relu')(features)
        # Pooling uses stride 1, so it trims one row/column instead of halving the map.
        features = MaxPool2D(strides=(1, 1))(features)
        if with_dropout:
            features = Dropout(0.25)(features)

    # Flatten the feature maps into one vector per example for the dense head.
    flat = Reshape((-1,))(features)
    hidden = Dense(512, activation='relu')(flat)
    hidden = Dense(128, activation='relu')(hidden)
    predictions = Dense(num_labels, activation='softmax')(hidden)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer='Nadam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [11]:
# Instantiate a new compiled network from build_model().
model = build_model()

In [12]:
# Print each layer's output shape and parameter count as a sanity check on the architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 40, 32, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 38, 30, 16)        160       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 37, 29, 16)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 37, 29, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 14, 32)        4640      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 13, 32)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 17, 13, 32)        0         
__________

In [13]:
# Train on the pre-loaded arrays for up to 20 epochs, holding out 15% for validation.
# num_classes is passed explicitly so the one-hot width always equals the width of
# the model's softmax layer; otherwise to_categorical sizes the encoding from
# max(y) + 1, which falls short when the highest-indexed labels are absent.
# Assumes wav_data_loader.y holds integer indices into `labels` — TODO confirm.
# NOTE(review): validation_split takes the *last* 15% of the arrays before any
# shuffling — confirm WavDataLoader does not return samples grouped by label.
model.fit(
    x=wav_data_loader.X,
    y=to_categorical(wav_data_loader.y, num_classes=num_labels),
    validation_split=0.15,
    epochs=20,
)

# Generator-based alternative (currently disabled):
# model.fit_generator(wav_data_generator.generator(), 
#                     steps_per_epoch=wav_data_generator.num_examples//wav_data_generator.batch_size,
#                    workers=4)
                    

Train on 1317 samples, validate on 233 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20

KeyboardInterrupt: 

In [21]:
# Write the model to 'model.h5' in the current working directory.
# NOTE(review): the recorded training run ended in a KeyboardInterrupt and this
# cell's execution count jumps from 13 to 21 — confirm which training state is saved.
model.save('model.h5')