In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras import backend as K
from keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
K.set_image_dim_ordering('th')
import pickle

Using TensorFlow backend.


In [47]:
"""words list represents the "pkl" file names
    which have the same word as .wav file.
    The pkl file is dictionary with key as the word spoken
    and the value as numpy array where element is a (20 * 100) vector
    representing the mfcc vectors of the .wav file. """

words = ["bed", "down", "dog", "eight", "four", "cat","go","bird","five"]
#words = ["bed", "down", "dog"]

In [49]:
data = pickle.load( open( words[0]+".pkl", "rb" ) )
for keys in data:
    X = data[keys]
    Y = np.array([[keys]] * data[keys].shape[0])
    #print X_1.shape
    
    
for i in range(1,len(words)):
    data = pickle.load( open( words[i]+".pkl", "rb" ) )
    for keys in data:
        Y = np.append(Y, [[keys]] * data[keys].shape[0], axis = 0)
        X = np.append(X, data[keys], axis = 0)
        #print X_1.shape
        
print X.shape
print Y.shape
print Y

(18735, 20, 100)
(18735, 1)
[['bed']
 ['bed']
 ['bed']
 ..., 
 ['five']
 ['five']
 ['five']]


In [50]:
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(Y)
#print integer_encoded
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
Y = onehot_encoded

[ 1.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.]
[ 0.  0.  0.  1.  0.  0.  0.  0.  0.]


In [51]:
"""Convolution Layer in Keras
   accepts input as 3d - height, width, depth(RGB channels).
   Since we do not have depth in text, we can convert it to a vector
   with a depth = 1"""

X = X.reshape(X.shape[0], 1, 20, 100)
print X.shape
print Y.shape
num_classes = Y.shape[1]
print num_classes

(18735, 1, 20, 100)
(18735, 9)
9


In [59]:
model = Sequential()
model.add(Conv2D(30, (5, 5), input_shape=(1, 20, 100), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(15, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [60]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 30, 16, 96)        780       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 30, 8, 48)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 15, 6, 46)         4065      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 15, 3, 23)         0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 15, 3, 23)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1035)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               132608    
__________

In [61]:
file_path="CNN_val_loss.hdf5"
checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early = EarlyStopping(monitor="val_loss", mode="min", patience=20)
callbacks_list = [checkpoint, early] #early
model.fit(X, Y, validation_split=0.1, callbacks=callbacks_list, epochs=1, batch_size= 32)

Train on 16861 samples, validate on 1874 samples
Epoch 1/1


<keras.callbacks.History at 0x7f72725e4990>

In [74]:
model = load_model(file_path)

In [75]:
model.fit(X, Y, validation_split=0.1, callbacks=callbacks_list, epochs=3, batch_size= 32)

Train on 16861 samples, validate on 1874 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f7271842e50>

In [76]:
model = load_model(file_path)

In [77]:
predicted = model.predict(X)

In [78]:
from sklearn import metrics

In [79]:
metrics.log_loss(Y, predicted)

0.26051461704943474