In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import Recurrent
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
import pickle
import matplotlib.pyplot as plt
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import SimpleRNN
plt.ion()
# Load the pickled ATIS splits and vocabularies.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'atis.pkl' when it comes from a trusted source.
# The file is opened in binary mode (pickles are binary data) and inside a
# `with` block so the handle is closed as soon as loading finishes.
with open('atis.pkl', 'rb') as atis_file:
    train, test, dicts = pickle.load(atis_file)

# Invert the name -> index dictionaries so indices can be mapped back to names.
idx2labels = dict((v,k) for k, v in dicts['labels2idx'].items())
idx2words = dict((v,k) for k, v in dicts['words2idx'].items())

def contextwin(l, win):
    """Return one fixed-width context window per element of a sequence.

    Each element of ``l`` gets a window of ``win`` consecutive items centred
    on it; positions that fall outside the sequence are filled with -1.

    Parameters
    ----------
    l : iterable
        The sequence to window; it is copied into a list first.
    win : int
        Window width. Must be odd and at least 1.

    Returns
    -------
    list of list
        ``len(l)`` windows, each of length ``win``.

    Raises
    ------
    AssertionError
        If ``win`` is even or smaller than 1.
    """
    assert (win % 2) == 1
    assert win >= 1
    items = list(l)
    # Half a window of padding on each side keeps every element centred.
    padding = [-1] * (win // 2)
    padded = padding + items + padding
    windows = []
    for start in range(len(items)):
        windows.append(padded[start:start + win])
    assert len(windows) == len(items)
    return windows

# Smoke-test call on the first training sentence. It is not the last
# expression of the cell, so its return value is simply discarded.
contextwin(train[0][0], 5)

# Width of the context window fed to the model (contextwin requires an odd value).
window_size = 7
# Number of output classes: one more than the number of entries in idx2labels.
# NOTE(review): the purpose of the extra class is not visible here — confirm.
nlabels =len(idx2labels)+1


Using Theano backend.


In [2]:
# Per-sentence label sequences (third element of each split) turned into
# categorical matrices with nlabels columns, one matrix per sentence.
train_hot = [np_utils.to_categorical(label_ids, nlabels) for label_ids in train[2]]
test_hot = [np_utils.to_categorical(label_ids, nlabels) for label_ids in test[2]]

# Per-sentence inputs (first element of each split) expanded into one
# context window of window_size entries per token.
train_w = [contextwin(sentence, window_size) for sentence in train[0]]
test_w = [contextwin(sentence, window_size) for sentence in test[0]]

In [3]:
# Window classifier: embedding -> SimpleRNN -> hidden dense layer -> softmax over nlabels.
model = Sequential()
# One embedding row per word index plus one extra row; every input is a window
# of window_size indices.
# NOTE(review): the extra row presumably serves the -1 padding index that
# contextwin emits — confirm against the backend's indexing behaviour.
model.add(Embedding(len(idx2words)+1, 1024, input_length = window_size))
# Recurrent layer with 32 units run over the window.
model.add(SimpleRNN(32))
# Hidden layer twice as wide as the output layer, followed by ReLU and dropout.
model.add(Dense(nlabels*2))
model.add(Activation('relu'))
model.add(Dropout(0.4))
# Output layer: one unit per class, normalised with softmax.
model.add(Dense(nlabels))
model.add(Activation('softmax'))
#model.add(Dropout(0.4))
# Compile with categorical cross-entropy and SGD, timing the call with %time.
# NOTE(review): compile() presumably returns nothing useful, so `h` is likely
# just a placeholder until the training loop overwrites it — confirm.
%time h = model.compile(loss = 'categorical_crossentropy', optimizer = 'SGD')
model.summary()

Wall time: 0 ns
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_1 (Embedding)          (None, 7, 1024)       586752      embedding_input_1[0][0]          
____________________________________________________________________________________________________
simplernn_1 (SimpleRNN)          (None, 32)            33824       embedding_1[0][0]                
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 256)           8448        simplernn_1[0][0]                
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 256)           0           dense_1[0][0]                    
___________________________________________________________________________

In [None]:
from tqdm import tqdm

# Two passes over the training set, fitting the model one sentence at a time.
for e in range(2):
    print(e)
    # The original wrapped the pairs in enumerate() but never used the index,
    # so the (windows, one-hot labels) pairs are iterated directly.
    # total= gives tqdm the number of sentences for its progress bar.
    for tr, hot in tqdm(zip(train_w, train_hot), total = len(train_w)):
        # One fit() call per sentence: the rows of `tr` are that sentence's
        # context windows and the rows of `hot` their target vectors.
        # `h` keeps only the return value of the most recent call.
        h = model.fit(np.asarray(tr), np.asarray(hot), batch_size = 1, nb_epoch = 1, verbose = 0)

In [42]:
# Number of training sentences (one list of context windows per sentence).
len(train_w)

4978

In [43]:
# Element-wise difference between the predicted class ids for the first test
# sentence and its reference labels; a 0 marks a token tagged correctly.
# NOTE(review): test_w[0] is passed as a plain list of windows here, whereas the
# train-accuracy cell wraps its input in np.asarray — confirm both forms are accepted.
model.predict_classes(test_w[0])-test[2][0]



array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 48,  3,  0,  0,  0,  0,  0,
        7,  7], dtype=int64)

In [44]:
# Token-level accuracy on the training split: count the positions where the
# predicted class id differs from the reference label, sentence by sentence.
mistakes_per_sentence = []
for windows, gold in zip(train_w, train[2]):
    predicted = model.predict_classes(np.asarray(windows), verbose=0)
    # A non-zero difference means the prediction and the label disagree.
    mistakes_per_sentence.append(np.count_nonzero(predicted - gold))
errs = np.sum(mistakes_per_sentence)
# Total number of labelled tokens across all training sentences.
words = np.sum([len(gold) for gold in train[2]])
err_fraction = float(errs)/words
print('train accuracy = {} %'.format((1-err_fraction)*100))

train accuracy = 69.2648877894 %


In [45]:
# Token-level accuracy on the test split.
# Each sentence's window list is converted with np.asarray before prediction,
# matching how the training-set accuracy is computed, so both evaluations feed
# the model the same kind of input.
errs = np.sum([np.count_nonzero(model.predict_classes(np.asarray(tw), verbose=0) - t)
               for tw, t in zip(test_w, test[2])])
# Total number of labelled tokens across all test sentences.
words = np.sum(list(map(len, test[2])))
err_fraction = float(errs)/words
print('test accuracy = {} %'.format((1-err_fraction)*100))

test accuracy = 64.2965862144 %


In [80]:
# Snapshot of the model's weight arrays; the next cell iterates over it and
# prints each array's shape.
wts = model.get_weights()

In [94]:
# List every weight array with its 1-based position and shape.
# Uses the print(...) call form, like the other cells, instead of the
# Python-2-only print statement; the emitted text is unchanged.
for ind, weight in enumerate(wts):
    print('{} --> {}'.format(ind+1, weight.shape))

1 --> (573L, 64L)
2 --> (64L, 32L)
3 --> (32L, 32L)
4 --> (32L,)
5 --> (32L, 128L)
6 --> (128L,)
