# MNIST

In [2]:
from __future__ import print_function

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import SimpleRNN
from keras.initializations import normal, identity
from keras.optimizers import RMSprop
from keras.utils import np_utils

In [18]:
# --- Hyper-parameters for the MNIST IRNN experiment ---

# Optimiser settings.
learning_rate = 1e-6
clip_norm = 1.0  # presumably a gradient-norm clipping threshold; verify where it is consumed

# Model dimensions.
hidden_units = 100  # width of the recurrent layer
nb_classes = 10     # number of target classes for the one-hot labels

# Training schedule.
nb_epochs = 5
batch_size = 32

In [4]:
# Load MNIST; it arrives shuffled and already split into train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a (pixels, 1) sequence, cast to float32 and
# divide the pixel values by 255.
X_train = X_train.reshape(X_train.shape[0], -1, 1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], -1, 1).astype('float32') / 255

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

X_train shape: (60000, 784, 1)
60000 train samples
10000 test samples


In [5]:
# One-hot encode the integer class vectors into binary class matrices
# with `nb_classes` columns each.
Y_train, Y_test = [np_utils.to_categorical(labels, nb_classes)
                   for labels in (y_train, y_test)]

In [29]:
# Sanity check: shape of a single training sample after the reshape in the
# loading cell (recorded output: (784, 1)).
X_train[0].shape

(784, 1)

In [30]:
# Sanity check: shape of a single one-hot label vector produced by
# to_categorical (recorded output: (10,), matching nb_classes).
Y_train[0].shape

(10,)

In [19]:
print('Evaluate IRNN...')

# IRNN: a ReLU SimpleRNN whose recurrent weights start as the identity matrix
# and whose input weights start as small normally-distributed values.
model = Sequential()
model.add(SimpleRNN(output_dim=hidden_units,
                    init=lambda shape, name: normal(shape, scale=0.001, name=name),
                    inner_init=lambda shape, name: identity(shape, scale=1.0, name=name),
                    activation='relu',
                    input_shape=X_train.shape[1:]))
# Classification head: one softmax output per class.
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# `clip_norm` is declared with the other hyper-parameters but was never handed
# to the optimizer; pass it on so gradient-norm clipping is actually applied.
rmsprop = RMSprop(lr=learning_rate, clipnorm=clip_norm)
model.compile(loss='categorical_crossentropy',
              optimizer=rmsprop,
              metrics=['accuracy'])

model.summary()

Evaluate IRNN...
____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
simplernn_2 (SimpleRNN)            (None, 100)         10200       simplernn_input_2[0][0]          
____________________________________________________________________________________________________
dense_2 (Dense)                    (None, 10)          1010        simplernn_2[0][0]                
____________________________________________________________________________________________________
activation_2 (Activation)          (None, 10)          0           dense_2[0][0]                    
Total params: 11210
____________________________________________________________________________________________________


In [26]:
import numpy as np

# Online training: feed samples one at a time (batch of size 1) and only use
# the first `n_online_samples` training examples in every epoch.
n_online_samples = 100

# `range` instead of `xrange` so the cell runs on both Python 2 and Python 3;
# the epoch counter starts at 1 so it can be printed without mutating the
# loop variable.
for epoch in range(1, nb_epochs + 1):
    print("... epoch %d" % epoch)
    for i in range(n_online_samples):
        # Wrap the sample and its label in a leading batch axis of length 1.
        model.train_on_batch(np.asarray([X_train[i]]), np.asarray([Y_train[i]]))

... epoch 1
... epoch 2
... epoch 3
... epoch 4
... epoch 5


In [110]:
# Shape of a one-sample batch built the same way as in the training loop.
# NOTE(review): the recorded output (1, 140) carries execution count 110, i.e.
# it was produced after X_train had been rebound to the IMDB sequences (whose
# first review has length 140). With the MNIST arrays loaded above, whose
# samples have shape (784, 1), this would be (1, 784, 1) -- re-run to confirm.
np.asarray([X_train[0]]).shape

(1, 140)

In [109]:
# Shape of the matching one-sample label batch (recorded output: (1, 10)).
np.asarray([Y_train[0]]).shape

(1, 10)

In [15]:
# model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs,
#          verbose=1, validation_data=(X_test, Y_test))

# scores = model.evaluate(X_test, Y_test, verbose=0)
# print('IRNN test score:', scores[0])
# print('IRNN test accuracy:', scores[1])

# IMDB

In [31]:
from __future__ import print_function
import numpy as np
# Fix NumPy's global random state so repeated runs draw the same numbers.
seed_value = 1337
np.random.seed(seed_value)

In [83]:
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Embedding
from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed
from keras.datasets import imdb

In [71]:
# --- Settings for the IMDB experiment ---

# Single pass over the data.
# NOTE: this rebinds the `nb_epochs` that the MNIST section set to 5.
nb_epochs = 1

# Vocabulary cap: passed as `nb_words` to the IMDB loader and used as the
# input size of the Embedding layer.
max_features = 20000

# Disabled settings (kept for reference):
# maxlen = 80  # cut texts after this number of words (among top max_features most common words)
# batch_size = 32

In [72]:
print('Loading data...')

# Restrict the vocabulary to `max_features` words and hold out 20% of the
# reviews as the test split.
imdb_train, imdb_test = imdb.load_data(nb_words=max_features, test_split=0.2)
X_train, y_train = imdb_train
X_test, y_test = imdb_test

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

# print('Pad sequences (samples x time)')
# X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
# X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
# print('X_train shape:', X_train.shape)
# print('X_test shape:', X_test.shape)

Loading data...
20000 train sequences
5000 test sequences


In [73]:
# Convert every review into its own NumPy array. A list comprehension is used
# instead of `map` because on Python 3 `map` returns a one-shot iterator that
# cannot be indexed, and later cells index X_train_array[i].
X_train_array = [np.asarray(review) for review in X_train]

In [74]:
# Show that the reviews have different lengths (first five only).
# Iterating over a slice avoids `xrange`, which does not exist on Python 3.
for review in X_train_array[:5]:
    print(review.shape)

(140,)
(268,)
(193,)
(170,)
(460,)


In [99]:
print('Build model...')

embedding_dim = 128  # size of each learned word vector
rnn_units = 128      # width of the recurrent layer

model = Sequential()
# Map word indices to dense vectors, with dropout on the embeddings.
model.add(Embedding(max_features, embedding_dim, dropout=0.2))
# Recurrent encoder; swapping SimpleRNN for a GRU is a suggested experiment.
model.add(SimpleRNN(rnn_units, dropout_W=0.2, dropout_U=0.2))
# A single sigmoid unit for the binary target.
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Other optimizers and optimizer configurations are worth trying here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Build model...
____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
embedding_8 (Embedding)            (None, None, 128)   2560000     embedding_input_8[0][0]          
____________________________________________________________________________________________________
simplernn_6 (SimpleRNN)            (None, 128)         32896       embedding_8[0][0]                
____________________________________________________________________________________________________
dense_10 (Dense)                   (None, 1)           129         simplernn_6[0][0]                
____________________________________________________________________________________________________
activation_9 (Activation)          (None, 1)           0           dense_10[0][0]                   
Total params: 2593025
______________________________________________________

In [101]:
# Dump the freshly initialised weights (before any train_on_batch call) so
# they can be compared by eye with the dump taken after training.
# NOTE(review): this prints every weight array in full; the recorded output is
# truncated.
model.get_weights()

[array([[-0.02355493,  0.03775547,  0.04400666, ...,  0.02206854,
          0.04706001, -0.04419404],
        [ 0.01896344,  0.0287048 , -0.01062597, ..., -0.01697459,
          0.00551123, -0.04316794],
        [-0.0296558 ,  0.00680088, -0.00907463, ...,  0.02209495,
         -0.00933748, -0.01761595],
        ..., 
        [-0.00046353, -0.02349745,  0.01643985, ...,  0.00392914,
         -0.04027422, -0.01190181],
        [-0.02055955, -0.02201191, -0.01494761, ...,  0.03884822,
         -0.03894757, -0.04079661],
        [ 0.00656505,  0.02693819,  0.00667183, ...,  0.04668265,
         -0.03533   ,  0.03377445]], dtype=float32),
 array([[ 0.1055688 , -0.12342519, -0.10878105, ..., -0.00858231,
          0.12598683, -0.08081356],
        [ 0.07604036,  0.10669979,  0.03297701, ..., -0.04051876,
          0.1440569 ,  0.0638234 ],
        [ 0.07991011,  0.14819762,  0.14840211, ...,  0.01174653,
          0.14163351,  0.03934941],
        ..., 
        [-0.10020792, -0.06925484,  0

In [103]:
import numpy as np

# Online training on variable-length reviews: every review is its own batch of
# size 1, so no padding is needed. Only the first `n_sentences` reviews are used.
n_sentences = 10

# `range` instead of `xrange` so the cell runs on both Python 2 and Python 3;
# the epoch counter starts at 1 so it can be printed without mutating the
# loop variable.
for epoch in range(1, nb_epochs + 1):
    print("... epoch %d" % epoch)
    for i in range(n_sentences):
        print("... processing sentence %d" % (i + 1))
        # Wrap the review and its label in a leading batch axis of length 1.
        model.train_on_batch(np.asarray([X_train_array[i]]),
                             np.asarray([y_train[i]]))

... epoch 1
... processing sentence 1
... processing sentence 2
... processing sentence 3
... processing sentence 4
... processing sentence 5
... processing sentence 6
... processing sentence 7
... processing sentence 8
... processing sentence 9
... processing sentence 10


In [105]:
# Shape of the one-review batch passed to train_on_batch for the first review
# (recorded output: (1, 140), i.e. one review of 140 word indices).
np.asarray([X_train_array[0]]).shape

(1, 140)

In [106]:
# Shape of the matching one-label batch (recorded output: (1,)).
np.asarray([y_train[0]]).shape

(1,)

In [104]:
# Dump the weights again after the train_on_batch loop; comparing with the
# earlier dump shows that some entries changed, i.e. the updates were applied.
# NOTE(review): this prints every weight array in full; the recorded output is
# truncated.
model.get_weights()

[array([[-0.02355493,  0.03775547,  0.04400666, ...,  0.02206854,
          0.04706001, -0.04419404],
        [ 0.01370944,  0.03077123, -0.00727041, ..., -0.01355962,
          0.00217738, -0.04649   ],
        [-0.02992092,  0.00511273, -0.01397125, ...,  0.01638973,
         -0.00536492, -0.01938026],
        ..., 
        [-0.00046353, -0.02349745,  0.01643985, ...,  0.00392914,
         -0.04027422, -0.01190181],
        [-0.02055955, -0.02201191, -0.01494761, ...,  0.03884822,
         -0.03894757, -0.04079661],
        [ 0.00656505,  0.02693819,  0.00667183, ...,  0.04668265,
         -0.03533   ,  0.03377445]], dtype=float32),
 array([[ 0.10890196, -0.11948258, -0.1114145 , ..., -0.00828316,
          0.12316964, -0.07707916],
        [ 0.07849296,  0.11122414,  0.02938139, ..., -0.04434482,
          0.141849  ,  0.06142586],
        [ 0.07868151,  0.14369197,  0.15295935, ...,  0.01617073,
          0.14210127,  0.04175207],
        ..., 
        [-0.09932148, -0.06436142,  0

In [None]:
# print('Train...')
# print(X_train.shape)
# print(y_train.shape)
# model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
#           validation_data=(X_test, y_test))
# score, acc = model.evaluate(X_test, y_test,
#                             batch_size=batch_size)
# print('Test score:', score)
# print('Test accuracy:', acc)