In [1]:
import numpy as np

In [2]:
X = []
X.append([1,0,0,0])
X.append([0,1,0,0])
X.append([0,0,1,0])
X.append([0,0,0,1])
X.append([0,0,0,1])
X.append([1,0,0,0])
X.append([0,1,0,0])
X.append([0,0,1,0])
X.append([0,0,0,1])

y = [0.2,0.3,0.4,0.5,0.05,0.1,0.2,0.3,0.4]

In [3]:
def sigmoid(x):
    return 1/(1+np.exp(-x))
def sigmoid_der(x):
    return 1.0 - x**2

In [4]:
layers = []
# 4 input variables, 16 hidden units and 1 output variable
n_units = (4,16,1)
n_layers = len(n_units)

layers.append(np.ones(n_units[0]+1+n_units[1]))
for i in range(1,n_layers):
    layers.append(np.ones(n_units[i]))
weights = []
for i in range(n_layers-1):
    weights.append(np.zeros((layers[i].size,layers[i+1].size)))
weights_delta = [0,]*len(weights)


In [5]:
def forward(data):
    layers[0][:n_units[0]] = data
    layers[0][n_units[0]:-1] = layers[1]
    
    # propagate the data forwards
    for i in range(1,n_layers):
        layers[i][...] = sigmoid(np.dot(layers[i-1],weights[i-1]))
    return layers[-1]

In [8]:
def backward(target,update=False,learning_rate=0.1,momentum=0.1):
    deltas = []
    error = target - layers[-1]
    if update is False : return error
    delta = error * sigmoid_der(layers[-1])
    deltas.append(delta)
    
    # determine error in hidden layers
    for i in range(n_layers-2,0,-1):
        delta = np.dot(deltas[0],weights[i].T) * sigmoid_der(layers[i])
        deltas.insert(0,delta)
    #update weights
    for i in range(len(weights)):
        layer = np.atleast_2d(layers[i])
        delta = np.atleast_2d(deltas[i])
        weights_delta_temp = np.dot(layer.T,delta)
        weights[i] += learning_rate * weights_delta_temp + momentum * weights_delta[i]
        weights_delta[i] = weights_delta_temp
    
    return (error**2).sum()

In [7]:
n_epochs = 10000
for i in range(n_epochs):
    loss = 0
    for j in range(len(X)):
        forward(X[j])
        backward(y[j])
        loss += (y[j] - forward(X[j]))**2
    if i%1000 == 0:
        print("epoch {} - loss:{:04.4f}".format(i,loss[0]))

epoch 0 - loss:0.3116
epoch 1000 - loss:0.1660
epoch 2000 - loss:0.1801
epoch 3000 - loss:0.1877
epoch 4000 - loss:0.1914
epoch 5000 - loss:0.1922
epoch 6000 - loss:0.1920
epoch 7000 - loss:0.1916
epoch 8000 - loss:0.1913
epoch 9000 - loss:0.1912


In [10]:
for i in range(len(X)):
    pred = forward(X[i])
    loss = backward(y[j],update=False)
    print("X: {}; y: {:04.2f}; pred: {:04.2f}".format(X[i],y[i],pred[0]))


X: [1, 0, 0, 0]; y: 0.20; pred: 0.14
X: [0, 1, 0, 0]; y: 0.30; pred: 0.40
X: [0, 0, 1, 0]; y: 0.40; pred: 0.29
X: [0, 0, 0, 1]; y: 0.50; pred: 0.34
X: [0, 0, 0, 1]; y: 0.05; pred: 0.30
X: [1, 0, 0, 0]; y: 0.10; pred: 0.16
X: [0, 1, 0, 0]; y: 0.20; pred: 0.38
X: [0, 0, 1, 0]; y: 0.30; pred: 0.30
X: [0, 0, 0, 1]; y: 0.40; pred: 0.33


# LSTM with Keras

In [12]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation
from keras.layers import Embedding,LSTM

from keras.datasets import imdb


In [13]:
n_words = 1000
(X_train, Y_train) , (X_test, Y_test) = imdb.load_data(num_words=n_words)
print("Train seq : {} \nTest seq:{}".format(len(X_train),len(X_test)))

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
Train seq : 25000 
Test seq:25000


In [14]:
print("Train example : {} \nTest example:{}".format(X_train[0],X_test[0]))
# data is already preprocessed (words are mapped to vectors)

Train example : [1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32] 
Test example:[1, 591, 202, 14, 31, 6, 717, 10, 10, 2, 2, 5, 4, 360, 7, 4, 177, 2, 394, 354, 4, 123, 9, 2, 2, 2, 

In [15]:
# Pad sequences with max_len
max_len = 200
X_train = sequence.pad_sequences(X_train,maxlen=max_len)
X_test = sequence.pad_sequences(X_test,maxlen=max_len)


In [16]:
# Define network architecture
model = Sequential()
model.add(Embedding(n_words,50,input_length=max_len))
model.add(Dropout(0.2))
model.add(LSTM(100,dropout=0.2,recurrent_dropout=0.2))
model.add(Dense(250,activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1,activation="sigmoid"))

model.compile(loss="binary_crossentropy",optimizer="adagrad",metrics=["accuracy"])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 200, 50)           50000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 200, 50)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               60400     
_________________________________________________________________
dense_1 (Dense)              (None, 250)               25250     
_________________________________________________________________
dropout_2 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 251       
Total params: 135,901
Trainable params: 135,901
Non-trainable params: 0
_________________________________________________________________


In [17]:
batch_size = 64
n_epochs = 10

model.fit(X_train, Y_train,batch_size=batch_size,epochs=n_epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc1acec3828>

In [18]:
print("Accuracy on test set : {}".format(model.evaluate(X_test,Y_test)[1]))

Accuracy on test set : 0.84036


In [29]:
from keras.layers import GRU
from keras.callbacks import EarlyStopping

In [24]:
model2 = Sequential()
model2.add(Embedding(n_words,50,input_length=max_len))
model2.add(GRU(100,dropout=0.2,recurrent_dropout=0.2))
model2.add(Dropout(0.2))
model2.add(Dense(250,activation="relu"))
model2.add(Dropout(0.2))
model2.add(Dense(1,activation="sigmoid"))

model2.compile(loss="binary_crossentropy",optimizer="adagrad",metrics=["accuracy"])
model2.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 200, 50)           50000     
_________________________________________________________________
gru_2 (GRU)                  (None, 100)               45300     
_________________________________________________________________
dropout_5 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 250)               25250     
_________________________________________________________________
dropout_6 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 251       
Total params: 120,801
Trainable params: 120,801
Non-trainable params: 0
_________________________________________________________________


In [30]:
callbacks = [EarlyStopping(monitor="val_acc",patience=3)]

In [31]:
batch_size = 512
n_epochs = 100

model2.fit(X_train, Y_train,batch_size=batch_size,epochs=n_epochs,validation_split=0.2,callbacks=callbacks)

Train on 20000 samples, validate on 5000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.callbacks.History at 0x7fc19cda5080>

In [33]:
print("Accuracy on test set : {}".format(model2.evaluate(X_test,Y_test)[1]))

Accuracy on test set : 0.86796


# Bi-directional RNNs

In [32]:
from keras.layers import Bidirectional

In [34]:
bi_model = Sequential()
bi_model.add(Embedding(n_words,50,input_length=max_len))
bi_model.add(Bidirectional(LSTM(100,dropout=0.2,recurrent_dropout=0.2)))
bi_model.add(Dense(250,activation="relu"))
bi_model.add(Dropout(0.2))
bi_model.add(Dense(1,activation="sigmoid"))

bi_model.summary()
bi_model.compile(loss="binary_crossentropy",optimizer="adagrad",metrics=["accuracy"])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 200, 50)           50000     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 200)               120800    
_________________________________________________________________
dense_7 (Dense)              (None, 250)               50250     
_________________________________________________________________
dropout_7 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 251       
Total params: 221,301
Trainable params: 221,301
Non-trainable params: 0
_________________________________________________________________


In [35]:
bi_model.fit(X_train,Y_train,batch_size=batch_size,epochs=n_epochs,validation_split=0.2,callbacks=callbacks)

Train on 20000 samples, validate on 5000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.callbacks.History at 0x7fc19e45cef0>

In [36]:
print("Accuracy on test set : {}".format(bi_model.evaluate(X_test,Y_test)[1]))

Accuracy on test set : 0.84464


# RNN Text Generation