In [113]:
import numpy as np
from utils import *
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Lambda,Bidirectional
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform

In [132]:


X_train, Y_train = read_csv('data/train.csv') 
X_test, Y_test = read_csv('data/test.csv') 

maxLen = 15
print(maxLen)

word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('data/glove.6B.50d.txt')

def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` (described in Figure 4). 
    
    Arguments:
    X -- array of sentences (strings), of shape (m, 1)
    word_to_index -- a dictionary containing the each word mapped to its index
    max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this. 
    
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """

    m = X.shape[0]
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        sentence_words = X[i].lower().split()
        j = 0
        for w in sentence_words:
            if w not in word_to_index:
                X_indices[i, j] = 0 # HACK - FIX SOON
            else:
                if j >= maxLen:
                    print (sentence_words)
                X_indices[i, j] = word_to_index[w]
            j = j+1
    return X_indices

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["lemon"].shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim)) # curious why not transpose of this...
    # Sets each row "index" of the embedding matrix to be 
    # the word vector representation of the "index"th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    
    embedding_layer = Embedding(vocab_len, emb_dim, trainable = False)

    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix]) # now it's pretrained!

    return embedding_layer

embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)


15


In [133]:
# small 
X_dev, Y_dev = read_csv('data/dev.csv')
X_dev_indices = sentences_to_indices(X_dev, word_to_index, maxLen)

In [134]:
def Model_V1(input_shape, word_to_vec_map, word_to_index):
    """
    Function creating the Model-V1 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """
    
    sentence_indices = Input(shape = input_shape, dtype = np.int32)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

    # Propagates sentence_indices through the embedding layer
    embeddings = embedding_layer(sentence_indices)

    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
    LSTM1 = LSTM(128, return_sequences = True,name='LSTM1')(embeddings)
    # Adds dropout with probability 0.5
    X = Dropout(0.5)(LSTM1)
    # Another LSTM layer, but just returns one output
    LSTM2 = LSTM(128, return_sequences = True, name='LSTM2')(X)
    
    def get_last(X):
        return X[:,-1,:]
    
    LSTM2Last = Lambda(get_last, name='LSTM2-last')(LSTM2)
    Dropout2 = Dropout(0.5,name='Dropout2')(LSTM2Last)
    
    # Propagating through a Dense layer with sigmoid activation to get back a scalar
    Dense1 = Dense(1,name='Dense1')(Dropout2)
    X = Activation('sigmoid',name='output_layer')(Dense1)

    model = Model(inputs = sentence_indices, outputs = X)

    return model

# Adjustable model

In [135]:
def Model_V2(input_shape, word_to_vec_map, word_to_index,num_layer,num_cell,dropout_ratio,bidirectional):
    """
    Function creating the Model-V1 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """
    
    sentence_indices = Input(shape = input_shape, dtype = np.int32)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

    # Propagates sentence_indices through the embedding layer
    X = embedding_layer(sentence_indices)
    
    # add the first layer, if there is any.
    if num_layer == 2:
        print (2)
        # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
        if bidirectional == True:
            print ('b1')
            LSTM1 = Bidirectional(LSTM(num_cell, return_sequences = True),name='LSTM1')(X)
        else:
            print('l1')
            LSTM1 = LSTM(num_cell, return_sequences = True,name='LSTM1')(X)
        # Adds dropout with probability 0.5
        X = Dropout(dropout_ratio)(LSTM1)

    # add second layer (or the only layer)
    if  num_layer == 1 and bidirectional == True:
        print ('b2')
        LSTM2 = Bidirectional(LSTM(num_cell, return_sequences = True), name='LSTM2')(X)
    else:
        print ('l2')
    # Another LSTM layer, but just returns one output
        LSTM2 = LSTM(num_cell, return_sequences = True, name='LSTM2')(X)
    
    def get_last(X):
        return X[:,-1,:]
    
    LSTM2Last = Lambda(get_last, name='LSTM2-last')(LSTM2)
    Dropout2 = Dropout(dropout_ratio,name='Dropout2')(LSTM2Last)
    
    # Propagating through a Dense layer with sigmoid activation to get back a scalar
    Dense1 = Dense(1,name='Dense1')(Dropout2)
    X = Activation('sigmoid',name='output_layer')(Dense1)

    model = Model(inputs = sentence_indices, outputs = X)

    return model

In [162]:
#model
num_layer=[1,2] # index 0 is better
num_cell=[32,64,96,128] # 0 index
drop_ratio=[0.1,0.2,0.3] # 2 index
bidirectional=[False,True] # 0 index

# optimizer
beta1=0.9
beta2=0.999

#fitting
learning_rate=[0.001]#[0.001,0.002,0.003,0.004]
batch_size=[25]#[5,10,15,20,25]

# Training the model

In [170]:
val_acc=np.zeros([len(num_layer),len(num_cell),len(drop_ratio),len(bidirectional),len(learning_rate),len(batch_size)])
tra_acc=np.zeros([len(num_layer),len(num_cell),len(drop_ratio),len(bidirectional),len(learning_rate),len(batch_size)])
for il in range(len(num_layer)):
    for ic in range(len(num_cell)):
        for idr in range(len(drop_ratio)):
            for ibr in range(len(bidirectional)):
                for ilr in range(len(learning_rate)):
                    for ibs in range(len(batch_size)):
                        # might want to change the metric here
                        model = Model_V2((maxLen,), word_to_vec_map, word_to_index,num_layer[il],num_cell[ic],drop_ratio[idr],bidirectional[ibr])
                        optimizer = Adam(lr=learning_rate[ilr], beta_1=beta1, beta_2=beta2, decay=0.0, epsilon=None)
                        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
                        # train the model
                        model_fitting = model.fit(X_train_indices, Y_train, epochs = 20, batch_size = batch_size[ibs], shuffle=True,validation_data=(X_dev_indices, Y_dev))
                        loss, acc = model.evaluate(X_test_indices, Y_test)
                        model.save('my_model.h5')
                        print("Test accuracy = ", acc)
                        val_acc[il][ic][idr][ibr][ilr][ibs] = model_fitting.history['val_acc'][-1]
                        tra_acc[il][ic][idr][ibr][ilr][ibs] = model_fitting.history['acc'][-1]
                    

l2
Train on 201 samples, validate on 184 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy =  0.8309782608695652
b2
Train on 201 samples, validate on 184 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy =  0.8342391304347826
l2
Train on 201 samples, validate on 184 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy =  0.8364130434782608
b2
Train on 201 samples, validate on 184 samples
Epo

In [173]:
for il in range(len(num_layer)):
    for ic in range(len(num_cell)):
        for idr in range(len(drop_ratio)):
            for ibr in range(len(bidirectional)):
                for ilr in range(len(learning_rate)):
                    for ibs in range(len(batch_size)):
                        print ('num_layer:'+str(num_layer[il]),'num_cell:'+str(num_cell[ic]),'drop_ratio:'+str(drop_ratio[idr]),'bidirectional:'+str(bidirectional[ibr]))
                        print('val_acc'+str(val_acc[il][ic][idr][ibr][ilr][ibs]),'tra_acc'+str(tra_acc[il][ic][idr][ibr][ilr][ibs]))

num_layer:1 num_cell:32 drop_ratio:0.1 bidirectional:False
val_acc[0.82065218] tra_acc[0.83084577]
num_layer:1 num_cell:32 drop_ratio:0.1 bidirectional:True
val_acc[0.86413043] tra_acc[0.85572139]
num_layer:1 num_cell:32 drop_ratio:0.2 bidirectional:False
val_acc[0.83152173] tra_acc[0.85572139]
num_layer:1 num_cell:32 drop_ratio:0.2 bidirectional:True
val_acc[0.85326087] tra_acc[0.85572139]
num_layer:1 num_cell:32 drop_ratio:0.3 bidirectional:False
val_acc[0.87499999] tra_acc[0.86567164]
num_layer:1 num_cell:32 drop_ratio:0.3 bidirectional:True
val_acc[0.86413044] tra_acc[0.89054726]
num_layer:1 num_cell:64 drop_ratio:0.1 bidirectional:False
val_acc[0.85326087] tra_acc[0.92537313]
num_layer:1 num_cell:64 drop_ratio:0.1 bidirectional:True
val_acc[0.83152173] tra_acc[0.93034826]
num_layer:1 num_cell:64 drop_ratio:0.2 bidirectional:False
val_acc[0.88043479] tra_acc[0.920398]
num_layer:1 num_cell:64 drop_ratio:0.2 bidirectional:True
val_acc[0.83695652] tra_acc[0.88557213]
num_layer:1 num_c

In [252]:
print(val_acc.shape)
np.mean(val_acc[:,:,:,:,:,:,:])


(2, 4, 3, 2, 2, 1, 1)


0.421648549489623

In [249]:
val_acc

array([[[[[[[0.82065218]],

           [[0.        ]]],


          [[[0.86413043]],

           [[0.        ]]]],



         [[[[0.83152173]],

           [[0.        ]]],


          [[[0.85326087]],

           [[0.        ]]]],



         [[[[0.87499999]],

           [[0.        ]]],


          [[[0.86413044]],

           [[0.        ]]]]],




        [[[[[0.85326087]],

           [[0.        ]]],


          [[[0.83152173]],

           [[0.        ]]]],



         [[[[0.88043479]],

           [[0.        ]]],


          [[[0.83695652]],

           [[0.        ]]]],



         [[[[0.85326086]],

           [[0.        ]]],


          [[[0.85326087]],

           [[0.        ]]]]],




        [[[[[0.88586957]],

           [[0.        ]]],


          [[[0.82065217]],

           [[0.        ]]]],



         [[[[0.86956522]],

           [[0.        ]]],


          [[[0.66304347]],

           [[0.        ]]]],



         [[[[0.87499999]],

           [[0.        ]

# Weight Plot

In [161]:
model_fitting.history['val_acc'][-1]
model_fitting.history['acc'][-1]

1.0

In [254]:
from __future__ import print_function

from keras import backend as K
from keras.engine import Input, Model, InputSpec
from keras.layers import Dense, Activation, Dropout, Lambda
from keras.layers import Embedding, LSTM
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras.utils.data_utils import get_file
from keras.datasets import imdb

import numpy as np
import random
import sys
import pdb

In [255]:
def visualize_model_bs(model, include_gradients=False):

    LSTM2 = model.get_layer('LSTM2')
    Dropout2 = model.get_layer('Dropout2')
    output_layer = model.get_layer('output_layer')

    inputs = []
    inputs.extend(model.inputs)

    outputs = []
    outputs.extend(model.outputs)
    outputs.append(LSTM2.output)
    outputs.append(LSTM2.cell.kernel_f)  # -- weights of the forget gates (assuming LSTM)
    #print (LSTM1.trainable_weights)

    if include_gradients:
        loss = K.mean(model.output)  # [batch_size, 1] -> scalar
        grads = K.gradients(loss, LSTM2.output)
        grads_norm = grads / (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        outputs.append(grads_norm)

    all_function = K.function(inputs, outputs)
    output_function = K.function([Dropout2.input], model.outputs)
    print(Dropout2.input)
    return all_function, output_function

In [256]:
all_function, output_function = visualize_model_bs(model, include_gradients=True)

Tensor("LSTM2-last_77/strided_slice:0", shape=(?, 128), dtype=float32)


In [296]:
sentence="You you you you you you you you you"
t = np.array([sentence])
X = sentences_to_indices(t, word_to_index, maxLen)
# -- Return scores, raw rnn values and gradients
# scores is equivalent to model.predict(X)
scores, rnn_values, rnn_gradients, W_i = all_function([X])
print(scores.shape, rnn_values.shape, rnn_gradients.shape, W_i.shape)

# -- score prediction
print("Scores:", scores)

# -- Return scores at each step in the time sequence
time_distributed_scores = map(lambda x: output_function([x]), rnn_values)
print("Time distributed (word-level) scores:", map(lambda x: x[0], time_distributed_scores))


(1, 1) (1, 15, 128) (256, 128) (1, 1, 15, 128)
Scores: [[0.66565865]]
Time distributed (word-level) scores: [array([[0.32266212],
       [0.19527735],
       [0.132155  ],
       [0.10744365],
       [0.10362193],
       [0.11564086],
       [0.14545973],
       [0.19823971],
       [0.2757915 ],
       [0.35430288],
       [0.43160397],
       [0.5011977 ],
       [0.5618449 ],
       [0.6155857 ],
       [0.66565865]], dtype=float32)]


In [333]:
from colored import fg, bg, attr
words=sentence.split()
base=245
scale=15


color_weight=base+np.array(rnn_values).reshape(rnn_shape)*scale

# color tuning
min_c=int(min(color_weight.reshape(15*128)))
max_c=int(max(color_weight.reshape(15*128)))
print(min_c,max_c)

for i in range(min_c,max_c):
    print ('%s%s' % (bg (i),'yes'),end="")
    
print("")
    

rnn_shape=rnn_values.shape
for i in range(rnn_shape[2]):
    #for w in range(rnn_shape[2]):
    for w in range(len(words)):
        print ('%s%s' % (bg (int(color_weight[0,w,i])),words[w]),end="")
        print ('%s '%attr(1) ,end="")
    print ('')

234 255
[48;5;234myes[48;5;235myes[48;5;236myes[48;5;237myes[48;5;238myes[48;5;239myes[48;5;240myes[48;5;241myes[48;5;242myes[48;5;243myes[48;5;244myes[48;5;245myes[48;5;246myes[48;5;247myes[48;5;248myes[48;5;249myes[48;5;250myes[48;5;251myes[48;5;252myes[48;5;253myes[48;5;254myes
[48;5;246mYou[1m [48;5;246myou[1m [48;5;246myou[1m [48;5;246myou[1m [48;5;246myou[1m [48;5;245myou[1m [48;5;245myou[1m [48;5;245myou[1m [48;5;245myou[1m 
[48;5;248mYou[1m [48;5;252myou[1m [48;5;254myou[1m [48;5;255myou[1m [48;5;255myou[1m [48;5;255myou[1m [48;5;254myou[1m [48;5;254myou[1m [48;5;253myou[1m 
[48;5;245mYou[1m [48;5;245myou[1m [48;5;244myou[1m [48;5;244myou[1m [48;5;243myou[1m [48;5;243myou[1m [48;5;244myou[1m [48;5;244myou[1m [48;5;244myou[1m 
[48;5;241mYou[1m [48;5;239myou[1m [48;5;238myou[1m [48;5;237myou[1m [48;5;237myou[1m [48;5;237myou[1m [48;5;238myou[1m [48;5;238myou[1m [48;5;239myou[1m 
[48;5;24

In [295]:
rnn_shape[2]

128

In [272]:
from colored import fg, bg, attr

words=sentence.split()
base=124
scale=6
color_weight=base+np.array(time_distributed_scores).reshape(maxLen)*scale

for i in range(len(words)):
    print ('%s%s' % (bg (int(color_weight[i])),words[i]),end="")
    print ('%s '%attr(1) ,end="")

    

[48;5;127mYou[1m [48;5;127mdo[1m [48;5;128mme[1m [48;5;128mwrong[1m [48;5;129mvery[1m [48;5;129mhave[1m [48;5;129ma[1m [48;5;129mof[1m [48;5;129mmonths[1m [48;5;129mimmediate[1m 

# Data processing dev/test/train

In [None]:
# small 
X_test, Y_test = read_csv('data/test.csv') 

In [195]:
# small
import pandas as pd
dev_ratio=0.1
total_test_num=len(X_test_indices)
dev_num = int(dev_ratio * len(X_test_indices))
X_dev_indices = []
print (dev_num)
dev_index=(np.random.random([dev_num])*total_test_num).astype(int)

X_dev_indices=X_test_indices[dev_index,:]
X_dev = X_test[dev_index]
Y_dev = Y_test[dev_index]

X_test_after_dev=np.delete(X_test,dev_index,0)
Y_test_after_dev=np.delete(Y_test,dev_index,0)

# test after dev
test_after_dev={'X': X_test_after_dev, 'Y': Y_test_after_dev}
test_after_dev = pd.DataFrame(test_after_dev)
test_after_dev.to_csv('test_minus_dev.csv',header=False,index=False)

# dev
dev={'X': X_dev, 'Y': Y_dev}
dev = pd.DataFrame(dev)
dev.to_csv('dev.csv',header=False,index=False)

184


In [197]:
# big
X_test, Y_test = read_csv('data/test-big.csv')

# small
import pandas as pd
dev_ratio=0.1
total_test_num=len(X_test_indices)
dev_num = int(dev_ratio * len(X_test_indices))
X_dev_indices = []
print (dev_num)
dev_index=(np.random.random([dev_num])*total_test_num).astype(int)

X_dev_indices=X_test_indices[dev_index,:]
X_dev = X_test[dev_index]
Y_dev = Y_test[dev_index]

X_test_after_dev=np.delete(X_test,dev_index,0)
Y_test_after_dev=np.delete(Y_test,dev_index,0)

# test after dev
test_after_dev={'X': X_test_after_dev, 'Y': Y_test_after_dev}
test_after_dev = pd.DataFrame(test_after_dev)
test_after_dev.to_csv('test_minus_dev_big.csv',header=False,index=False)

# dev
dev={'X': X_dev, 'Y': Y_dev}
dev = pd.DataFrame(dev)
dev.to_csv('dev_big.csv',header=False,index=False)

184
