In [154]:
import numpy as np
from utils import *
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Lambda
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform

X_train, Y_train = read_csv('data/train.csv') 
X_test, Y_test = read_csv('data/test.csv') 

maxLen = 15
print(maxLen)

word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('data/glove.6B.50d.txt')

def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` (described in Figure 4). 
    
    Arguments:
    X -- array of sentences (strings), of shape (m, 1)
    word_to_index -- a dictionary containing the each word mapped to its index
    max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this. 
    
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """

    m = X.shape[0]
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        sentence_words = X[i].lower().split()
        j = 0
        for w in sentence_words:
            if w not in word_to_index:
                X_indices[i, j] = 0 # HACK - FIX SOON
            else:
                if j >= maxLen:
                    print (sentence_words)
                X_indices[i, j] = word_to_index[w]
            j = j+1
    return X_indices

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["lemon"].shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim)) # curious why not transpose of this...
    # Sets each row "index" of the embedding matrix to be 
    # the word vector representation of the "index"th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    
    embedding_layer = Embedding(vocab_len, emb_dim, trainable = False)

    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix]) # now it's pretrained!

    return embedding_layer

embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

def Model_V1(input_shape, word_to_vec_map, word_to_index):
    """
    Function creating the Model-V1 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """
    
    sentence_indices = Input(shape = input_shape, dtype = np.int32)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

    # Propagates sentence_indices through the embedding layer
    embeddings = embedding_layer(sentence_indices)

    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
    LSTM1 = LSTM(128, return_sequences = True,name='LSTM1')(embeddings)
    # Adds dropout with probability 0.5
    X = Dropout(0.5)(LSTM1)
    # Another LSTM layer, but just returns one output
    LSTM2 = LSTM(128, return_sequences = True, name='LSTM2')(X)
    
    def get_last(X):
        return X[:,-1,:]
    
    LSTM2Last = Lambda(get_last, name='LSTM2-last')(LSTM2)
    Dropout2 = Dropout(0.5,name='Dropout2')(LSTM2Last)
    
    # Propagating through a Dense layer with sigmoid activation to get back a scalar
    Dense1 = Dense(1,name='Dense1')(Dropout2)
    X = Activation('sigmoid',name='output_layer')(Dense1)

    model = Model(inputs = sentence_indices, outputs = X)

    return model

model = Model_V1((maxLen,), word_to_vec_map, word_to_index)

# might want to change the metric here
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)


15


In [156]:
# train the model
model.fit(X_train_indices, Y_train, epochs = 20, batch_size = 6, shuffle=True,validation_data=(X_dev_indices, Y_dev))
loss, acc = model.evaluate(X_test_indices, Y_test)
model.save('my_model.h5')
print()
print("Test accuracy = ", acc)

Train on 201 samples, validate on 184 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Test accuracy =  0.8592391304347826


In [18]:
from __future__ import print_function

from keras import backend as K
from keras.engine import Input, Model, InputSpec
from keras.layers import Dense, Activation, Dropout, Lambda
from keras.layers import Embedding, LSTM
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras.utils.data_utils import get_file
from keras.datasets import imdb

import numpy as np
import random
import sys
import pdb

In [19]:
def visualize_model_bs(model, include_gradients=False):

    LSTM2 = model.get_layer('LSTM2')
    Dropout2 = model.get_layer('Dropout2')
    output_layer = model.get_layer('output_layer')

    inputs = []
    inputs.extend(model.inputs)

    outputs = []
    outputs.extend(model.outputs)
    outputs.append(LSTM2.output)
    outputs.append(LSTM2.cell.kernel_f)  # -- weights of the forget gates (assuming LSTM)
    #print (LSTM1.trainable_weights)

    if include_gradients:
        loss = K.mean(model.output)  # [batch_size, 1] -> scalar
        grads = K.gradients(loss, LSTM2.output)
        grads_norm = grads / (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        outputs.append(grads_norm)

    all_function = K.function(inputs, outputs)
    output_function = K.function([Dropout2.input], model.outputs)
    print(Dropout2.input)
    return all_function, output_function

In [20]:
all_function, output_function = visualize_model_bs(model, include_gradients=True)

Tensor("LSTM2-last_5/strided_slice:0", shape=(?, 128), dtype=float32)


In [180]:
sentence="You do me wrong very have a of months immediate"
t = np.array([sentence])
X = sentences_to_indices(t, word_to_index, maxLen)
# -- Return scores, raw rnn values and gradients
# scores is equivalent to model.predict(X)
scores, rnn_values, rnn_gradients, W_i = all_function([X])
print(scores.shape, rnn_values.shape, rnn_gradients.shape, W_i.shape)

# -- score prediction
print("Scores:", scores)

# -- Return scores at each step in the time sequence
time_distributed_scores = map(lambda x: output_function([x]), rnn_values)
print("Time distributed (word-level) scores:", map(lambda x: x[0], time_distributed_scores))

(1, 1) (1, 15, 128) (128, 128) (1, 1, 15, 128)
Scores: [[0.90563]]
Time distributed (word-level) scores: [array([[0.55840904],
       [0.6682497 ],
       [0.7709293 ],
       [0.8368673 ],
       [0.871035  ],
       [0.8879638 ],
       [0.8953616 ],
       [0.89667165],
       [0.9008593 ],
       [0.90035504],
       [0.9031226 ],
       [0.90380293],
       [0.9046281 ],
       [0.9052044 ],
       [0.9056301 ]], dtype=float32)]


In [179]:
from colored import fg, bg, attr

words=sentence.split()
base=124
scale=6
color_weight=base+np.array(time_distributed_scores).reshape(maxLen)*scale

for i in range(len(words)):
    print ('%s %s' % (bg (int(color_weight[i])),words[i]))

for i in range(base,base+scale):
    print ('%s %s' % (bg (i),attr(1),'yes'))
    

[48;5;126m very
[48;5;127m have
[48;5;127m a
[48;5;126m of
[48;5;126m months
[48;5;126m immediate


TypeError: not all arguments converted during string formatting

# Data processing dev/test/train

In [None]:
# small 
X_test, Y_test = read_csv('data/test.csv') 



In [195]:
# small
import pandas as pd
dev_ratio=0.1
total_test_num=len(X_test_indices)
dev_num = int(dev_ratio * len(X_test_indices))
X_dev_indices = []
print (dev_num)
dev_index=(np.random.random([dev_num])*total_test_num).astype(int)

X_dev_indices=X_test_indices[dev_index,:]
X_dev = X_test[dev_index]
Y_dev = Y_test[dev_index]

X_test_after_dev=np.delete(X_test,dev_index,0)
Y_test_after_dev=np.delete(Y_test,dev_index,0)

# test after dev
test_after_dev={'X': X_test_after_dev, 'Y': Y_test_after_dev}
test_after_dev = pd.DataFrame(test_after_dev)
test_after_dev.to_csv('test_minus_dev.csv',header=False,index=False)

# dev
dev={'X': X_dev, 'Y': Y_dev}
dev = pd.DataFrame(dev)
dev.to_csv('dev.csv',header=False,index=False)

184


In [197]:
# big
X_test, Y_test = read_csv('data/test-big.csv')

# small
import pandas as pd
dev_ratio=0.1
total_test_num=len(X_test_indices)
dev_num = int(dev_ratio * len(X_test_indices))
X_dev_indices = []
print (dev_num)
dev_index=(np.random.random([dev_num])*total_test_num).astype(int)

X_dev_indices=X_test_indices[dev_index,:]
X_dev = X_test[dev_index]
Y_dev = Y_test[dev_index]

X_test_after_dev=np.delete(X_test,dev_index,0)
Y_test_after_dev=np.delete(Y_test,dev_index,0)

# test after dev
test_after_dev={'X': X_test_after_dev, 'Y': Y_test_after_dev}
test_after_dev = pd.DataFrame(test_after_dev)
test_after_dev.to_csv('test_minus_dev_big.csv',header=False,index=False)

# dev
dev={'X': X_dev, 'Y': Y_dev}
dev = pd.DataFrame(dev)
dev.to_csv('dev_big.csv',header=False,index=False)

184
