In [1]:
import argparse
import cPickle as pickle
import numpy as np
from keras.layers.embeddings import Embedding
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import recurrent
from keras.models import Graph, Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
import theano.tensor as T
from theano import function


Using Theano backend.


Using gpu device 0: GeForce GTX 780 (CNMeM is disabled)


In [14]:
def mean(x, axis=None, keepdims=False):
    """Average the elements of ``x`` via ``T.mean``.

    Args:
        x: tensor to reduce.
        axis: axis (or axes) to average over; ``None`` is passed through
            to ``T.mean`` unchanged.
        keepdims: forwarded to ``T.mean``.

    Returns:
        Whatever ``T.mean`` returns for the given arguments.
    """
    averaged = T.mean(x, axis=axis, keepdims=keepdims)
    return averaged

def l2_normalize(x, axis, epsilon=1e-12):
    """Scale ``x`` to unit Euclidean length along ``axis``.

    Args:
        x: tensor to normalise.
        axis: axis along which the L2 norm is computed.
        epsilon: lower bound applied to the squared norm before the square
            root. Slices whose squared norm is at least ``epsilon`` are
            divided by their exact norm; smaller ones (including all-zero
            slices) are divided by ``sqrt(epsilon)`` instead of by zero.

    Returns:
        A tensor shaped like ``x``.
    """
    squared_norm = T.sum(T.square(x), axis=axis, keepdims=True)
    # Clamp *before* the square root: dividing by a zero norm yields NaN, and
    # clamping inside the sqrt also avoids taking sqrt at exactly zero.
    norm = T.sqrt(T.maximum(squared_norm, epsilon))
    return x / norm

def cosine_similarity(y_true, y_pred):
    """Row-wise cosine similarity between two rank-2 tensors.

    Both inputs must have ``ndim == 2``. Each row is L2-normalised along
    axis 1 and the normalised rows are multiplied element-wise and summed,
    giving one similarity value per row.

    Raises:
        AssertionError: if either input is not rank 2.
    """
    assert y_true.ndim == 2, "y_true.ndim: " + str(y_true.ndim)
    assert y_pred.ndim == 2, "y_pred.ndim: " + str(y_pred.ndim)

    unit_true = l2_normalize(y_true, axis=1)
    unit_pred = l2_normalize(y_pred, axis=1)

    # Dot product of unit vectors == cosine of the angle between them.
    row_dots = T.sum(unit_true * unit_pred, axis=1, keepdims=False)
    return row_dots

def cosine_ranking_loss(y_true, y_pred, margin=0.01):
    """Margin ranking loss over (question, correct, incorrect) row triples.

    Rows of ``y_pred`` are read in consecutive groups of three: row ``3k`` is
    the question, row ``3k + 1`` the correct answer and row ``3k + 2`` the
    incorrect answer. Each batch must therefore hold whole triples in that
    order (no shuffling, batch size a multiple of 3).

    Per triple the loss is
    ``max(0, margin - cos(q, a_correct) + cos(q, a_incorrect))``;
    the mean over triples is returned.

    Args:
        y_true: target tensor. Its values do not affect the result; it only
            appears in the graph (see the note in the body).
        y_pred: rank-2 tensor of model outputs laid out as described above.
        margin: how much more similar the correct answer has to be than the
            incorrect one before a triple stops contributing to the loss.

    Returns:
        Scalar tensor: mean hinge loss over the triples in the batch.
    """
    q = y_pred[0::3]
    a_correct = y_pred[1::3]
    a_incorrect = y_pred[2::3]

    hinge = T.maximum(
        0.,
        margin - cosine_similarity(q, a_correct) + cosine_similarity(q, a_incorrect))

    # y_true is kept in the graph (presumably so the compiled training
    # function does not see an unused input -- confirm) via a *scalar* zero.
    # A scalar broadcasts against the per-triple vector whatever the output
    # width is, and zeros_like discards y_true's values, so NaN/inf in the
    # dummy targets cannot leak into the loss.
    y_true_anchor = T.sum(T.zeros_like(y_true))

    return mean(hinge - y_true_anchor, axis=-1)


In [4]:
print "Loading data..."
texts = pickle.load(open("../data/simple.pkl", "rb"))
assert texts.shape[0] % 3 == 0

Loading data...


In [5]:
# Keep only the first three rows, i.e. a single (question, correct, incorrect)
# triple under the layout cosine_ranking_loss expects. This overwrites
# `texts`: re-run the loading cell to get the full data set back.
texts = texts[:3]

In [7]:
# vocab_size is passed to Embedding as its input dimension, so it must exceed
# the largest id present: max id + 1.
# NOTE(review): this is computed from the current (truncated) `texts`, so it
# only covers ids occurring in those rows -- confirm that is intended.
vocab_size = np.max(texts) + 1
print "Vocabulary size:", vocab_size, "Texts: ", texts.shape

Vocabulary size: 3418 Texts:  (3, 77)


In [8]:
  # Alias for the recurrent layer class used when the model is built below.
  RNN = recurrent.GRU

In [16]:
  # Text encoder: token ids -> 300-d embeddings -> one 1024-d vector per text.
  model = Sequential()
  # mask_zero=True -- presumably id 0 is padding; confirm against the data.
  model.add(Embedding(vocab_size, 300, mask_zero=True))
  # return_sequences=False: emit only the final output, not one per timestep.
  model.add(RNN(1024, return_sequences=False))

In [17]:
# Print each layer's output shape and parameter count.
model.summary()

--------------------------------------------------------------------------------
Initial input shape: (None, 3418)
--------------------------------------------------------------------------------
Layer (name)                  Output Shape                  Param #             
--------------------------------------------------------------------------------
Embedding (embedding)         (None, None, 300)             1025400             
GRU (gru)                     (None, 1024)                  4070400             
--------------------------------------------------------------------------------
Total params: 5095800
--------------------------------------------------------------------------------


In [18]:
# Train with Adam on the custom triple-ranking loss instead of a built-in objective.
model.compile(optimizer="adam", loss=cosine_ranking_loss)

In [19]:
  # The targets are placeholders: the loss is computed from the predictions
  # alone. np.zeros guarantees finite values, whereas np.empty returns
  # uninitialised memory that may contain NaN/inf. The width (1024) matches
  # the RNN output size.
  # batch_size=3 with shuffle=False keeps each (question, correct, incorrect)
  # triple together and in order, which the loss's row slicing relies on.
  model.fit(texts, np.zeros((texts.shape[0], 1024)), batch_size=3, nb_epoch=10,
      validation_split=0, verbose=1, shuffle=False)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd294d40c10>

In [20]:
# Encode every text with the trained model, one text per batch.
pred = model.predict(texts, batch_size=1)

In [22]:
# Sanity check: one row per text, one column per RNN output unit.
pred.shape

(3, 1024)

In [23]:
# Row 0: the question slot under the loss's (q, correct, incorrect) layout.
pred[0]

array([ 0.45514768,  0.4300901 ,  0.46153972, ...,  0.28196892,
        0.50680733,  0.41532838])

In [24]:
# Row 1: the correct-answer slot.
pred[1]

array([ 0.44766107,  0.42841181,  0.45364702, ...,  0.28176054,
        0.51277733,  0.41849643])

In [25]:
# Row 2: the incorrect-answer slot.
pred[2]

array([ 0.43293893,  0.42733499,  0.4373728 , ...,  0.28101972,
        0.52637863,  0.42582652])

In [26]:
# Compile cosine_similarity into a standalone Theano function so it can be
# evaluated on concrete arrays. dmatrix declares float64 matrix inputs.
x = T.dmatrix('x')
y = T.dmatrix('y')
z = cosine_similarity(x, y)
f = function([x, y], z)

In [30]:
# Each vector is wrapped in a list to form the 1-row matrix that f expects.
# First line: similarity(question, correct answer).
# Second line: similarity(question, incorrect answer).
# Training aims to make the first exceed the second by at least the margin.
print f([pred[0]], [pred[1]])
print f([pred[0]], [pred[2]])

[ 0.99993182]
[ 0.99904109]
