In [21]:
import theano.tensor as T
from theano import function

def mean(x, axis=None, keepdims=False):
    return T.mean(x, axis=axis, keepdims=keepdims)

def l2_normalize(x, axis):
    norm = T.sqrt(T.sum(T.square(x), axis=axis, keepdims=True))
    return x / norm

def cosine_similarity(y_true, y_pred):
    assert y_true.ndim == 2
    assert y_pred.ndim == 2
    y_true = l2_normalize(y_true, axis=1)
    y_pred = l2_normalize(y_pred, axis=1)
    return T.sum(y_true * y_pred, axis=1, keepdims=False)

def GESD(y_true, y_pred):
    assert y_true.ndim == 2
    assert y_pred.ndim == 2
    y_true = l2_normalize(y_true, axis=1)
    y_pred = l2_normalize(y_pred, axis=1)
    eucledian_dist = T.sqrt(T.sum(T.square(y_true - y_pred), axis=1, keepdims=True))
    part1 = 1.0 / (1.0 + eucledian_dist)
    gamma = 1.0
    c = 1.0
    part2 = 1.0 / (1.0 + T.exp(-gamma * (T.sum(y_true * y_pred, axis=1, keepdims=False) + c)))
    return T.sum(part1 * part2, axis=1, keepdims=False)

def AESD(y_true, y_pred):
    assert y_true.ndim == 2
    assert y_pred.ndim == 2
    y_true = l2_normalize(y_true, axis=1)
    y_pred = l2_normalize(y_pred, axis=1)
    eucledian_dist = T.sqrt(T.sum(T.square(y_true - y_pred), axis=1, keepdims=True))
    part1 = 1.0 / (1.0 + eucledian_dist)
    gamma = 1.0
    c = 1.0
    part2 = 1.0 / (1.0 + T.exp(-gamma * (T.sum(y_true * y_pred, axis=1, keepdims=False) + c)))
    return T.sum(part1 + part2, axis=1, keepdims=False)

def GESD_ranking_loss(y_true, y_pred):
    MARGIN = 0.01
    
    q = y_pred[0::3]
    a_correct = y_pred[1::3]
    a_incorrect = y_pred[2::3]

    return mean(T.maximum(0., MARGIN - GESD(q, a_correct) + GESD(q, a_incorrect)) - y_true[0]*0, axis=-1)

def AESD_ranking_loss(y_true, y_pred):
    MARGIN = 0.01
    
    q = y_pred[0::3]
    a_correct = y_pred[1::3]
    a_incorrect = y_pred[2::3]

    return mean(T.maximum(0., MARGIN - AESD(q, a_correct) + AESD(q, a_incorrect)) - y_true[0]*0, axis=-1)

### test

In [22]:
x = T.dmatrix('x')
y = T.dmatrix('y')
z = GESD(x, y)
f = function([x, y], z)

In [23]:
y = [[1, 0], [1, 0], [3, 4], [1.2, -3.2]]
preds = [[1, 0], [0, 1], [3, 4], [1.3, 2.2]]

f(y, preds)

array([ 3.08471957,  1.27773268,  3.08471957,  1.10005398])

In [24]:
x = T.dmatrix('x')
y = T.dmatrix('y')
z = AESD(x, y)
f = function([x, y], z)

In [25]:
y = [[1, 0], [1, 0], [3, 4], [1.2, -3.2]]
preds = [[1, 0], [0, 1], [3, 4], [1.3, 2.2]]

f(y, preds)

array([ 7.08471957,  4.74157381,  7.08471957,  4.51117531])

In [27]:
from keras.layers.embeddings import Embedding
from keras.layers.core import Dense, Merge, TimeDistributedMerge
from keras.layers import recurrent
from keras.models import Sequential
from keras.layers.convolutional import MaxPooling1D
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

import numpy as np

print "Creating data..."

data = []
for i in xrange(1000):
    q0 = np.random.choice(range(10))
    q1 = np.random.choice(range(10))
    q = str(q0) + "+" + str(q1)
    a_correct = str(q0 + q1)
    a_incorrect = str(np.random.choice(range(19)))
    data.append(q)
    data.append(a_correct)
    data.append(a_incorrect)
    
print "Sample data:"
for i in xrange(3):
    print data[i]
    
print "Tokenizing data..."
tokenizer = Tokenizer(filters='', lower=False)
char_data = [' '.join(list(x)) for x in data]
tokenizer.fit_on_texts(char_data)

print "Converting text to sequences..."
data_seq = tokenizer.texts_to_sequences(char_data)

print "Sample sequences:"
for i in xrange(3):
    print data_seq[i]
    
data_maxlen = max([len(q) for q in data_seq])
print "Questions maxlen:", data_maxlen

print "Padding sequences..."
data_seq_pad = pad_sequences(data_seq, maxlen=data_maxlen) 

print "Sample padded sequences:"
for i in xrange(3):
    print data_seq_pad[i]    

Using Theano backend.
Creating data...
Sample data:
9+7
16
0
Tokenizing data...
Converting text to sequences...
Sample sequences:
[11, 2, 6]
[1, 3]
[8]
Questions maxlen: 3
Padding sequences...
Sample padded sequences:
[11  2  6]
[0 1 3]
[0 0 8]


In [28]:
print "Creating model..."

vocab_size = len(tokenizer.word_index) + 1
embed_size = 5
hidden_size = 10
timesteps = data_maxlen
RNN = recurrent.LSTM

print "Vocab_size: ", vocab_size
print "Timesteps: ", timesteps

model = Sequential()
model.add(Embedding(vocab_size, embed_size, mask_zero=True))
model.add(RNN(hidden_size, return_sequences=False))

Creating model...
Vocab_size:  12
Timesteps:  3


In [29]:
model.summary()

--------------------------------------------------------------------------------
Initial input shape: (None, 12)
--------------------------------------------------------------------------------
Layer (name)                  Output Shape                  Param #             
--------------------------------------------------------------------------------
Embedding (embedding)         (None, None, 5)               60                  
LSTM (lstm)                   (None, 10)                    640                 
--------------------------------------------------------------------------------
Total params: 700
--------------------------------------------------------------------------------


In [31]:
print "Compiling model..."
model.compile(optimizer="adam", loss=GESD_ranking_loss)

Compiling model...


In [32]:
_ = [0 for i in xrange(len(data_seq_pad))]
model.fit(data_seq_pad, _, batch_size=6, nb_epoch=10, shuffle=False, validation_split=0.0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x18589e48>

In [37]:
print "Compiling model..."
model.compile(optimizer="adam", loss=AESD_ranking_loss)

Compiling model...


In [39]:
sub_seq_pad = data_seq_pad[:12][:]
_ = [0 for i in xrange(len(sub_seq_pad))]
model.fit(sub_seq_pad, _, batch_size=6, nb_epoch=10, shuffle=False, validation_split=0.0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23cb3f98>