In [19]:
%load_ext autoreload
%autoreload 2

from hyper_qa.model import HyperQA
from hyper_qa.utilities import riemannian_gradient
import numpy as np
import tensorflow as tf
# Switch TensorFlow 1.x to eager mode. Later cells iterate the dataset
# iterator in a plain Python loop and call `.numpy()` on tensors, both of
# which rely on eager execution being enabled here first.
tf.enable_eager_execution()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Seed both random number generators used by this notebook. The embedding
# matrix and all synthetic samples are drawn with `np.random`, so seeding
# TensorFlow alone does not make a fresh run reproducible.
np.random.seed(212)
tf.random.set_random_seed(212)

In [9]:
# Random stand-in for pretrained embeddings: a 1000 x 300 matrix of
# standard-normal values.
embedding_matrix = np.random.normal(loc=0, scale=1, size=(1000, 300))
def generate_samples(max_length, sample_size, vocab_size=1000, pad=False):
    """Generate random token-id sequences to use as synthetic model input.

    Each sequence gets a random length in ``[1, max_length - 1]`` and token
    ids drawn uniformly from ``[1, vocab_size - 1]``. Id 0 is never drawn, so
    it is only ever introduced as padding.

    Args:
        max_length: Exclusive upper bound on the sequence length, and the
            row width of the result when ``pad`` is true. Must be at least 2
            (``np.random.randint(1, max_length)`` rejects smaller values).
        sample_size: Number of sequences to generate.
        vocab_size: Exclusive upper bound on the generated token ids.
        pad: When true, right-pad every sequence with zeros up to
            ``max_length`` and stack the rows into a single 2-D array.

    Returns:
        A ``(sample_size, max_length)`` integer array when ``pad`` is true,
        otherwise a list of ``sample_size`` 1-D arrays of varying length.
    """
    sample_sequences = []
    for _ in range(sample_size):
        # Draw the length before the tokens: this is the order in which the
        # RNG has always been consumed, so seeded runs with the default
        # vocab_size still produce the same data.
        length = np.random.randint(1, max_length)
        text = np.random.randint(1, vocab_size, length)
        if pad:
            text = np.concatenate((text, [0] * (max_length - length)))
        sample_sequences.append(text)
    if pad:
        if not sample_sequences:
            # np.vstack refuses an empty list; return an empty batch instead.
            return np.zeros((0, max_length), dtype=int)
        sample_sequences = np.vstack(sample_sequences)
    return sample_sequences

In [10]:
max_length = 50
# Three independently drawn, zero-padded sets of 200 sequences each.
# Presumably question / positive answer / negative answer, given how they are
# fed to the model later -- TODO confirm.
q, a, b = (generate_samples(max_length, 200, pad=True) for _ in range(3))

In [11]:
# Slice the three arrays row-wise into (q, a, b) triples, shuffle with a
# 200-element buffer, and group into batches of 5.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((q, a, b))
    .shuffle(200)
    .batch(5)
)
data_iter = dataset.make_one_shot_iterator()

In [6]:
# Pull a single batch of triples from the iterator.
# NOTE(review): this rebinds q, a and b, so from here on they refer to one
# batch rather than the full sample arrays generated earlier. Re-running the
# dataset cell after this one would build the dataset from that single batch.
q, a, b = data_iter.get_next()
# NOTE(review): the leading 1000 presumably is the vocabulary size, matching
# the first dimension of embedding_matrix -- confirm against HyperQA's signature.
model = HyperQA(1000, max_length, embedding_matrix=embedding_matrix)


In [28]:
# Adam optimizer with a step size of 1e-4, used by the training loop.
optimizer = tf.train.AdamOptimizer(
    learning_rate=1e-4,
)


In [27]:
# Train with a pairwise hinge loss: a batch contributes to the loss unless
# sim_pos exceeds sim_neg by at least `margin`.
margin = tf.constant(5.0)
loss_history = []
steps = 500  # NOTE(review): not read by this loop; kept in case other cells use it.
# data_iter is a one-shot iterator, so this loop consumes whatever batches
# remain in it; rebuild the iterator before re-running this cell.
for q1, q2, q3 in data_iter:
    with tf.GradientTape() as tape:
        # Score the batch produced by this iteration. Using the stale
        # (q, a, b) bound in an earlier cell would train on the same data at
        # every step.
        sim_pos, sim_neg = model((q1, q2, q3), training=True)
        loss = tf.reduce_mean(tf.maximum(0.0, margin + sim_neg - sim_pos))
    # Logging does not need to be recorded by the tape.
    loss_history.append(loss)

    gradients = tape.gradient(loss, model.trainable_variables)
    # The first variable's gradient is passed through unchanged; every other
    # gradient is converted with riemannian_gradient.
    # NOTE(review): presumably variable 0 is the Euclidean embedding matrix --
    # confirm the ordering of model.trainable_variables.
    riem_gradients = [gradients[0]] + [riemannian_gradient(grad) for grad in gradients[1:]]
    # Apply the converted gradients; applying the raw `gradients` here would
    # discard the conversion computed above.
    optimizer.apply_gradients(zip(riem_gradients, model.trainable_variables))

In [13]:
# Sanity check: print the norm along the last axis of each of the model's
# three bag-of-words representations, followed by the negative-pair distance.
for param in (model.bow_q1, model.bow_q2, model.bow_q3):
    param_norms = tf.norm(param, axis=-1)
    print(param_norms.numpy())
print(model.distance_neg)

[0.9999996  0.99999994 1.         1.         1.         1.0000001
 0.9999995  0.99999976 0.99999994 1.         0.99999964 1.
 1.0000001  0.99999994 0.99999976 0.99999976 0.99999994 0.99999976
 1.         0.99999994 0.99999994 0.9999994  0.99999964 0.9999995
 1.0000001  0.9999997  0.9999998  0.99999994 1.0000004  1.
 0.9999994  1.         1.0000002  0.99999994 0.99999946 1.0000001
 1.         1.0000001  0.99999994 0.9999995  0.99999994 1.
 0.9999996  1.         1.0000001  1.0000004  0.99999994 0.9999998
 0.9999994  0.99999994 0.9999997  0.99999976 1.         1.
 0.9999999  0.9999997  0.99999994 0.99999994 0.9999996  0.99999994
 0.99999994 0.9999998  1.0000001  0.9999999  1.0000001  1.0000002
 0.99999994 0.9999999  0.9999998  1.         1.         0.99999946
 0.9999997  1.0000001  0.9999997  1.0000001  1.0000001  0.9999998
 0.99999946 0.99999994 0.9999994  0.99999994 0.99999994 1.0000001
 0.9999995  0.99999994 0.9999994  1.0000001  0.9999996  1.
 0.9999995  1.0000001  0.99999964 1.      