In [1]:
import tensorflow as tf
import math
import numpy as np

In [2]:
def arctanh(x):
    return tf.log(tf.divide(1+x,1-x))

Initialise all of the variables

In [3]:
def inner_prod(r_in, r_out, theta_in, theta_out):
    cosine = tf.cos(theta_in - theta_out)
    radius = tf.multiply(arctanh(r_in), arctanh(r_out))
    return 4 * tf.multiply(cosine, radius)

In [4]:
def minkowski_dot(u,v):
    return tf.tensordot(u,v,1) - 2*tf.multiply(u[0],v[0])

In [5]:
def exponential(base, tangent):
    """
    Compute the exponential of `tangent` from the point `base`.
    """
    #tangent = tangent.copy()
    norm = tf.sqrt(tf.maximum(minkowski_dot(tangent, tangent), 0))
    if norm == 0:
        return base
    tangent /= norm
    return tf.cosh(norm) * base + tf.sinh(norm) * tangent

In [6]:
def tensor_inner_prod(r_example, r_sample, theta_example, theta_sample):
    r1 = arctanh(r_example)
    r2 = arctanh(r_sample)
    radius_term = r1[:, None] + r2[None, :]
    cos_term = theta_example[:, None] - theta_sample[None, :]
    return tf.squeeze(4* tf.multiply(cos_term, radius_term))

In [7]:
def nce_loss(true_logits, sampled_logits):
        true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(true_logits), logits=true_logits)
        sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(sampled_logits), logits=sampled_logits)
        nce_loss_tensor = (tf.reduce_sum(true_xent) +
                           tf.reduce_sum(sampled_xent)) / 2
        return nce_loss_tensor

In [8]:
def minkowski_dist(u, v):
    """
    The distance between two points in Minkowski space
    :param u:
    :param v:
    :return:
    """
    return tf.acosh(-minkowski_dot(u, v))

In [9]:
def project_onto_tangent_space(hyperboloid_point, minkowski_tangent):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point:
    :param minkowski_tangent:
    :return:
    """
    return minkowski_tangent + minkowski_dot(hyperboloid_point, minkowski_tangent) * hyperboloid_point

In [10]:
def exp_map(base, tangent):
    """
    Compute the exponential of the `tangent` vector from the point `base`.
    """
    # tangent = tangent.copy()
    norm = tf.sqrt(tf.maximum(minkowski_dot(tangent, tangent), 0))
    if norm == 0:
        return base
    tangent /= norm
    return tf.cosh(norm) * base + tf.sinh(norm) * tangent

In [11]:
def minkowski_tensor_dot(u, v):
    """
    Minkowski dot product is the same as the Euclidean dot product, but the first element squared is subtracted
    :param u: a tensor of shape (#examples, dims)
    :param v: a tensor of shape (#examples, dims)
    :return: a scalar dot product
    """
    assert u.shape == v.shape, 'minkowski dot product not define for different shape tensors'
    try:
        temp = np.eye(u.shape[1])
    except IndexError:
        temp = np.eye(u.shape)
    temp[0, 0] = -1.
    T = tf.constant(temp, dtype=u.dtype)
    # make the first column of v negative
    v_neg = tf.matmul(v, T)
    return tf.reduce_sum(tf.multiply(u, v_neg), 1, keep_dims=True)  # keep dims for broadcasting

In [12]:
def tensor_exp_map(hyperboloid_points, tangent_grads):
    """
    Map vectors in the tangent space of the hyperboloid points back onto the hyperboloid
    :param hyperboloid_points: a tensor of points on the hyperboloid of shape (#examples, #dims)
    :param tangent_grads: a tensor of gradients on the tangent spaces of the hyperboloid_points of shape (#examples, #dims)
    :return:
    """
    # todo do we need to normalise the gradients?
    norms = tf.sqrt(tf.maximum(minkowski_tensor_dot(tangent_grads, tangent_grads), 0))
    zero = tf.constant(0, dtype=tf.float32)
    nonzero_flags = tf.squeeze(tf.not_equal(norms, zero))
    nonzero_indices = tf.squeeze(tf.where(nonzero_flags))
    nonzero_norms = tf.boolean_mask(norms, nonzero_flags)
    updated_grads = tf.boolean_mask(tangent_grads, tf.squeeze(nonzero_flags))
    updated_points = tf.boolean_mask(hyperboloid_points, nonzero_flags)
    # if norms == 0:
    #     return hyperboloid_points
    normed_grads = tf.divide(updated_grads, nonzero_norms)
    updates = tf.multiply(tf.cosh(nonzero_norms), updated_points) + tf.multiply(tf.sinh(nonzero_norms), normed_grads)
    return tf.scatter_update(hyperboloid_points, nonzero_indices, updates)

In [13]:
def project_tensors_onto_tangent_space(hyperboloid_points, ambient_gradients):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point: A point on the hyperboloid
    :param ambient_gradient: The gradient to project
    :return: gradients in the tangent spaces of the hyperboloid points
    """
    return ambient_gradients + tf.multiply(minkowski_tensor_dot(hyperboloid_points, ambient_gradients),
                                           hyperboloid_points)

In [14]:
def transform_grads(grad):
    """
    multiply by the inverse of the Minkowski metric tensor g = diag[-1, 1,1 ... 1] to make the first element of each
    grad vector negative
    :param grad: grad matrix of shape (n_vars, embedding_dim)
    :return:
    """
    x = np.eye(grad.shape[1])
    x[0, 0] = -1.
    T = tf.constant(x, dtype=grad.dtype)
    return tf.matmul(grad, T)

In [15]:
def rsgd(grads, vecs, lr=0.1):
    """
    Perform the Riemannian gradient descent operation by
    1/ Transforming gradients using the Minkowski metric tensor
    2/ Projecting onto the tangent space
    3/ Applying the exponential map
    :param grads:
    :param var:
    :param lr:
    :return:
    """
    minkowski_grads = transform_grads(grads)
    tangent_grads = project_tensors_onto_tangent_space(vecs, minkowski_grads)
    return tensor_exp_map(vecs, lr * tangent_grads)

In [16]:
g1 = tf.constant([[1., 1.], [2., -1.], [3., 2.], [4., 0.]])
retval1 = np.array([[-1., 1.], [-2., -1.], [-3., 2.], [-4., 0.]])
transformed_grads = transform_grads(g1)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
tgs = sess.run(transformed_grads)
print(tgs)
assert np.array_equal(tgs, retval1)

[[-1.  1.]
 [-2. -1.]
 [-3.  2.]
 [-4.  0.]]


In [17]:
p1 = tf.Variable([[1., 0.], [1., 0.], [1., 0.], [1., 0.]])  # this the minima of the hyperboloid
p2 = tf.Variable([[ 1.54308057, 1.17520118],[ 1.54308057,-1.17520118],[ 3.76219535,3.62686038],[ 1.,0.]])
g1 = tf.constant([[1., 1.], [2., -1.], [3., 2.], [4., 0.]])
retval1 = np.array([[-1.], [-1.], [-1.], [-1.]])
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# here the tangent space is x=1
# print(sess.run(rsgd(g1, p1)))
minkowski_grads = transform_grads(g1)
tangent_grads = project_tensors_onto_tangent_space(p2, minkowski_grads)
pnew = tensor_exp_map(p2, tangent_grads)
# # check that the points are on the hyperboloid
# # print(sess.run(p2))
print('minkowski grads', sess.run(minkowski_grads))
print('tangent space grads', sess.run(tangent_grads))
new_points = sess.run(pnew)
print('new points', new_points)
# norms = sess.run(minkowski_tensor_dot(pnew, pnew))
norms = sess.run(minkowski_tensor_dot(new_points, new_points))
print(norms)
assert np.array_equal(np.around(norms, 3), retval1)

('minkowski grads', array([[-1.,  1.],
       [-2., -1.],
       [-3.,  2.],
       [-4.,  0.]], dtype=float32))
('tangent space grads', array([[  3.19452763,   4.19452763],
       [  4.57562494,  -6.00795794],
       [ 66.75225067,  69.24310303],
       [  0.        ,   0.        ]], dtype=float32))
('new points', array([[  2.06089172e+01,   2.05846424e+01],
       [  6.67124863e+01,  -6.67050018e+01],
       [  3.63710016e+08,   3.63710048e+08],
       [  1.00000000e+00,   0.00000000e+00]], dtype=float32))
[[ -9.99969482e-01]
 [ -9.98535156e-01]
 [  2.57698038e+10]
 [ -1.00000000e+00]]


AssertionError: 

In [18]:
def minkowski_vector_dot(u, v):
    """
        Minkowski dot product is the same as the Euclidean dot product, but the first element squared is subtracted
        :param u: a vector
        :param v: a vector
        :return: a scalar dot product
        """
    assert u.shape == v.shape, 'minkowski dot product not define for different shape vectors'
    # assert that the vectors have only 1d.
    # todo this currently fails because exp_map returns tensors with shape = None
    # assert u.get_shape().ndims == 1, 'applied minkowski_vector_dot to a tensor. Try using minkowski_tensor_dot'

    return tf.tensordot(u, v, 1) - 2 * tf.multiply(u[0], v[0])

In [19]:
def project_onto_tangent_space(hyperboloid_point, ambient_gradient):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point: A point on the hyperboloid
    :param ambient_gradient: The gradient to project
    :return:
    """
    return ambient_gradient + minkowski_vector_dot(hyperboloid_point, ambient_gradient) * hyperboloid_point

In [20]:
point = tf.Variable([3.76219535,3.62686038])
g1 = tf.constant([-3., 2.])
# minkowski_grads = transform_grads(g1)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(project_onto_tangent_space(point, g1)))

[ 66.75225067  69.24310303]


In [21]:
points = tf.Variable([[1., 0.], [1., 0.], [1., 0.], [1., 0.]])  # this the minima of the hyperboloid
grads = tf.Variable([[1., 1.], [2., -1.], [3., 2.], [4., 0.]])
retval1 = np.array([[-1.], [-1.], [-1.], [-1.]])
sess = tf.Session()
lr = 0.1
init = tf.global_variables_initializer()
sess.run(init)
vals = []
for i in range(3):
    vals.append(sess.run(points))
    print(vals[i])
    print(sess.run(minkowski_tensor_dot(vals[i], vals[i])))
#     print(sess.run(points))
    points = rsgd(grads, points)
#     vals.append(points)
    # check that the points are on the hyperboloid
#     norms = sess.run(minkowski_tensor_dot(points, points))
#     print(norms)
#     try:
#         assert np.array_equal(np.around(norms, 3), retval1)
#     except AssertionError:
#         print(sess.run(points))
print(sess.run(vals))

[[ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]]
[[-1.]
 [-1.]
 [-1.]
 [-1.]]
[[ 1.00500417  0.10016676]
 [ 1.00500417 -0.10016676]
 [ 1.02006674  0.20133603]
 [ 1.          0.        ]]
[[-1.]
 [-1.]
 [-1.]
 [-1.]]
[[ 1.05628109  0.34019053]
 [ 1.06826913 -0.37576473]
 [ 1.36538982  0.92967153]
 [ 1.          0.        ]]
[[-1.00000012]
 [-0.99999988]
 [-1.00000024]
 [-1.        ]]


TypeError: Fetch argument array([[ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.]], dtype=float32) has invalid type <type 'numpy.ndarray'>, must be a string or Tensor. (Can not convert a ndarray into a Tensor or Operation.)

In [None]:
def circ_sample():
    from matplotlib.pyplot import scatter
    # radius of the circle
    circle_r = 1
    # center of the circle (x, y)
    circle_x = 0
    circle_y = 0

    # random angle
    alpha = 2 * math.pi * np.random.rand(1000)
    # random radius
    r = circle_r * np.sqrt(np.random.rand(1000))
    # calculating coordinates
    x = r * np.cos(alpha) + circle_x
    y = r * np.sin(alpha) + circle_y
#     scatter(x,y)
    retval = np.concatenate((x,y), axis=0)
    print(retval)
    return retval

In [None]:
def to_hyperboloid_points(poincare_pts):
    """
    Post: result.shape[1] == poincare_pts.shape[1] + 1
    """
    norm_sqd = (poincare_pts ** 2).sum(axis=1)
    N = poincare_pts.shape[1]
    result = np.zeros((poincare_pts.shape[0], N + 1), dtype=np.float64)
    result[:, 1:] = (2. / (1 - norm_sqd))[:, np.newaxis] * poincare_pts
    result[:, 0] = (1 + norm_sqd) / (1 - norm_sqd)
    return result

In [22]:
sess = tf.Session()
lr = 0.1
tens = tf.Variable(hyp_points)
init = tf.global_variables_initializer()
sess.run(init)
assert np.array_equal(np.around(sess.run(minkowski_tensor_dot(tens, tens)),3), np.array(100 * [[-1.]]))

NameError: name 'hyp_points' is not defined

In [23]:
init_width = 1
vocab_size = 2
embedding_size = 3
poincare_pts = np.random.uniform(-init_width, init_width, (vocab_size, embedding_size))

In [24]:
def to_hyperboloid_points(vocab_size, embedding_size, init_width):
    """
    Post: result.shape[1] == poincare_pts.shape[1] + 1
    """
    poincare_pts = np.random.uniform(-init_width, init_width, (vocab_size, embedding_size))
    norm_sqd = (poincare_pts ** 2).sum(axis=1)
    # the hyperboloid has one extra ambient dimension
    result = np.zeros((poincare_pts.shape[0], embedding_size + 1), dtype=np.float64)
    result[:, 1:] = (2. / (1 - norm_sqd))[:, np.newaxis] * poincare_pts
    result[:, 0] = (1 + norm_sqd) / (1 - norm_sqd)
    return result

In [25]:
def forward(examples):
    emb = tf.Variable(to_hyperboloid_points(4, 2, 1),
                                       name="emb", dtype=tf.float32)
    
    return tf.nn.embedding_lookup(emb, examples)

sess = tf.Session()
lr = 0.1
sess.run(init)
examples = forward([1,2])
init = tf.global_variables_initializer()
print(examples.shape)


(2, 3)


In [26]:
def tf_distance(x, y):
    """
    The distance between two vectors
    :param x: shape (1, ndims)
    :param y: shape (1,ndims)
    :return: a scalar hyperbolic distance
    """
    norm_square = tf.square(tf.norm(x - y, axis=0))
    print norm_square
    denom1 = 1 - tf.square(tf.norm(x, axis=0))
    print denom1
    denom2 = 1 - tf.square(tf.norm(y, axis=0))
    print denom2
    arg = 1 + 2 * norm_square / (denom1 * denom2)
    print arg
    return tf.acosh(arg)

In [27]:
def get_logits(example, label, sample, true_b, sample_b):
    true_logits = tf_distance(example, label) + true_b
    sampled_logits = tf_distance(example, sample) + sample_b
    return true_logits, sampled_logits

In [28]:
def test_grads_vectors():
    """
    tests the gradients of the pairwise and elementwise distance functions with 1 sample, 1 example and 1 label
    :return:
    """
    embedding_size = 2
    vocab_size = 4
    emb = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -0.1, 0.1))
    sm_w_t = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -0.1, 0.1))
    # sm_w_t = tf.Variable(tf.zeros([vocab_size, embedding_size]))
    sm_b = tf.Variable(tf.zeros([vocab_size]))

    examples = tf.Variable([1,2])
    labels = tf.Variable([2,2])
    sampled_ids = tf.Variable([1,3])

    example_emb = tf.nn.embedding_lookup(emb, examples)
    # Weights for labels: [batch_size, emb_dim]
    true_w = tf.nn.embedding_lookup(sm_w_t, labels)
    # Biases for labels: [batch_size, 1]
    true_b = tf.nn.embedding_lookup(sm_b, labels)
    sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
    print('emb shape: ', example_emb.shape)
    print('sample w shape: ', sampled_w.shape)
    # Biases for sampled ids: [num_sampled, 1]
    sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)
    true_logits, sampled_logits = get_logits(example_emb, true_w, sampled_w, true_b, sampled_b)
    loss = nce_loss(true_logits, sampled_logits)
    opt = tf.train.GradientDescentOptimizer(0.1)
    emb_grad = opt.compute_gradients(loss, [emb])
    sm_w_t_grad = opt.compute_gradients(loss, [sm_w_t])
    grads = emb_grad + sm_w_t_grad
    apply_grad = opt.apply_gradients(grads)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print 'original vectors: ', sess.run([example_emb, true_w])
        print 'emb grads are: ', sess.run(emb_grad)
        print 'sm_w_t grads are: ', sess.run(sm_w_t_grad)
        sess.run(apply_grad)
        print 'updated vectors: ', sess.run([example_emb, true_w])

In [29]:
test_grads_vectors()

('emb shape: ', TensorShape([Dimension(2), Dimension(2)]))
('sample w shape: ', TensorShape([Dimension(2), Dimension(2)]))
Tensor("Square:0", shape=(2,), dtype=float32)
Tensor("sub_2:0", shape=(2,), dtype=float32)
Tensor("sub_3:0", shape=(2,), dtype=float32)
Tensor("add_9:0", shape=(2,), dtype=float32)
Tensor("Square_3:0", shape=(2,), dtype=float32)
Tensor("sub_5:0", shape=(2,), dtype=float32)
Tensor("sub_6:0", shape=(2,), dtype=float32)
Tensor("add_11:0", shape=(2,), dtype=float32)
original vectors:  [array([[-0.07931326,  0.00481357],
       [ 0.0957616 ,  0.00107229]], dtype=float32), array([[-0.09562647,  0.04233987],
       [-0.09562647,  0.04233987]], dtype=float32)]
emb grads are:  [(IndexedSlicesValue(values=array([[-0.57865351,  0.61299074],
       [-0.17891437,  0.79511589]], dtype=float32), indices=array([1, 2], dtype=int32), dense_shape=array([4, 2], dtype=int32)), array([[-0.03485067, -0.08269012],
       [-0.07931326,  0.00481357],
       [ 0.0957616 ,  0.00107229],
     

In [36]:
labels = tf.Variable([1,2])
labels_matrix = tf.reshape(
            tf.cast(labels,
                    dtype=tf.int64),
            [2, 1])

# Negative sampling.
sampled_ids, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
    true_classes=labels_matrix,
    num_true=1,
    num_sampled=5,
    unique=True,  # set to True if all the samples need to be unique
    range_max=5,
    distortion=0.75,
    unigrams=[1,1,1,1,1]))
                     
print(sampled_ids.shape)
print(sampled_ids)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(sampled_ids))
                  

(5,)
Tensor("FixedUnigramCandidateSampler_6:0", shape=(5,), dtype=int64)
[2 0 4 1 3]
