In [215]:
import tensorflow as tf
import math
import numpy as np

In [216]:
def arctanh(x):
    return tf.log(tf.divide(1+x,1-x))

Initialise all of the variables

In [217]:
def inner_prod(r_in, r_out, theta_in, theta_out):
    cosine = tf.cos(theta_in - theta_out)
    radius = tf.multiply(arctanh(r_in), arctanh(r_out))
    return 4 * tf.multiply(cosine, radius)

In [218]:
def minkowski_dot(u,v):
    return tf.tensordot(u,v,1) - 2*tf.multiply(u[0],v[0])

In [219]:
def exponential(base, tangent):
    """
    Compute the exponential of `tangent` from the point `base`.
    """
    #tangent = tangent.copy()
    norm = tf.sqrt(tf.maximum(minkowski_dot(tangent, tangent), 0))
    if norm == 0:
        return base
    tangent /= norm
    return tf.cosh(norm) * base + tf.sinh(norm) * tangent

In [220]:
def tensor_inner_prod(r_example, r_sample, theta_example, theta_sample):
    r1 = arctanh(r_example)
    r2 = arctanh(r_sample)
    radius_term = r1[:, None] + r2[None, :]
    cos_term = theta_example[:, None] - theta_sample[None, :]
    return tf.squeeze(4* tf.multiply(cos_term, radius_term))

In [221]:
def nce_loss(true_logits, sampled_logits):
        true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(true_logits), logits=true_logits)
        sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(sampled_logits), logits=sampled_logits)
        nce_loss_tensor = (tf.reduce_sum(true_xent) +
                           tf.reduce_sum(sampled_xent)) / 2
        return nce_loss_tensor

In [222]:
def minkowski_dist(u, v):
    """
    The distance between two points in Minkowski space
    :param u:
    :param v:
    :return:
    """
    return tf.acosh(-minkowski_dot(u, v))

In [223]:
def project_onto_tangent_space(hyperboloid_point, minkowski_tangent):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point:
    :param minkowski_tangent:
    :return:
    """
    return minkowski_tangent + minkowski_dot(hyperboloid_point, minkowski_tangent) * hyperboloid_point

In [224]:
def exp_map(base, tangent):
    """
    Compute the exponential of the `tangent` vector from the point `base`.
    """
    # tangent = tangent.copy()
    norm = tf.sqrt(tf.maximum(minkowski_dot(tangent, tangent), 0))
    if norm == 0:
        return base
    tangent /= norm
    return tf.cosh(norm) * base + tf.sinh(norm) * tangent

In [225]:
def minkowski_tensor_dot(u, v):
    """
    Minkowski dot product is the same as the Euclidean dot product, but the first element squared is subtracted
    :param u: a tensor of shape (#examples, dims)
    :param v: a tensor of shape (#examples, dims)
    :return: a scalar dot product
    """
    assert u.shape == v.shape, 'minkowski dot product not define for different shape tensors'
    try:
        temp = np.eye(u.shape[1])
    except IndexError:
        temp = np.eye(u.shape)
    temp[0, 0] = -1.
    T = tf.constant(temp, dtype=u.dtype)
    # make the first column of v negative
    v_neg = tf.matmul(v, T)
    return tf.reduce_sum(tf.multiply(u, v_neg), 1, keep_dims=True)  # keep dims for broadcasting

In [226]:
def pairwise_minkowski_dot(u, v):
    """
    creates a matrix of minkowski dot products M(i,j) = u[i,:]*v[j,:]
    :param examples: first set of vectors of shape (ndata1, ndim)
    :param samples: second set of vectors of shape (ndata2, ndim)
    :return: A numpy array of shape (ndata1, ndata2) of pairwise squared distances
    """
    try:
        temp = np.eye(u.shape[1])
    except IndexError:
        temp = np.eye(u.shape)
    temp[0, 0] = -1.
    T = tf.constant(temp, dtype=u.dtype)
    # make the first column of v negative
    v_neg = tf.matmul(v, T)
    return tf.matmul(u, v_neg, transpose_b=True)

In [227]:
x = np.array([[1,0],[0,1]])
y = np.array([[3,4],[5,6]])
tx = tf.Variable(x, dtype=tf.float32)
ty = tf.Variable(y, dtype=tf.float32)
retval = np.array([[-3,-5],[4,6]])
temp = np.eye(y.shape[1])
temp[0, 0] = -1.
T = tf.constant(temp, dtype=ty.dtype)
ty_neg = tf.matmul(ty, T)
z = tf.matmul(tx,ty_neg, transpose_b=True)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(z))

[[-3. -5.]
 [ 4.  6.]]


In [229]:
def tensor_exp_map(vars, indices, tangent_grads):
    """
    Map vectors in the tangent space of the hyperboloid points back onto the hyperboloid
    :param hyperboloid_points: a tensor of points on the hyperboloid of shape (#examples, #dims)
    :param tangent_grads: a tensor of gradients on the tangent spaces of the hyperboloid_points of shape (#examples, #dims)
    :return:
    """
    # todo do we need to normalise the gradients?
    hyperboloid_points = tf.nn.embedding_lookup(vars, indices)
    norms = tf.sqrt(tf.maximum(minkowski_tensor_dot(tangent_grads, tangent_grads), 0))
    zero = tf.constant(0, dtype=tf.float32)
    nonzero_flags = tf.squeeze(tf.not_equal(norms, zero))
#     nonzero_indices = tf.squeeze(tf.where(nonzero_flags))
    nonzero_indices = tf.boolean_mask(indices, nonzero_flags)
    nonzero_norms = tf.boolean_mask(norms, nonzero_flags)
    updated_grads = tf.boolean_mask(tangent_grads, tf.squeeze(nonzero_flags))
    updated_points = tf.boolean_mask(hyperboloid_points, nonzero_flags)
    # if norms == 0:
    #     return hyperboloid_points
    normed_grads = tf.divide(updated_grads, nonzero_norms)
    updates = tf.multiply(tf.cosh(nonzero_norms), updated_points) + tf.multiply(tf.sinh(nonzero_norms), normed_grads)
    return tf.scatter_update(vars, nonzero_indices, updates)

In [149]:
g1 = tf.constant([[0., 1.], [0., 1.]])
# p1 = tf.constant([[2.31737995, 2.09051466],[2.31737995, 2.09051466]])
# p1 = tf.constant([[1.,0.],[1,0.]])
# p1 = tf.constant([[ 1.02006674,  0.20133601],[ 1.02006674,  0.20133601]])
p1 = tf.constant([[ 1.08272946,  0.41509438],[ 1.08272946,  0.41509438]])
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
raw_tangent_grads = project_tensors_onto_tangent_space(p1, g1)
tangent_grads = 0.2*raw_tangent_grads
norms = tf.sqrt(tf.maximum(minkowski_tensor_dot(tangent_grads, tangent_grads), 0))
normed_grads = tf.divide(tangent_grads, norms) 
values_to_replace = tf.logical_or(tf.is_nan(normed_grads), tf.is_inf(normed_grads))
safe_grads = tf.where(values_to_replace, tf.ones_like(normed_grads), normed_grads)
updates = tf.multiply(tf.cosh(norms), p1) + tf.multiply(tf.sinh(norms), safe_grads)
print(sess.run([tangent_grads, norms, normed_grads, values_to_replace, safe_grads, updates]))
# print(sess.run(project_tensors_onto_tangent_space(p1, g1)))

[array([[ 0.08988699,  0.23446067],
       [ 0.08988699,  0.23446067]], dtype=float32), array([[ 0.21654591],
       [ 0.21654591]], dtype=float32), array([[ 0.41509435,  1.08272958],
       [ 0.41509435,  1.08272958]], dtype=float32), array([[False, False],
       [False, False]], dtype=bool), array([[ 0.41509435,  1.08272958],
       [ 0.41509435,  1.08272958]], dtype=float32), array([[ 1.19880569,  0.6611622 ],
       [ 1.19880569,  0.6611622 ]], dtype=float32)]


In [133]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
t = tf.constant(np.arange(100), dtype=tf.float32)
print(sess.run(tf.cosh(t)))
print(sess.run(tf.sinh(t)))

[  1.00000000e+00   1.54308069e+00   3.76219559e+00   1.00676622e+01
   2.73082333e+01   7.42099457e+01   2.01715637e+02   5.48317017e+02
   1.49047913e+03   4.05154199e+03   1.10132334e+04   2.99370703e+04
   8.13773984e+04   2.21206703e+05   6.01302125e+05   1.63450862e+06
   4.44305550e+06   1.20774760e+07   3.28299840e+07   8.92411520e+07
   2.42582592e+08   6.59407872e+08   1.79245645e+09   4.87240192e+09
   1.32445614e+10   3.60024515e+10   9.78648023e+10   2.66024124e+11
   7.23128549e+11   1.96566712e+12   5.34323711e+12   1.45244249e+13
   3.94814785e+13   1.07321787e+14   2.91730855e+14   7.93006723e+14
   2.15561577e+15   5.85957127e+15   1.59279657e+16   4.32967020e+16
   1.17692635e+17   3.19921737e+17   8.69637488e+17   2.36391976e+18
   6.42579994e+18   1.74671353e+19   4.74805977e+19   1.29065645e+20
   3.50836779e+20   9.53673320e+20   2.59235273e+21   7.04674518e+21
   1.91550409e+22   5.20687949e+22   1.41537661e+23   3.84739274e+23
   1.04582972e+24   2.84286001e+24

In [124]:
g1 = tf.constant([[0., 1.], [0., -1.], [0., 2.], [1., 1.]])
norms = tf.constant([0., 1., 1., 0.], shape=(4, 1))
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
normed_g = tf.divide(g1, norms)
values_to_replace = tf.logical_or(tf.is_nan(normed_g),tf.is_inf(normed_g))
g2 = tf.where(values_to_replace, tf.ones_like(normed_g), normed_g)
print(sess.run(g2))

[[ 1.  1.]
 [ 0. -1.]
 [ 0.  2.]
 [ 1.  1.]]


In [112]:
input_points = np.array([[1., 0.], [1., 0.],[4.,5.], [1., 0.], [1., 0.]])
p1 = tf.Variable(input_points, dtype=tf.float32)  # this the minima of the hyperboloid
indices = tf.constant([0,1,3,4])
g1 = tf.constant([[0., 1.], [0., -1.], [0., 2.], [0., 0.]])
retval1 = np.array([[-1.], [-1.], [-1.], [-1.]])
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# here the tangent space is x=1
new_vars = tensor_exp_map(p1, indices, g1)
em1 = tf.nn.embedding_lookup(new_vars,indices)
# check that the points are on the hyperboloid
norms = sess.run(minkowski_tensor_dot(em1, em1))
print(norms)
assert np.array_equal(np.around(norms, 3), retval1)
em1 = sess.run(em1)
new_vars = sess.run(new_vars)
np_new_vars = np.array(new_vars)
assert np.array_equal(np_new_vars[2,:],input_points[2,:])
assert np.array_equal(np_new_vars[4,:],input_points[4,:])
assert em1[0, 0] == em1[1, 0]
assert em1[0, 1] == -em1[1, 1]
assert em1[2, 0] > em1[0, 0]
assert em1[2, 1] > em1[0, 1]

[[-0.99999976]
 [-0.99999976]
 [-0.99999714]
 [-1.        ]]


In [113]:
norms = tf.constant([1.,0.,1.,0.])
zero = tf.constant(0, dtype=tf.float32)
nonzero_flags = tf.squeeze(tf.not_equal(norms, zero))
nonzero_indices = tf.squeeze(tf.where(nonzero_flags))
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(nonzero_flags))
print(sess.run(nonzero_indices))

[ True False  True False]
[0 2]


In [114]:
def project_tensors_onto_tangent_space(hyperboloid_points, ambient_gradients):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point: A point on the hyperboloid
    :param ambient_gradient: The gradient to project
    :return: gradients in the tangent spaces of the hyperboloid points
    """
    return ambient_gradients + tf.multiply(minkowski_tensor_dot(hyperboloid_points, ambient_gradients),
                                           hyperboloid_points)

In [115]:
def transform_grads(grad):
    """
    multiply by the inverse of the Minkowski metric tensor g = diag[-1, 1,1 ... 1] to make the first element of each
    grad vector negative
    :param grad: grad matrix of shape (n_vars, embedding_dim)
    :return:
    """
    x = np.eye(grad.shape[1])
    x[0, 0] = -1.
    T = tf.constant(x, dtype=grad.dtype)
    return tf.matmul(grad, T)

In [116]:
def rsgd(grads, vecs, lr=0.1):
    """
    Perform the Riemannian gradient descent operation by
    1/ Transforming gradients using the Minkowski metric tensor
    2/ Projecting onto the tangent space
    3/ Applying the exponential map
    :param grads:
    :param var:
    :param lr:
    :return:
    """
    minkowski_grads = transform_grads(grads)
    tangent_grads = project_tensors_onto_tangent_space(vecs, minkowski_grads)
    return tensor_exp_map(vecs, lr * tangent_grads)

In [None]:
def minkowski_vector_dot(u, v):
    """
        Minkowski dot product is the same as the Euclidean dot product, but the first element squared is subtracted
        :param u: a vector
        :param v: a vector
        :return: a scalar dot product
        """
    assert u.shape == v.shape, 'minkowski dot product not define for different shape vectors'
    # assert that the vectors have only 1d.
    # todo this currently fails because exp_map returns tensors with shape = None
    # assert u.get_shape().ndims == 1, 'applied minkowski_vector_dot to a tensor. Try using minkowski_tensor_dot'

    return tf.tensordot(u, v, 1) - 2 * tf.multiply(u[0], v[0])

In [None]:
def project_onto_tangent_space(hyperboloid_point, ambient_gradient):
    """
    project gradients in the ambiant space onto the tangent space
    :param hyperboloid_point: A point on the hyperboloid
    :param ambient_gradient: The gradient to project
    :return:
    """
    return ambient_gradient + minkowski_vector_dot(hyperboloid_point, ambient_gradient) * hyperboloid_point

In [None]:
point = tf.Variable([3.76219535,3.62686038])
g1 = tf.constant([-3., 2.])
# minkowski_grads = transform_grads(g1)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(project_onto_tangent_space(point, g1)))

In [None]:
points = tf.Variable([[1., 0.], [1., 0.], [1., 0.], [1., 0.]])  # this the minima of the hyperboloid
grads = tf.Variable([[1., 1.], [2., -1.], [3., 2.], [4., 0.]])
retval1 = np.array([[-1.], [-1.], [-1.], [-1.]])
sess = tf.Session()
lr = 0.1
init = tf.global_variables_initializer()
sess.run(init)
vals = []
for i in range(3):
    vals.append(sess.run(points))
    print(vals[i])
    print(sess.run(minkowski_tensor_dot(vals[i], vals[i])))
#     print(sess.run(points))
    points = rsgd(grads, points)
#     vals.append(points)
    # check that the points are on the hyperboloid
#     norms = sess.run(minkowski_tensor_dot(points, points))
#     print(norms)
#     try:
#         assert np.array_equal(np.around(norms, 3), retval1)
#     except AssertionError:
#         print(sess.run(points))
print(sess.run(vals))

In [None]:
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
def circ_sample():
    from matplotlib.pyplot import scatter
    # radius of the circle
    circle_r = 1
    # center of the circle (x, y)
    circle_x = 0
    circle_y = 0

    # random angle
    alpha = 2 * math.pi * np.random.rand(1000)
    # random radius
    r = circle_r * np.sqrt(np.random.rand(1000))
    # calculating coordinates
    x = r * np.cos(alpha) + circle_x
    y = r * np.sin(alpha) + circle_y
#     scatter(x,y)
    retval = np.concatenate((x,y), axis=0)
    print(retval)
    return retval

In [None]:
def to_hyperboloid_points(poincare_pts):
    """
    Post: result.shape[1] == poincare_pts.shape[1] + 1
    """
    norm_sqd = (poincare_pts ** 2).sum(axis=1)
    N = poincare_pts.shape[1]
    result = np.zeros((poincare_pts.shape[0], N + 1), dtype=np.float64)
    result[:, 1:] = (2. / (1 - norm_sqd))[:, np.newaxis] * poincare_pts
    result[:, 0] = (1 + norm_sqd) / (1 - norm_sqd)
    return result

In [None]:
def to_hyperboloid_points(vocab_size, embedding_size, init_width):
    """
    Post: result.shape[1] == poincare_pts.shape[1] + 1
    """
    poincare_pts = np.random.uniform(-init_width, init_width, (vocab_size, embedding_size))
    norm_sqd = (poincare_pts ** 2).sum(axis=1)
    # the hyperboloid has one extra ambient dimension
    result = np.zeros((poincare_pts.shape[0], embedding_size + 1), dtype=np.float64)
    result[:, 1:] = (2. / (1 - norm_sqd))[:, np.newaxis] * poincare_pts
    result[:, 0] = (1 + norm_sqd) / (1 - norm_sqd)
    return result

In [None]:
def forward(examples):
    emb = tf.Variable(to_hyperboloid_points(4, 2, 1),
                                       name="emb", dtype=tf.float32)
    
    return tf.nn.embedding_lookup(emb, examples)

sess = tf.Session()
lr = 0.1
sess.run(init)
examples = forward([1,2])
init = tf.global_variables_initializer()
print(examples.shape)


In [None]:
def tf_distance(x, y):
    """
    The distance between two vectors
    :param x: shape (1, ndims)
    :param y: shape (1,ndims)
    :return: a scalar hyperbolic distance
    """
    norm_square = tf.square(tf.norm(x - y, axis=0))
    print norm_square
    denom1 = 1 - tf.square(tf.norm(x, axis=0))
    print denom1
    denom2 = 1 - tf.square(tf.norm(y, axis=0))
    print denom2
    arg = 1 + 2 * norm_square / (denom1 * denom2)
    print arg
    return tf.acosh(arg)

In [167]:
def get_logits(example, label, sample, true_b, sample_b):
    true_logits = tf_distance(example, label) + true_b
    sampled_logits = tf_distance(example, sample) + sample_b
    return true_logits, sampled_logits

In [169]:
embedding_size = 2
vocab_size = 4
emb = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -0.1, 0.1))
sm_w_t = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -0.1, 0.1))
# sm_w_t = tf.Variable(tf.zeros([vocab_size, embedding_size]))
sm_b = tf.Variable(tf.zeros([vocab_size]))

examples = tf.Variable([1,2])
labels = tf.Variable([2,2])
sampled_ids = tf.Variable([1,3])

example_emb = tf.nn.embedding_lookup(emb, examples)
# Weights for labels: [batch_size, emb_dim]
true_w = tf.nn.embedding_lookup(sm_w_t, labels)
# Biases for labels: [batch_size, 1]
true_b = tf.nn.embedding_lookup(sm_b, labels)
sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
print('emb shape: ', example_emb.shape)
print('sample w shape: ', sampled_w.shape)
# Biases for sampled ids: [num_sampled, 1]
sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)
true_logits, sampled_logits = get_logits(example_emb, true_w, sampled_w, true_b, sampled_b)
loss = nce_loss(true_logits, sampled_logits)
opt = tf.train.GradientDescentOptimizer(0.1)
emb_grad = opt.compute_gradients(loss, [emb])
sm_w_t_grad = opt.compute_gradients(loss, [sm_w_t])
grads = emb_grad + sm_w_t_grad
apply_grad = opt.apply_gradients(grads)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print 'original vectors: ', sess.run([example_emb, true_w])
    print 'emb grads are: ', sess.run(emb_grad)
    print 'sm_w_t grads are: ', sess.run(sm_w_t_grad)
    sess.run(apply_grad)
    print 'updated vectors: ', sess.run([example_emb, true_w])

('emb shape: ', TensorShape([Dimension(2), Dimension(2)]))
('sample w shape: ', TensorShape([Dimension(2), Dimension(2)]))
Tensor("Square_6:0", shape=(2,), dtype=float32)
Tensor("sub_8:0", shape=(2,), dtype=float32)
Tensor("sub_9:0", shape=(2,), dtype=float32)
Tensor("add_52:0", shape=(2,), dtype=float32)
Tensor("Square_9:0", shape=(2,), dtype=float32)
Tensor("sub_11:0", shape=(2,), dtype=float32)
Tensor("sub_12:0", shape=(2,), dtype=float32)
Tensor("add_54:0", shape=(2,), dtype=float32)
original vectors:  [array([[-0.08015664, -0.03832245],
       [ 0.00510602,  0.07676881]], dtype=float32), array([[-0.03569365,  0.09615657],
       [-0.03569365,  0.09615657]], dtype=float32)]
emb grads are:  [(IndexedSlicesValue(values=array([[-0.1547108 ,  0.18855031],
       [-0.52542841,  0.59112638]], dtype=float32), indices=array([1, 2], dtype=int32), dense_shape=array([4, 2], dtype=int32)), array([[ 0.03230792, -0.03401699],
       [-0.08015664, -0.03832245],
       [ 0.00510602,  0.07676881],


In [None]:
labels = tf.Variable([1,2])
labels_matrix = tf.reshape(
            tf.cast(labels,
                    dtype=tf.int64),
            [2, 1])

# Negative sampling.
sampled_ids, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
    true_classes=labels_matrix,
    num_true=1,
    num_sampled=5,
    unique=True,  # set to True if all the samples need to be unique
    range_max=5,
    distortion=0.75,
    unigrams=[1,1,1,1,1]))
                     
print(sampled_ids.shape)
print(sampled_ids)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(sampled_ids))
                  

In [None]:
x = {'indices':[1,2],'values':[4,5]}

In [None]:
from collections import namedtuple
grads = namedtuple('grads', 'values indices')

In [None]:
grads.values = [1,2]
grads.indices = [3,4]

In [174]:
x = np.array([[1,2]])
np.squeeze(x)

array([1, 2])

In [179]:
np.arccosh(1)

0.0

In [214]:
input_value = tf.Variable([[1., 1., 1.], [2., -1., 2.], [3., 2., 3.], [4., 0., 4.]])
kept_values = input_value[:,1:]
norm_square = tf.square(kept_values)
new_vals = tf.expand_dims(tf.sqrt(tf.reduce_sum(norm_square, axis=1)+1),axis=1)
tensor = tf.concat([new_vals, kept_values], axis=1)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(norm_square))
print(sess.run(new_vals))
print(sess.run(tensor))
print(sess.run(minkowski_tensor_dot(tensor,tensor)))

[[  1.   1.]
 [  1.   4.]
 [  4.   9.]
 [  0.  16.]]
[[ 1.73205078]
 [ 2.44948959]
 [ 3.74165726]
 [ 4.12310553]]
[[ 1.73205078  1.          1.        ]
 [ 2.44948959 -1.          2.        ]
 [ 3.74165726  2.          3.        ]
 [ 4.12310553  0.          4.        ]]
[[-1.        ]
 [-0.99999905]
 [-0.99999905]
 [-1.        ]]


In [232]:
x = np.array([1,2])
print(x.shape)

(2,)
