In [153]:
import numpy as np
import tensorflow as tf
import time

In [154]:
# Synthetic linear-regression data: Y = X b + N(0, sigma^2).
# Seed NumPy's global RNG so the data (and any later np.random draws)
# are reproducible under Restart & Run All.
np.random.seed(0)

# N observations
N = 2**12
# projection dimension
K = 500
# standard deviation of the observation noise (np.random.normal takes the
# scale, i.e. the standard deviation, not the variance)
sigma = 3
# true coefficient vector, one row per parameter
beta_true = np.array([[1], [2], [3], [4], [5]])
# D parameters: derived from beta_true so the design matrix always has a
# matching number of columns (a hard-coded D that disagrees with beta_true
# makes np.dot(dataX, beta_true) fail with a shape error)
D = beta_true.shape[0]
dataX = np.random.normal(0, 1, (N, D))
dataY = np.random.normal(0, sigma, (N, 1)) + np.dot(dataX, beta_true)

In [155]:
# ordinary least-squares reference solution: solve the normal equations
# (X'X) b = X'Y directly instead of forming an explicit inverse of X'X,
# which is cheaper and numerically better behaved
beta_LS = np.linalg.solve(np.dot(dataX.T, dataX), np.dot(dataX.T, dataY))

In [156]:
# graph inputs and the trainable coefficient vector
# X: design matrix with any number of rows and D columns
X = tf.placeholder('float', [None, D])
# Y: response column; shape entries must be integers (or None), so use 1
# rather than the float 1.0
Y = tf.placeholder('float', [None, 1])
# beta: D x 1 coefficients, initialised from a standard normal
beta = tf.Variable(tf.random_normal([D, 1], stddev=1.0))

In [157]:
# linear regression: prediction, residual and loss
# note: despite its name, MSE is the *sum* of squared residuals
# (tf.reduce_sum), not an average over the rows
Y_hat = tf.matmul(X, beta)
residual = Y - Y_hat
MSE = tf.reduce_sum(tf.square(residual))

In [158]:
# gradient of the loss with respect to beta; tf.gradients returns a list,
# so the gradient tensor itself is grad[0]
grad = tf.gradients(ys=MSE, xs=beta)

In [159]:
# hessian (or use tf.hessians in Tensorflow 1.0)
def compute_hessian():
    """Build the Hessian of the loss with respect to beta as a graph tensor.

    Reads the module-level `grad`, `beta` and `D`. Each of the D entries of
    the gradient is differentiated once more with respect to beta, giving a
    column shaped like beta; the D columns are then joined along axis 1.

    Returns:
        A [D, D] tensor whose i-th column holds the derivatives of the
        i-th gradient entry with respect to every element of beta.
    """
    columns = []
    for i in range(D):
        # i-th element of the gradient vector, kept as a 1 x 1 tensor
        dfdx_i = tf.slice(grad[0], begin=[i, 0], size=[1, 1])
        # differentiate again to get one column of second derivatives
        columns.append(tf.gradients(dfdx_i, beta)[0])
    # a single concat instead of a chain of D - 1 pairwise concats
    return tf.concat(columns, 1)

hessian = compute_hessian()

In [160]:
# inverse of the Hessian; it is multiplied with the gradient to form the
# update step in the training loop
# NOTE(review): despite the name `fisher`, this tensor is the *inverse*
# Hessian, not the Fisher information itself -- confirm the intended naming
fisher = tf.matrix_inverse(hessian)

In [161]:
# in-place update op: feeding a D x 1 increment through `delta` and running
# `drop` adds that increment to beta
delta = tf.placeholder(dtype='float', shape=[D, 1])
drop = tf.assign_add(beta, delta)

In [162]:
# number of sketch + Newton-step rounds
N_ITER = 20

# NOTE(review): the loss is quadratic in beta and both the gradient and the
# Hessian are evaluated on the same sketched data, so a single full Newton
# step already lands on the least-squares solution of the current sketch;
# the final beta_sketch therefore appears to depend only on the last S drawn.
# Confirm whether an iterative scheme with the unsketched gradient was meant.
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for _ in range(N_ITER):
        # gaussian random projection: a fresh K x N matrix every round
        S = np.random.normal(0, 1, (K, N))
        SX = np.dot(S, dataX)
        SY = np.dot(S, dataY)
        # gradient and inverse Hessian share the same feed, so fetch both in
        # one run call instead of evaluating the graph twice
        g, inv_hess = sess.run([grad[0], fisher], feed_dict={X: SX, Y: SY})
        # Newton step: beta <- beta - H^{-1} g
        sess.run(drop, feed_dict={delta: -np.dot(inv_hess, g)})
    # read the fitted coefficients back as a NumPy array before the session closes
    beta_sketch = beta.eval()

In [163]:
# distance of the sketched estimate from the least-squares estimate,
# relative to the size of the least-squares estimate
gap_sketch_ls = np.linalg.norm(beta_sketch - beta_LS)
ratio = gap_sketch_ls / np.linalg.norm(beta_LS)

# absolute errors: LS vs truth, sketch vs truth, LS vs sketch
err_ls_true = np.linalg.norm(beta_LS - beta_true)
err_sketch_true = np.linalg.norm(beta_sketch - beta_true)
err_ls_sketch = np.linalg.norm(beta_LS - beta_sketch)

for value in (ratio, err_ls_true, err_sketch_true, err_ls_sketch):
    print(value)

0.0149597590399
0.155153903846
0.214066917926
0.11076192784


In [164]:
# show the least-squares estimate followed by the sketched estimate
for estimate in (beta_LS, beta_sketch):
    print(estimate)

[[ 1.06418434]
 [ 2.10862843]
 [ 2.92607846]
 [ 3.94884799]
 [ 5.00848795]]
[[ 1.1277833 ]
 [ 2.15895081]
 [ 2.95877457]
 [ 3.98613214]
 [ 4.95163822]]
