In [None]:
import numpy as np
import tensorflow as tf

In [None]:
# Problem size: N observations, each with D regression coefficients.
# N is a power of two, which the Hadamard sketch further down relies on
# (it builds a 2^k x 2^k matrix from int(log2(N))).
N = 1024
D = 50

# Seed NumPy's global generator so the simulated data and the random
# sketches drawn with np.random are reproducible on Restart & Run All.
SEED = 0
np.random.seed(SEED)

In [None]:
# Ground-truth coefficients: a D x 1 column drawn uniformly from [-2, 2).
beta_true = np.random.uniform(low=-2, high=2, size=(D, 1))

In [None]:
def generateData(n, beta, link='linear', sigma=3.0):
    """Generate a synthetic data set for a generalized linear model.

    # Arguments
        n: number of observations
        beta: regression coefficient vector; reshaped to a (d, 1) column
        link: 'linear' or 'logistic'
        sigma: standard deviation of Gaussian noise (only for linear regression)

    # Returns
        A tuple (X, Y): the (n, d) design matrix with i.i.d. standard normal
        entries and the (n, 1) response vector

    # Raises
        ValueError: if link is neither 'linear' nor 'logistic'
    """
    # Fail before touching the random generator instead of hitting an
    # unbound `Y` at the return statement.
    if link not in ('linear', 'logistic'):
        raise ValueError("link must be 'linear' or 'logistic', got %r" % (link,))
    # Force a column vector: a 1-D beta would make eta 1-D and the noise
    # addition below would silently broadcast Y to shape (n, n).
    beta = np.asarray(beta).reshape(-1, 1)
    d = beta.shape[0]
    X = np.random.normal(0, 1, n * d).reshape(n, d)
    eta = np.dot(X, beta)
    if link == 'linear':
        Y = eta + np.random.normal(0, sigma, n).reshape(n, 1)
    else:
        # Inverse logit link; keeps mu strictly inside (0, 1).
        mu = 1.0 / (1.0 + np.exp(-eta))
        # Labels are a deterministic threshold of mu, not Bernoulli draws.
        Y = (mu > 0.5).astype(float)
    return (X, Y)

In [None]:
# Simulate the working data set from the linear model with Gaussian noise.
dataX, dataY = generateData(n=N, beta=beta_true, link='linear')

In [None]:
# Persist the simulated design matrix and response vector as text files.
np.savetxt(fname='dataX.csv', X=dataX)
np.savetxt(fname='dataY.csv', X=dataY)

In [None]:
# ordinary least squares (the best linear unbiased estimate)
def lm(X, Y):
    """Fit a linear model by ordinary least squares.

    # Arguments
        X: design matrix of shape (n, d)
        Y: response vector of shape (n, 1)

    # Returns
        The coefficient estimate solving (X'X) b = X'Y, shape (d, 1)

    # Raises
        numpy.linalg.LinAlgError: if X'X is singular
    """
    Xt = np.transpose(X)
    # Solve the normal equations directly rather than forming the explicit
    # inverse of X'X, which is slower and less accurate numerically.
    return np.linalg.solve(np.dot(Xt, X), np.dot(Xt, Y))

In [None]:
# Least-squares estimate computed on the full (unsketched) data set.
beta_hat = lm(X=dataX, Y=dataY)

In [None]:
# Graph inputs: a batch of design-matrix rows and the matching responses.
X = tf.placeholder(tf.float32, shape=[None, D])
Y = tf.placeholder(tf.float32, shape=[None, 1])
# Coefficient vector to be estimated, initialised from a standard normal.
beta = tf.Variable(tf.random_normal(shape=[D, 1], stddev=1.0))

In [None]:
# linear regression: the loss is the sum of squared residuals
Y_hat = tf.matmul(X, beta)
residual = Y - Y_hat
loss = tf.reduce_sum(tf.square(residual))

In [None]:
# logistic regression with log loss
# Stored under its own name so that running this cell does not overwrite the
# squared-error `loss` that the gradient and Hessian cells differentiate.
# To fit the logistic model instead, assign `loss = logistic_loss` before
# the gradient cell is run.
eta = tf.matmul(X, beta)
# Inverse logit link, p = 1 / (1 + exp(-eta)); this keeps p inside (0, 1)
# so that log(1 - p) is defined.
p = tf.sigmoid(eta)
logistic_loss = -1.0 * tf.reduce_sum(Y * tf.log(p) + (1.0 - Y) * tf.log(1.0 - p))

In [None]:
# gradient of the loss with respect to beta; the result is a list whose
# first entry is the (D, 1) gradient tensor
grad = tf.gradients(ys=loss, xs=beta)

In [None]:
# hessian (or use tf.hessians in Tensorflow 1.0)
def compute_hessian():
    """Assemble the Hessian of the loss with respect to beta.

    Each entry of the gradient is differentiated once more with respect to
    beta, giving one (D, 1) column of second derivatives per coefficient;
    the columns are then joined side by side.

    # Returns
        A (D, D) tensor of second derivatives
    """
    columns = []
    for i in range(D):
        # i-th entry of the gradient vector, kept as a (1, 1) tensor
        dfdx_i = tf.slice(grad[0], begin=[i, 0], size=[1, 1])
        # differentiate again to get the i-th column of the Hessian
        columns.append(tf.gradients(dfdx_i, beta)[0])
    # NOTE: pre-1.0 argument order, tf.concat(concat_dim, values);
    # TensorFlow 1.0 changed this to tf.concat(values, axis).
    return tf.concat(1, columns)

hessian = compute_hessian()

In [None]:
# Inverse of the Hessian of the loss. The training loops multiply it with
# the gradient to form the Newton step. Despite the variable name, this
# tensor is the inverse Hessian, not an information matrix itself.
fisher = tf.matrix_inverse(hessian)

In [None]:
# in-place update beta <- beta + delta, with the step supplied at run time
delta = tf.placeholder(tf.float32, shape=[D, 1])
descent = tf.assign_add(beta, delta)

In [None]:
def hadamard(k):
    """Build a normalised Hadamard matrix by repeated Kronecker products.

    # Arguments
        k: exponent; the result has 2 ^ k rows and columns

    # Returns
        A Hadamard matrix of size 2 ^ k whose entries are scaled to
        +/- 2 ^ (-k / 2); for k = 0 the scalar 1.0 is returned
    """
    # 2 x 2 building block, scaled so that every product stays orthonormal
    base = np.array([[1.0, 1.0], [1.0, -1.0]]) / np.sqrt(2)
    result = 1.0
    for _ in range(k):
        result = np.kron(base, result)
    return result

In [None]:
def sketch(X, Y, R, method='gaussian', sampling=False, bootstrap=False):
    """Randomly project a regression data set onto fewer rows.

    # Arguments
        X: design matrix with one row per observation
        Y: response vector with one row per observation
        R: projection dimension (number of rows in the result)
        method: 'gaussian' or 'hadamard'; any other value (e.g. 'none')
            skips the projection step and leaves the data unchanged
        sampling: if True, keep R randomly selected rows of the
            (projected) data
        bootstrap: if True, the rows are selected with replacement

    # Usage
        Subsampling: method='none', sampling=True, bootstrap=True
        i.i.d Gaussian: method='gaussian', sampling=False, bootstrap=False
        Hadamard: method='hadamard', sampling=True, bootstrap=False

    # Returns
        A tuple of design matrix and response vector in projection space

    # Raises
        ValueError: if method='hadamard' and the number of rows of X is
            not a power of 2
    """
    N = int(X.shape[0])
    SX = X
    SY = Y
    if method == 'gaussian':
        S = np.random.normal(0, 1, R * N).reshape(R, N)
        SX = np.dot(S, X)
        SY = np.dot(S, Y)
    elif method == 'hadamard':
        # The Hadamard matrix only exists here for sizes 2 ^ k; check up
        # front instead of failing later with a shape mismatch.
        if N < 1 or N & (N - 1) != 0:
            raise ValueError(
                "method='hadamard' needs a power-of-2 number of rows, got %d" % N)
        H = hadamard(N.bit_length() - 1)
        # Independent random signs, +1 or -1 with equal probability.
        signs = np.random.choice([-1.0, 1.0], size=N)
        # Scaling column j of H by signs[j] equals H . diag(signs) without
        # building the dense diagonal matrix or the N x N matrix product.
        S = np.atleast_2d(H) * signs
        SX = np.dot(S, X)
        SY = np.dot(S, Y)
    if sampling:
        select = np.random.choice(N, size=R, replace=bootstrap)
        SX = SX[select, :]
        SY = SY[select, :]
    return (SX, SY)

In [None]:
# sample path: one row of beta per sketched Newton iteration
n_steps = 50
# The row width follows D rather than a literal, so changing the problem
# size does not break the assignment below.
path = np.zeros((n_steps, D))

with tf.Session() as sess:
    tf.initialize_all_variables().run()
    for i in range(n_steps):
        # sketching: draw a fresh Gaussian projection of the data
        sketchX, sketchY = sketch(dataX, dataY, 256, method='gaussian', sampling=False, bootstrap=False)
        feed = {X: sketchX, Y: sketchY}
        # gradient and inverse Hessian on the same sketch, in one graph run
        g, I = sess.run([grad[0], fisher], feed_dict=feed)
        # Newton update: beta <- beta - H^{-1} g
        sess.run(descent, feed_dict={delta: -np.dot(I, g)})
        path[i, :] = beta.eval().ravel()

In [None]:
# Persist the iterate path (one row per iteration) as a text file.
np.savetxt(fname='gaussian.csv', X=path)

In [None]:
# simulation settings
n_sims = 100         # number of independently simulated data sets
n_newton_steps = 10  # sketched Newton iterations per data set

# arrays to store results: one row of coefficient estimates per simulation
B_hat = np.zeros((n_sims, D))
B_sketch = np.zeros((n_sims, D))

for i in range(n_sims):
    # Fresh data for each replicate, held in its own names so that the
    # notebook-level dataX / dataY are not overwritten.
    simX, simY = generateData(N, beta_true, link='linear')
    B_hat[i, :] = lm(simX, simY).ravel()
    with tf.Session() as sess:
        # re-initialise beta so every replicate starts from a new random point
        tf.initialize_all_variables().run()
        for j in range(n_newton_steps):
            sketchX, sketchY = sketch(simX, simY, 512, method='hadamard', sampling=True, bootstrap=False)
            feed = {X: sketchX, Y: sketchY}
            g, I = sess.run([grad[0], fisher], feed_dict=feed)
            sess.run(descent, feed_dict={delta: -np.dot(I, g)})
        B_sketch[i, :] = beta.eval().ravel()

In [None]:
# Mean squared error of the sketched estimates relative to that of the
# full-data least-squares estimates, both measured against beta_true.
truth = np.transpose(beta_true)
mse_sketch = np.mean(np.square(B_sketch - truth))
mse_ols = np.mean(np.square(B_hat - truth))
ratio = mse_sketch / mse_ols
print(ratio)