In [1]:
import autograd.numpy as np
from autograd import grad
import tensorflow as tf
import time

In [2]:
"""
Class for Stiefel Sampling in pure python with autograd
"""
class StiefelSampling(object):
    """Givens-rotation helpers for the notebook's "Stiefel area form", on autograd numpy.

    An n x n matrix is assembled as an ordered product of plane rotations, one
    per index pair (i, j) with i < p and i < j < n.  From the partial products
    the class builds the derivative of the first p columns with respect to
    every angle and, from those, a square matrix of one-forms whose
    determinant is returned as the "area".

    Matrices are assembled from nested Python lists and never modified in
    place, so that every value stays traceable by autograd.
    """

    def get_stiefel_dim(self, n, p):
        """Return n*p - p*(p+1)/2 as an int: the number of rotation angles used."""
        return int(n*p - p*(p+1)/2)

    def _plane_matrix(self, n, i_in, j_in, plane_entries, diagonal):
        """Build an n x n matrix from a 2 x 2 block embedded in the (i_in, j_in) plane.

        plane_entries is (e_ii, e_ij, e_ji, e_jj).  Every other diagonal entry
        is `diagonal` and every other off-diagonal entry is 0.
        """
        e_ii, e_ij, e_ji, e_jj = plane_entries
        # The (i_in, i_in) key comes last so that it wins if i_in == j_in.
        overrides = {
            (i_in, j_in): e_ij,
            (j_in, i_in): e_ji,
            (j_in, j_in): e_jj,
            (i_in, i_in): e_ii,
        }
        rows = []
        for r in range(n):
            rows.append([
                overrides[(r, c)] if (r, c) in overrides else (diagonal if r == c else 0.)
                for c in range(n)
            ])
        return np.array(rows)

    def create_rotation_matrix(self, angle, n, i_in, j_in):
        """Return the n x n rotation by `angle` in the (i_in, j_in) coordinate plane.

        The result is the identity except for
        [i_in, i_in] = cos, [i_in, j_in] = -sin, [j_in, i_in] = sin, [j_in, j_in] = cos.
        """
        c = np.cos(angle)
        s = np.sin(angle)
        return self._plane_matrix(n, i_in, j_in, (c, -s, s, c), 1.)

    def create_derivative_of_rotation_matrix(self, angle, n, i_in, j_in):
        """Return d/d(angle) of create_rotation_matrix(angle, n, i_in, j_in).

        Only the four plane entries depend on the angle, so every other entry,
        including the rest of the diagonal, is 0.  This matches the
        zero-filled matrix built by TfStiefelSampling.
        """
        c = np.cos(angle)
        s = np.sin(angle)
        return self._plane_matrix(n, i_in, j_in, (-s, -c, c, -s), 0.)

    def create_partial_givens(self, angles, n, p):
        """Return [A, B], the forward and backward partial products of the rotations.

        With R_0, R_1, ... the rotations in (i, j) loop order:
          A[k] = R_0 ... R_{k-1}            (n x n, A[0] is the identity)
          B[k] = R_k ... R_last * I[:, :p]  (n x p, the final entry is I[:, :p])
        """
        pairs = [(i, j) for i in range(p) for j in range(i + 1, n)]

        # Accumulate forward rotations
        G = np.eye(n)
        A = [G]
        for idx, (i, j) in enumerate(pairs):
            G = np.matmul(G, self.create_rotation_matrix(angles[idx], n, i, j))
            A.append(G)

        # Accumulate backward rotations: collect newest-last, then flip once
        # instead of inserting at the front on every step.
        G = np.eye(n)[:, :p]
        B = [G]
        idx = self.get_stiefel_dim(n, p) - 1
        for i, j in reversed(pairs):
            G = np.matmul(self.create_rotation_matrix(angles[idx], n, i, j), G)
            B.append(G)
            idx -= 1
        B.reverse()

        return [A, B]

    def create_givens_matrix_from_rots(self, partial_rotations, n, p):
        """Return the full n x n product of all rotations (the last forward partial product)."""
        return partial_rotations[0][self.get_stiefel_dim(n, p)]

    def get_givens_jacobians(self, partial_rotations, angles, n, p):
        """Return a list of p matrices; entry i is n x dim with column k = d(column i)/d(angle k).

        The derivative with respect to angle k is A[k] * dR_k * B[k+1]: the
        product of all rotations with the k-th one replaced by its derivative.
        """
        forward, backward = partial_rotations[0], partial_rotations[1]

        derivatives_list = []
        idx = 0
        for i in range(p):
            for j in range(i + 1, n):
                d_rot = self.create_derivative_of_rotation_matrix(angles[idx], n, i, j)
                tail = np.matmul(d_rot, backward[idx + 1])
                derivatives_list.append(np.matmul(forward[idx], tail))
                idx += 1

        # Shape (dim, n, p): one n x p derivative per angle.
        givens_jacobian_tensor = np.stack(derivatives_list)

        stiefel_dim = self.get_stiefel_dim(n, p)
        givens_jacobians = []
        for i in range(p):
            column_i = givens_jacobian_tensor[:stiefel_dim, :n, i:i+1]
            givens_jacobians.append(column_i.reshape((stiefel_dim, n)).T)

        return givens_jacobians

    def get_stiefel_area_form_partial(self, G, jacobians, n, p):
        """Return det of the matrix whose columns are the one-forms g_l^T d(g_i), l > i, i < p.

        g_l denotes column l of G; row k of the matrix is the coefficient of
        the k-th angle.
        """
        f_list = []
        for i in range(p):
            # Rows i+1..n-1 of G.T are the columns of G with index above i.
            one_forms = np.matmul(G.T[i+1:n, :], jacobians[i]).T
            for j in range(n - i - 1):
                f_list.append(one_forms[:, j:j+1])

        return np.linalg.det(np.hstack(f_list))

    def get_area(self, n, p):
        """Return (a, grad(a)), where a(angles) evaluates the area determinant for size (n, p)."""

        def a(angles):
            sRots = self.create_partial_givens(angles, n, p)
            sGivens = self.create_givens_matrix_from_rots(sRots, n, p)
            sGivensJacobian = self.get_givens_jacobians(sRots, angles, n, p)
            return self.get_stiefel_area_form_partial(sGivens, sGivensJacobian, n, p)

        return a, grad(a)
        

In [3]:
"""
Class for Stiefel sampling in tensorflow
"""
class TfStiefelSampling(object):
    """TensorFlow graph version of the Givens-rotation "Stiefel area form" helpers.

    Every method adds ops to the current default graph; nothing is evaluated
    until the caller runs the returned tensors in a session.
    """

    def get_stiefel_dim(self, n, p):
        """Return n*p - p*(p+1)/2 as an int: the number of rotation angles used."""
        triangle = p*(p+1)/2
        return int(n*p - triangle)

    def create_rotation_matrix(self, angle, n, i_in, j_in):
        """Return an n x n tensor: the identity with a rotation by `angle` in the (i_in, j_in) plane."""
        cos_a = tf.cos(angle)
        sin_a = tf.sin(angle)

        # Start from a plain-Python identity and overwrite the four plane entries.
        rows = [[1 if row == col else 0 for col in range(n)] for row in range(n)]
        rows[i_in][i_in] = cos_a
        rows[i_in][j_in] = -sin_a
        rows[j_in][i_in] = sin_a
        rows[j_in][j_in] = cos_a

        return tf.stack(rows)

    def create_derivative_of_rotation_matrix(self, angle, n, i_in, j_in):
        """Return the n x n derivative of the rotation with respect to `angle` (zero outside the plane)."""
        cos_a = tf.cos(angle)
        sin_a = tf.sin(angle)
        positions = [[i_in, i_in], [i_in, j_in], [j_in, i_in], [j_in, j_in]]
        values = [-sin_a, -cos_a, cos_a, -sin_a]
        # scatter_nd zero-fills every position that is not listed.
        return tf.scatter_nd(positions, values, [n, n])

    def create_partial_givens(self, angles, n, p):
        """Return [A, B]: forward partial products (n x n) and backward partial products (n x p).

        A[k] is the product of the first k rotations.  B[k] is the product of
        rotation k and all later ones applied to the first p identity columns.
        """
        plane_pairs = [(i, j) for i in range(p) for j in range(i + 1, n)]

        # Forward accumulation, starting from the identity.
        product = tf.constant(np.eye(n), dtype=tf.float64)
        A = [product]
        for position, (i, j) in enumerate(plane_pairs):
            rotation = self.create_rotation_matrix(angles[position], n, i, j)
            product = tf.matmul(product, rotation, b_is_sparse=True)
            A.append(product)

        # Backward accumulation, starting from the first p identity columns.
        product = tf.constant(np.eye(n), dtype=tf.float64)[:, :p]
        B = [product]
        position = self.get_stiefel_dim(n, p) - 1
        for i, j in reversed(plane_pairs):
            rotation = self.create_rotation_matrix(angles[position], n, i, j)
            product = tf.matmul(rotation, product, a_is_sparse=True)
            B.insert(0, product)
            position -= 1

        return [A, B]

    def create_givens_matrix(self, angles, n, p):
        """Return the full n x n product of all rotations, without keeping partial products."""
        product = tf.constant(np.eye(n), dtype=tf.float64)
        position = 0
        for i in range(p):
            for j in range(i + 1, n):
                rotation = self.create_rotation_matrix(angles[position], n, i, j)
                product = tf.matmul(product, rotation, b_is_sparse=True)
                position += 1

        return product

    def create_givens_matrix_from_rots(self, partial_rotations, n, p):
        """Return (an identity op on) the last forward partial product, i.e. the full rotation product."""
        forward = partial_rotations[0]
        return tf.identity(forward[self.get_stiefel_dim(n, p)])

    def get_givens_jacobians(self, partial_rotations, angles, n, p):
        """Return a list of p tensors; entry i is n x dim with column k = d(column i)/d(angle k)."""
        forward, backward = partial_rotations[0], partial_rotations[1]

        per_angle = []
        position = 0
        for i in range(p):
            for j in range(i + 1, n):
                d_rot = self.create_derivative_of_rotation_matrix(angles[position], n, i, j)
                # Full product with rotation `position` replaced by its derivative.
                tail = tf.matmul(d_rot, backward[position + 1], a_is_sparse=True)
                whole = tf.matmul(forward[position], tail)
                per_angle.append(tf.identity(whole))
                position += 1

        # Shape (dim, n, p): one n x p derivative per angle.
        stacked = tf.stack(per_angle)

        dim = self.get_stiefel_dim(n, p)
        jacobians = []
        for col in range(p):
            column_slice = tf.slice(stacked, [0, 0, col], [dim, n, 1])
            jacobians.append(tf.transpose(tf.reshape(column_slice, [dim, n])))

        return jacobians

    def get_stiefel_area_form_partial(self, G, jacobians, n, p):
        """Return the determinant of the dim x dim matrix whose columns are the one-forms."""
        dim = self.get_stiefel_dim(n, p)
        G_t = tf.transpose

        columns = []
        for i in range(p):
            # Rows i+1..n-1 of G transposed, i.e. the columns of G with index above i.
            lower_rows = tf.slice(G_t(G), [i + 1, 0], [n - i - 1, n])
            one_forms = G_t(tf.matmul(lower_rows, jacobians[i]))
            columns.extend(
                tf.slice(one_forms, [0, j], [dim, 1]) for j in range(n - i - 1)
            )

        return tf.matrix_determinant(tf.concat(columns, axis=1))

    def get_grad_area(self, theta, n, p):
        """Return tf.gradients of the area determinant with respect to the angle tensors `theta`."""
        partials = self.create_partial_givens(theta, n, p)
        givens = self.create_givens_matrix_from_rots(partials, n, p)
        jacobians = self.get_givens_jacobians(partials, theta, n, p)
        area = self.get_stiefel_area_form_partial(givens, jacobians, n, p)
        return tf.gradients(area, theta)

In [4]:
# Running computations in tensorflow version
# Build the gradient graph for a 4 x 3 frame, then evaluate it with every
# angle placeholder fed the value pi/4.
T = TfStiefelSampling()
n = 4
p = 3
d = int(n*p - p*(p+1)/2)
theta = [tf.placeholder(tf.float64) for _ in range(d)]
tfGradArea = T.get_grad_area(theta, n, p)
with tf.Session() as sess:
    g = sess.run(tfGradArea, feed_dict=dict.fromkeys(theta, np.pi/4))
    print(g) 

[2.7755575615628914e-17, -0.25, -0.50000000000000022, -4.0476550301515863e-17, -0.25000000000000006, 5.5511151231257827e-17]


In [5]:
# Running computations in python version
# Evaluate the autograd gradient of the area determinant for the same
# 4 x 3 problem as the TensorFlow cell, with every angle set to pi/4.
s_n = 4
s_p = 3
s_d = int(s_n*s_p - s_p*(s_p+1)/2)
s = StiefelSampling()
angles = [np.pi/4 for _ in range(s_d)]
# get_area takes (n, p): the second argument is the number of columns s_p,
# which is also what s_d (the length of `angles`) was computed from.
area, grad_area = s.get_area(s_n, s_p)
print(grad_area(angles))

[2.7755575615628914e-17, -0.24999999999999983, -0.49999999999999989, 1.12810580094366e-17, -0.24999999999999972, 1.038475052298593e-16]
