In [2]:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
import networkx as nx
import numpy as np
import pandas as pd

In [3]:
@ops.RegisterGradient("gradient_no_unitary_adjustment")
def _test1(op, grad_e, grad_v):
    """Gradient for SelfAdjointEigV2 derived with Joan with no adjustment for subspace"""
    e = op.outputs[0]
    v = op.outputs[1]
    #dim = v.get_shape()
    with ops.control_de pendencies([grad_e.op, grad_v.op]):
        if grad_v is not None:  
            E = array_ops.diag(e)
            #v_proj = arrary.ops.slice(v, [0,0], [])
            grad_grassman = grad_v# - math_ops.batch_matmul(math_ops.batch_matmul(v, array_ops.transpose(grad_v)), v)
            grad_a = math_ops.batch_matmul(grad_grassman, math_ops.batch_matmul(E, array_ops.transpose(grad_v)))+math_ops.batch_matmul(grad_v, math_ops.batch_matmul(E, array_ops.transpose(grad_grassman)))
    return grad_a

SyntaxError: invalid syntax (<ipython-input-3-aeaa3c06242a>, line 7)

In [4]:
@ops.RegisterGradient("grassman_with_2d")
def _test1(op, grad_e, grad_v):
    """Gradient for SelfAdjointEigV2 derived with Joan with no adjustment for subspace"""
    e = op.outputs[0]
    v = op.outputs[1]
    #dim = v.get_shape()
    with ops.control_dependencies([grad_e.op, grad_v.op]):
        if grad_v is not None:  
            E = array_ops.diag(e)
            v_proj = array_ops.slice(v, [0,0], [20,2])
            grad_grassman = grad_v - math_ops.batch_matmul(math_ops.batch_matmul(v_proj, array_ops.transpose(v_proj)), grad_v)
            grad_a = math_ops.batch_matmul(grad_grassman, math_ops.batch_matmul(E, array_ops.transpose(grad_v)))+math_ops.batch_matmul(grad_v, math_ops.batch_matmul(E, array_ops.transpose(grad_grassman)))
    return grad_a

In [5]:
def balanced_stochastic_blockmodel(communities=2, groupsize=3, p_in=0.5, p_out=0.1, seed=None):
    #gives dense adjacency matrix representaiton of randomly generated SBM with balanced community size

    G = nx.planted_partition_graph(l=communities, k=groupsize, p_in=p_in, p_out =p_out, seed=seed)
    A = nx.adjacency_matrix(G).todense()
    
    return A
def target_subspace(adj, groupsize, communities, diag, dim_proj):
    normalizer = tf.cast(2.0*groupsize*communities, dtype=tf.float64)
    total_degree = tf.cast(tf.reduce_sum(adj), dtype=tf.float64)
    r = tf.sqrt(total_degree/normalizer)
    BH_op = (tf.square(r)-1)*tf.diag(tf.ones(shape=[communities*groupsize], dtype=tf.float64))-r*adj+diag 
    val, vec = tf.self_adjoint_eig(BH_op) #this is already normalized so no need to normalize
    subspace = tf.slice(vec, [0,0], [communities*groupsize, dim_proj])
    return r, subspace

def proj_magnitude(space, vector):
    projection_op = tf.matmul(space, tf.transpose(space))
    projection = tf.matmul(projection_op, vector)
    return tf.sqrt(tf.reduce_sum(tf.square(projection))) #tf.reduce_sum(tf.abs(projection))#


def rnd_vec_normed(communities, groupsize, seed=None):
    rnd_vec1 = tf.Variable(tf.random_normal(shape=[communities*groupsize,1], mean=0.0,stddev=1.0,
                                                    dtype=tf.float64,
                                                    seed=seed))
    return normalize_vec(rnd_vec1)


In [28]:
#Let's create a dataset
communities = 2
group_size = 10

data_easy = [np.asarray(balanced_stochastic_blockmodel(communities, group_size, p, 0.1*p)).astype(np.double) for p in np.linspace(0.2, 0.4,70)]
data_hard = [np.asarray(balanced_stochastic_blockmodel(communities, group_size, p, 0.5*p)).astype(np.double) for q in np.linspace(0.2, 0.4,7)]

data = data_easy+data_hard
np.random.shuffle(data)


In [29]:
#we need to do online training for now as I cannot write eigendecomposition in batch

data[0]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  0.,
         1.,  1.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,
         0.,  0.,  1.,  0.,  1.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  1.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  1.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,  0.,
         0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 1.,  1.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,
         0.,  0.,  0.,  1.,  0.,  1.,  0.],
       [ 0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  

In [62]:
def learn_average_deg_variable(communities = 2, group_size = 10, seed_v=None, name='grassman_with_2d', projection_dim=2, 
                     print_ratio=10, l_rate=0.1, mean=0.3, sd=1.0):
    """testing to see if the loss will decrease backproping through very simple function"""
    
    #now p and q will be generated from a range of 
    
    X = tf.placeholder(dtype=tf.float64, shape=[communities*group_size, communities*group_size])
    
    B = tf.cast(X, dtype = tf.float64)
    
    Diag = tf.diag(tf.reduce_sum(B,0))
    Diag = tf.cast(Diag, tf.float64)
    
    #by symmetry I should make this a bit more constrained.  so

    v =  tf.Variable(tf.random_normal(shape=[communities*group_size,1], mean=mean,
                                 stddev=sd, dtype=tf.float64,
                                 seed=seed_v, name=None))
    
     
    
    degree = tf.cast(communities*group_size, dtype=tf.float64)
    r_param = tf.div(tf.cast(1.0, dtype=tf.float64), degree)*tf.matmul(tf.transpose(v), tf.matmul(Diag, v))

    
    BH = (tf.square(r_param)-1)*tf.diag(tf.ones(shape=[communities*group_size], dtype=tf.float64))-tf.mul(r_param, B)+Diag 
    

    with tf.Session() as sess:
        g = tf.get_default_graph()
        
        with g.gradient_override_map({'SelfAdjointEigV2': name}):
            eigenval, eigenvec = tf.self_adjoint_eig(BH)
            #we try to do svm in this subspace 
            #or we can project it down to 1 dimensions, do the clustering there via some threshold and check if it makes sense 
            #by computing the loss, if it is too big, we change the angle we project down to...
            
            
            eigenvec_proj = tf.slice(eigenvec, [0,0], [communities*group_size, projection_dim])
            
            
            
            true_assignment_a = tf.concat(0, [-1*tf.ones([group_size], dtype=tf.float64),
                                      tf.ones([group_size], dtype=tf.float64)])
            true_assignment_b = -1*true_assignment_a
            true_assignment_a = tf.expand_dims(true_assignment_a, 1)
            true_assignment_b = tf.expand_dims(true_assignment_b, 1)

            
            projected_a = tf.matmul(tf.matmul(eigenvec_proj, tf.transpose(eigenvec_proj)), true_assignment_a)#tf.transpose(true_assignment_a))
            projected_b = tf.matmul(tf.matmul(eigenvec_proj, tf.transpose(eigenvec_proj)), true_assignment_b)#tf.transpose(true_assignment_b))
            
            
            
            loss = tf.minimum(tf.reduce_sum(tf.square(tf.sub(projected_a, true_assignment_a))),
                              tf.reduce_sum(tf.square(tf.sub(projected_b, true_assignment_b))))
            
            optimizer = tf.train.GradientDescentOptimizer(l_rate)
            
            train = optimizer.minimize(loss, var_list=[v])

            eigenvec_grad = tf.gradients(eigenvec, v)
            loss_grad = tf.gradients(loss, v)
            
            
            
            r_op, target = target_subspace(adj=B, groupsize=group_size, communities=communities, diag=Diag, dim_proj=projection_dim)  
            
            r_op_projection_a = tf.matmul(tf.matmul(target, tf.transpose(target)), true_assignment_a)
            r_op_projection_b = tf.matmul(tf.matmul(target, tf.transpose(target)), true_assignment_b)
            r_op_loss = tf.minimum(tf.reduce_sum(tf.square(tf.sub(r_op_projection_a, true_assignment_a))),
                              tf.reduce_sum(tf.square(tf.sub(r_op_projection_b, true_assignment_b))))
            
            init = tf.initialize_all_variables()
            
            
            sess.run(init)
            sess.run(train, feed_dict={X:data[0]})
            a,r, b,c,d= sess.run([v, r_param, r_op_loss, r_op, tf.transpose(r_op_projection_a)], feed_dict={X:data[0]})
            a_lst = []
            r_lst = []
            b_lst = []
            c_lst = []
            d_lst = []
            
            a_lst.append(a)
            r_lst.append(r)
            b_lst.append(b)
            c_lst.append(None)
            d_lst.append(d)
            
            print "initial v: {}. r_param: {}. r_op = sqrt(average degree) : {} . Loss associated with r_op: {}. r_op assignments {}.".format(a, r, c, b, d)
            for i in range(len(data)):   
                try:
                    sess.run(feed_dict={X:data[i]})
                    sess.run(train)
                except: 
                    pass
                
                if i%print_ratio==0:  
                    #print i
                    try:
                        a,r, b,c,d = sess.run([v, r_param, loss, tf.gradients(loss, v), tf.transpose(projected_a)], feed_dict={X:data[i]}) 
                        a_lst.append(a)
                        r_lst.append(r)
                        b_lst.append(b)
                        c_lst.append(c)
                        d_lst.append(d)
                    except:
                        a,r, b,c,d = None, None, None, None, None 
                        a_lst.append(a)
                        r_lst.append(r)
                        b_lst.append(b)
                        c_lst.append(c)
                        d_lst.append(d)
                    #print "current r: {}, current loss: {}, gradient of loss/r is {} and current assignments (up to sign) {}.".format(a,b,c,d)  

    d = {"v": a_lst, "r_param": r_lst, "loss": b_lst, "gradient_loss_v": c_lst, "projection": d_lst}
    d = pd.DataFrame(d)
    d.to_csv("/Users/xiangli/Desktop/clusternet/Learning_r_matrix_data/r{}rate{}p{}q{}step{}.csv".format(mean, l_rate, p, q, print_ratio))
    return  d
                

                
                

In [63]:
learn_average_deg_variable(print_ratio=1, name='grassman_with_2d',  l_rate=0.00001, mean=0.01)

KeyboardInterrupt: 

In [40]:
#Let's create a dataset
communities = 2
group_size = 10

data_easy = [np.asarray(balanced_stochastic_blockmodel(communities, group_size, p, 0.1*p)).astype(np.double) for p in np.linspace(0.1, 0.3,50)]
data_hard = [np.asarray(balanced_stochastic_blockmodel(communities, group_size, p, 0.5*p)).astype(np.double) for q in np.linspace(0.1, 0.3,5)]

#data = data_easy+data_hard
np.random.shuffle(data)


In [48]:
def learn_average_deg_variable(communities = 2, group_size = 10, seed_v=None, projection_dim=2, print_ratio=10, l_rate=0.00000001, mean=0.3, sd=0.1):
    """testing to see if the loss will decrease backproping through very simple function"""
    
    #now p and q will be generated from a range of 
    
    X = tf.placeholder(dtype=tf.float64, shape=[communities*group_size, communities*group_size])
    
    B = tf.cast(X, dtype = tf.float64)
    
    Diag = tf.diag(tf.reduce_sum(B,0))
    Diag = tf.cast(Diag, tf.float64)
    
    #by symmetry I should make this a bit more constrained.  so

    v =  tf.Variable(tf.random_normal(shape=[communities*group_size,1], mean=mean,
                                 stddev=sd, dtype=tf.float64,
                                 seed=seed_v, name=None))
    v_sqr = tf.square(v)
    
     
    
    degree = tf.cast(communities*group_size, dtype=tf.float64)
    r_param = tf.div(tf.cast(1.0, dtype=tf.float64), degree)*tf.matmul(tf.transpose(v_sqr), tf.matmul(Diag, v_sqr))

    
    BH = (tf.square(r_param)-1)*tf.diag(tf.ones(shape=[communities*group_size], dtype=tf.float64))-tf.mul(r_param, B)+Diag 
    

    with tf.Session() as sess:
        g = tf.get_default_graph()
        
        with g.gradient_override_map({'SelfAdjointEigV2': 'grassman_with_2d'}):
            eigenval, eigenvec = tf.self_adjoint_eig(BH)
            #we try to do svm in this subspace 
            #or we can project it down to 1 dimensions, do the clustering there via some threshold and check if it makes sense 
            #by computing the loss, if it is too big, we change the angle we project down to...
            
            
            eigenvec_proj = tf.slice(eigenvec, [0,0], [communities*group_size, projection_dim])
            
            
            
            true_assignment_a = tf.concat(0, [-1*tf.ones([group_size], dtype=tf.float64),
                                      tf.ones([group_size], dtype=tf.float64)])
            true_assignment_b = -1*true_assignment_a
            true_assignment_a = tf.expand_dims(true_assignment_a, 1)
            true_assignment_b = tf.expand_dims(true_assignment_b, 1)

            
            projected_a = tf.matmul(tf.matmul(eigenvec_proj, tf.transpose(eigenvec_proj)), true_assignment_a)#tf.transpose(true_assignment_a))
            projected_b = tf.matmul(tf.matmul(eigenvec_proj, tf.transpose(eigenvec_proj)), true_assignment_b)#tf.transpose(true_assignment_b))
            
            
            
            loss = tf.minimum(tf.reduce_sum(tf.square(tf.sub(projected_a, true_assignment_a))),
                              tf.reduce_sum(tf.square(tf.sub(projected_b, true_assignment_b))))
            
            optimizer = tf.train.AdamOptimizer(l_rate)
            
            train = optimizer.minimize(loss, var_list=[v])

            eigenvec_grad = tf.gradients(eigenvec, v)
            loss_grad = tf.gradients(loss, v)
            
            r_op, target = target_subspace(adj=B, groupsize=group_size, communities=communities, diag=Diag, dim_proj=projection_dim)
            r_diff = (r_op-r_param) #difference between r_op and r_param is how close we are to the average degree
            
            
            init = tf.initialize_all_variables()
            
            
            sess.run(init)
            a,r, b= sess.run([v, r_param, r_diff], feed_dict={X:data[0]})
            a_lst = []
            r_lst = []
            r_diff_list = []
            b_lst = []
            c_lst = []
            d_lst = []
            
            a_lst.append(a)
            r_lst.append(r)
            r_diff_list.append(b)
            b_lst.append(None)
            c_lst.append(None)
            d_lst.append(None)
            
            print "initial v: {}. r_param: {}, difference between r_param and sqrt average deg {}.".format(a, r, b)
            for i in range(len(data)):   
                try:
                    sess.run(train, feed_dict={X:data[i]})
                    #if i%print_ratio==0:  
                    #print i
                        #try:
                    a,r, k, b,c,d = sess.run([v, r_param, r_diff, loss, tf.gradients(loss, v), tf.transpose(projected_a)], feed_dict={X:data[i]}) 
                    a_lst.append(a)
                    r_lst.append(r)
                    r_diff_list.append(k)
                    b_lst.append(b)
                    c_lst.append(c)
                    d_lst.append(d)
                    
                            
                except: 
                    a,r, k, b,c,d = None, None, None, None, None, None 
                    a_lst.append(a)
                    r_lst.append(r)
                    r_diff_list.append(k)
                    b_lst.append(b)
                    c_lst.append(c)
                    d_lst.append(d)
                    pass
                

                    #print "current r: {}, current loss: {}, gradient of loss/r is {} and current assignments (up to sign) {}.".format(a,b,c,d)  

    d = {"v": a_lst, "r_param": r_lst, "r_diff": r_diff_list, "loss": b_lst, "gradient_loss_v": c_lst, "projection": d_lst}
    d = pd.DataFrame(d)
    easy_size = len(data)
    d.to_csv("/Users/xiangli/Desktop/clusternet/Learning_r_matrix_data/mean{}l_rate{}step{}data_size{}.csv".format(mean, l_rate, print_ratio, easy_size))
    return  d
                

In [49]:
#data = data+data+data+data
#np.random.shuffle(data)


In [50]:
learn_average_deg_variable(print_ratio=1, l_rate = 0.1)

initial v: [[ 0.33278568]
 [ 0.40563987]
 [ 0.48760857]
 [ 0.21069585]
 [ 0.09485482]
 [ 0.27115713]
 [ 0.46239166]
 [ 0.32648871]
 [ 0.47354541]
 [ 0.28008422]
 [ 0.17787136]
 [ 0.281964  ]
 [ 0.27863064]
 [ 0.50821174]
 [ 0.36968949]
 [ 0.21332368]
 [ 0.43487074]
 [ 0.28825561]
 [ 0.22069374]
 [ 0.15399564]]. r_param: [[ 0.03601875]], difference between r_param and sqrt average deg [[ 0.88593569]].


Unnamed: 0,gradient_loss_v,loss,projection,r_diff,r_param,v
0,,,,[[0.88593569454]],[[0.0360187511891]],"[[0.332785679741], [0.405639874067], [0.487608..."
1,"[[[1.03029556996], [2.33197051184], [9.5126845...",18.000000,"[[2.87375661629e-45, -2.60368022498e-23, -5.96...",[[0.908844763879]],[[0.0131096818504]],"[[0.232785689119], [0.305639878578], [0.387608..."
2,"[[[0.276727279016], [0.91040992538], [0.0], [0...",17.994715,"[[1.8336051726e-15, 1.29135422539e-15, -1.0, 2...",[[1.09021450378]],[[0.00523061122638]],"[[0.142644314213], [0.212153142486], [0.320602..."
3,"[[[-4.71235516682e-12], [-2.21382332671e-11], ...",15.985191,"[[-6.89865205893e-10, -9.17816299371e-10, -0.0...",[[1.15987636616]],[[0.00201863769827]],"[[0.072965017458], [0.139887881018], [0.268807..."
4,"[[[0.000793670266822], [0.0966187306958], [0.8...",17.997046,"[[-2.00371086751e-10, -9.99218805376e-11, -0.0...",[[1.11729791742]],[[0.000736071326968]],"[[0.0137247402961], [0.0748670613236], [0.2218..."
5,"[[[1.93471720866e-15], [-4.23978983044e-16], [...",17.270838,"[[2.58006073159e-18, 4.40591554232e-15, 3.3099...",[[1.18254423931]],[[0.000671717310256]],"[[-0.0363468361048], [0.0199096050621], [0.182..."
6,"[[[-0.0], [-0.00614487609122], [0.23322657962]...",18.456797,"[[0.0, 5.09763363833e-22, -9.80267452651e-17, ...",[[0.921738214932]],[[0.000216230797665]],"[[-0.0795020127338], [-0.0274769240511], [0.14..."
7,"[[[1.2625584945e-07], [3.84213922233e-08], [-1...",17.998956,"[[-6.00353717975e-11, -0.000391631636956, -9.4...",[[1.22396191465]],[[0.000782956744689]],"[[-0.117189691079], [-0.0688598680645], [0.115..."
8,"[[[1.10202365149e-05], [3.24473943292e-06], [-...",18.000083,"[[0.000286206587598, 1.40425981494e-08, -8.428...",[[1.49828184585]],[[0.00171815415115]],"[[-0.150413684148], [-0.105341439584], [0.0882..."
9,"[[[1.81823012462e-05], [1.35959202279e-05], [-...",17.996688,"[[-1.49240444399e-09, -0.000824537460295, -1.7...",[[1.22144945049]],[[0.00329542089926]],"[[-0.179907855374], [-0.13772743612], [0.06400..."


In [None]:
                
mean_list = [i for i in np.linspace(-0.5, 0.5, 10)]
l_rate_lst = [10**(-i)/3 for i in range(4, 10, 1)]



for l in range(len(l_rate_lst)):
    for k in range(len(mean_list)):
            learn_average_deg_variable(communities = 2, group_size = 10, projection_dim=2, print_ratio=1, l_rate=l_rate_lst[l], mean=mean_list[k], sd=0.2)
              

[3.3333333333333335e-05, 3.3333333333333337e-06]