In [10]:
import tensorflow as tf
import networkx as nx
import numpy as np
import pandas as pd
import scipy as sp

def balanced_stochastic_blockmodel(big_community, small_community, p_in=1.0, p_out=0.0, seed=None):
    """gives dense adjacency matrix representaiton of randomly generated SBM with balanced community size"""

    G = nx.random_partition_graph([big_community, small_community], p_in=p_in, p_out=p_out, seed=seed)
    A = nx.adjacency_matrix(G).todense()
    
    return A

DATA = [np.asarray(balanced_stochastic_blockmodel(4, 2, p, 0.1*p)).astype(np.double) for p in np.linspace(0.3, 0.3, 10)]


def AverageOperatorVertex(vertex, vector, graph):
    lst = [vector[i] for i in graph[vertex]]
    answer = np.sum(lst)
    return(answer)


def GraphLaplacian(graph, vector):
    #takes in graph and vector on graph indices, returns new vector
    new_vector = np.empty(len(vector),)
    for i in graph:
        #this step can be parallelized
        new_vector[i] = vector[i]*len(graph[i])-AverageOperatorVertex(vertex=i, vector = vector, graph = graph)
    return new_vector

In [8]:
#er_graph_10 = nx.erdos_renyi_graph(10, 0.2, seed=1)
print sp.sparse.csgraph.laplacian(DATA[1])#nx.to_numpy_matrix(er_graph_10))
#print GraphLaplacian(er_graph_10)

[[ 0. -0. -0. -0. -0. -0.]
 [-0.  0. -0. -0. -0. -0.]
 [-0. -0.  1. -0. -0. -1.]
 [-0. -0. -0.  0. -0. -0.]
 [-0. -0. -0. -0.  0. -0.]
 [-0. -0. -1. -0. -0.  1.]]


In [9]:
def laplacian_spectral_clustering(Matrix, k):
    """does classical laplacian spectral clustering with stochastic block model objects
    step 1: take the laplacian of the matrix
    step 2: do k means on rows as points
    step 3: use k means result to give assignment.
    For now we assume there are k communities"""
    
    laplacian = sp.sparse.csgraph.laplacian(Matrix)
    
    
    

In [32]:
def laplacian_spectral_clustering(big_community=40, small_community=30, p_in=0.4, p_out=0.0, k=2):
    """does classical laplacian spectral clustering with stochastic block model objects
    step 1: take the laplacian of the matrix
    step 2: do k means on rows as points
    step 3: use k means result to give assignment.
    For now we assume there are k communities"""
    


    A = balanced_stochastic_blockmodel(big_community=40, small_community=30, p_in=0.4, p_out=0.0)
    #B = joint_permutation(A)
    print A

    x = A
    dim_graph = len(x)
    k = 2

    x_tensor = tf.cast(x, tf.float32)
     #takes the degree of each vertex and makes diagonal matrix out of it
    laplacian = tf.matmul(tf.diag(tf.inv(tf.reduce_sum(x_tensor,0))),
                          x_tensor)
    #the laplacian is symmetric, we wish to get the k largest eigenvalues

    eigenval, eigenvec = tf.self_adjoint_eig(laplacian) #seems to be sorted for me
    Y = tf.slice(eigenvec, [0, dim_graph-k], [dim_graph, k]) #pick the top k eigenvectors


    #now we do K-means clustering on the rows of Y, which are the top k eignvectors of the laplacian above, or the bottom k of the normalized laplacian

    #find k random centroides

    centroides = tf.Variable(tf.slice(tf.random_shuffle(Y),[0,0],[k,-1]))

    expanded_Y = tf.expand_dims(Y, 0)
    expanded_centroides = tf.expand_dims(centroides, 1)

    diff = tf.sub(expanded_Y, expanded_centroides) #will get difference between eacnh centroide and all of thw points

    sqr = tf.square(diff) #sqr diff

    distances = tf.reduce_sum(sqr, 2)
    assignments = tf.argmin(distances, 0) #these are the clustering assignments based on current centroides

    means = tf.concat(0, 
                      [tf.reduce_mean(
                tf.gather(
                    Y, tf.reshape(
                        tf.where( 
                            tf.equal(assignments, c)),[1,-1])),
                reduction_indices=[1]) for c in xrange(k)])

    #these new means, calculated by group, will be the new centroides
    update_centroides = tf.assign(centroides, means)

    init = tf.initialize_all_variables()


    TRUE_A = np.append(np.ones([big_community]),np.zeros([small_community]))
    TRUE_B = 1-TRUE_A
    #accuracy = tf.minimum(assign)


    with tf.Session() as sess:
        sess.run(init)
        for step in xrange(100):
            _, centroid_values, assigment_values = sess.run([centroides, update_centroides, assignments])
        print sess.run([centroides, update_centroides, assignments, TRUE_A, TRUE_B])
        
        print "accuaracy:"


In [35]:
laplacian_spectral_clustering()

[[0 1 1 ..., 0 0 0]
 [1 0 1 ..., 0 0 0]
 [1 1 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 1]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 1 0 0]]


TypeError: Fetch argument array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.]) has invalid type <type 'numpy.ndarray'>, must be a string or Tensor. (Can not convert a ndarray into a Tensor or Operation.)

In [34]:
tf.reset_default_graph()