In [57]:
import numpy as np
import scipy as sc
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
from sklearn import metrics

In [3]:
from numpy.linalg import eig, inv, eigh
from scipy.linalg import sqrtm

In [181]:
class ComplexKMeans:
    def __init__(self, k_clusters=2, n_iter=100, dist_fun=cdist):
        self.k_clusters = k_clusters
        self.n_iter = n_iter
        self.is_fit = False
        self.dist = dist_fun
    
    distance_matrix = lambda self, x: np.array([[self.dist(np.array([vec]), np.array([centroid])) for centroid in self.centroids] for vec in x])
    
    def _assign_labels(self):
        for vec_num, vec_dists in enumerate(self.distances):
            self.labels[vec_num] = vec_dists.argmin()
            
    
    def fit(self, x):
        # initialize centroids and labels
        self.centroids = x[:self.k_clusters]
        self.labels = np.array([0] * len(x))
        
        for it in range(self.n_iter):
            # calculate distances and assign centroids
            self.distances = self.distance_matrix(x)
            self._assign_labels()
            
            # recalculate centroids
            for cl_num in range(self.k_clusters):
                vecs = [vec for ind, vec in enumerate(x) if self.labels[ind] == cl_num]
                self.centroid = np.average(vecs, axis=0)
        
        # calculate final distances and labels
        self.distances = self.distance_matrix(x)
        self._assign_labels()
        
        self.is_fit = True
                
    def predict(self):
        return self.labels
    
    def fit_predict(self, x):
        self.fit(x)
        return self.predict()

In [182]:
a = ComplexKMeans(k_clusters=2, n_iter=100)
a.fit(adjacency)

  return array(a, dtype, copy=False, order='C', ndmin=1)


In [180]:
a.predict()

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [157]:
np.array([adjacency[0]]).ndim

2

In [147]:
def spectral_clustering(adjacency):

    adj_matrix = np.matrix(adjacency)

    diag_degrees = np.diag([np.sum(i) for i in adj_matrix])
    sqrt_diag_degrees = sqrtm(inv(diag_degrees))

    laplacian = np.identity(len(adj_matrix)) - np.dot(np.dot(sqrt_diag_degrees, adj_matrix), sqrt_diag_degrees)
    #laplacian = diag_degrees - adj_matrix
    #laplacian = adj_matrix
    
    eigs = dict(zip(*eig(laplacian)))
    # find max eigenvalues
    eig_vals = list(eigs.keys())
    max_eig_val = max(eig_vals, key=abs)
    eig_vals.remove(max_eig_val)
    max2_eig_val = max(eig_vals, key=abs)
    
    to_cluster = np.array(list(map(lambda vec: np.array(vec)[0], [eigs[max_eig_val], eigs[max2_eig_val]]))).transpose()
    print(cdist(cluster_size[0]))
    kmeans = KMeans(n_clusters=2, random_state=0).fit(to_cluster)
    return kmeans.labels_

In [152]:
true_labels = [0] * int(l / 2) + [1] * int(l / 2)
pred_labels = spectral_clustering(adjacency)

metrics.adjusted_rand_score(true_labels, pred_labels)

[[ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [-0.        +0.00000000e+00j -0.        +0.00000000e+00j]
 [-0.        +0.00000000e+00j -0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        -0.00000000e+00j  0.        -0.00000000e+00j]
 [-0.        +0.00000000e+00j -0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [-0.        +0.00000000e+00j -0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00j]
 [ 0.        +0.00000000e+00j  0.        +0.00000000e+00

  array = np.array(array, dtype=dtype, order=order, copy=copy)


0.0974991468824648

In [151]:
adjacency = np.array([[complex(1, 0)] * cluster_size + [complex(0, 0)] * cluster_size] * cluster_size +
                        [[complex(0, 0)] * cluster_size + [complex(1, 0)] * cluster_size] * cluster_size)


In [150]:
l = 100
adjacency = []
cluster_size = int(l / 2)
for i in range(l):
    if i < l / 2:
        adjacency.append([1] * cluster_size + [0] * cluster_size)
    else:
        adjacency.append([0] * cluster_size + [1] * cluster_size)
adjacency = np.array(adjacency)

In [37]:
pred_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,