In [39]:
import numpy as np
from multiview.datasets.base import load_UCImultifeature
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import rbf_kernel

In [174]:
data, labels = load_UCImultifeature()
for ind in range(len(data)):
    data[ind] = data[ind].astype(float)

In [175]:
def comp_laplacian(weights, d_mat):
    #print(d_mat)
    for ind in range(d_mat.shape[0]):
        if(d_mat[ind, ind] != 0):
            #print(np.sqrt(d_mat[ind, ind]))
            d_mat[ind, ind] = 1 / np.sqrt(d_mat[ind, ind])
    laplacian = np.eye(weights.shape[0])  - d_mat @ weights @ d_mat

    return laplacian

def comp_affinity(data, slt_indices, other_indices ):
    
    print(data)
    slt = data[slt_indices]
    raw = data[other_indices]
    num_samples = data.shape[0]
    affinity = rbf_kernel(raw, slt)
    indices = np.argsort(affinity, axis = 1)[:, :-8]
    for ind in range(affinity.shape[0]):
        affinity[ind, indices[ind]] = 0
    affinity /= np.sum(affinity, axis = 1).reshape(-1, 1)
    zero_block1 = np.zeros((affinity.shape[0],affinity.shape[0]))
    zero_block2 = np.zeros((affinity.shape[1], affinity.shape[1]))
    top_half = np.hstack((zero_block1, affinity))
    bot_half = np.hstack((affinity.T, zero_block2))
    weight = np.vstack((top_half, bot_half))

    D_mat = np.concatenate((np.sum(affinity, axis=1), 
                            np.sum(affinity.T, axis=1)))
    D_mat = np.diag(D_mat)
    return weight, D_mat
    
def get_salient(data, n_slt):
    kmeans= KMeans(n_clusters = n_slt)
    cat_data = np.hstack(data)
    kmeans.fit(cat_data)
    centroids = kmeans.cluster_centers_
    dists = cdist(centroids, cat_data)
    indices = np.argsort(dists, axis = 1)[:, 0]
    indices = indices.flatten()
    other_indices = set(list(range(data[0].shape[0]))).difference(indices)
    other_indices = np.array(list(other_indices))
    
    #slt_pts = list()
    #for k in range(len(data)):
    #    slt_pts.append(data[k][indices])
    return indices, other_indices
    

In [176]:
def spectral_clustering(v_data, n_clusters, n_slt = 40, r = 0.5, n_iter = 10):
    
    #Select salient points
    slt_inds, other_inds = get_salient(v_data, n_slt)
    
    laplacians = list()

    weight, d_mat = comp_affinity(v_data[1], slt_inds, other_inds)
    print(d_mat)
    laplacians.append(comp_laplacian(weight, d_mat))
    
    #for view in range(len(v_data)):
    #    weight, d_mat = comp_affinity(v_data[view], slt_inds, other_inds)
    #    laplacians.append(comp_laplacian(weight, d_mat))

In [177]:
clusters = spectral_clustering(data, n_clusters = 10)

[[ 98. 236. 531. ...  15.  12.  13.]
 [121. 193. 607. ...  13.  15.  11.]
 [115. 141. 590. ...  14.  13.   6.]
 ...
 [337. 299. 852. ...  15.  17.  21.]
 [247. 261. 866. ...  13.  15.  15.]
 [355. 379. 867. ...  12.  12.  20.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[nan  0.  0. ...  0.  0.  0.]
 [ 0. nan  0. ...  0.  0.  0.]
 [ 0.  0. nan ...  0.  0.  0.]
 ...
 [ 0.  0.  0. ... nan  0.  0.]
 [ 0.  0.  0. ...  0. nan  0.]
 [ 0.  0.  0. ...  0.  0. nan]]


