In [1]:
import numpy as np
from multiview.datasets.base import load_UCImultifeature
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import rbf_kernel

In [20]:
def normalize_mat(X_mat):
    """Min-max scale a matrix into [0, 1] using its global extrema.

    The minimum and maximum are taken over the whole matrix (not per
    column), so relative magnitudes between features are preserved.

    Parameters
    ----------
    X_mat : array-like
        Numeric data to rescale.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X_mat``. If every entry of ``X_mat`` is
        identical, an all-zero float array is returned instead of the NaNs
        a 0/0 division would produce.
    """
    X_mat = np.asanyarray(X_mat)
    min_val = np.min(X_mat)
    max_val = np.max(X_mat)
    span = max_val - min_val
    # A constant matrix has zero range; dividing by it would yield NaN.
    if span == 0:
        return np.zeros(X_mat.shape, dtype=float)
    return (X_mat - min_val) / span
    
def gaussian_sim(X_mat, Y_mat):
    """Gaussian similarity between every row of ``X_mat`` and ``Y_mat``.

    The similarity is ``exp(-||x - y||^2 / n_features)``, where
    ``n_features`` is the number of columns of ``X_mat``.

    Parameters
    ----------
    X_mat : numpy.ndarray, shape (n_x, n_features)
    Y_mat : numpy.ndarray, shape (n_y, n_features)

    Returns
    -------
    numpy.ndarray, shape (n_x, n_y)
        Pairwise similarities.
    """
    features = X_mat.shape[1]
    # Ask cdist for squared Euclidean distances directly rather than taking
    # a square root and squaring it again.
    sq_dists = cdist(X_mat, Y_mat, metric="sqeuclidean")
    return np.exp(-sq_dists / features)

def comp_laplacian(weights, d_mat):
    """Compute ``I - D^{-1/2} W D^{-1/2}`` from a weight and a degree matrix.

    Parameters
    ----------
    weights : numpy.ndarray, shape (n, n)
        Weight (adjacency) matrix ``W``.
    d_mat : array-like, shape (n, n)
        Degree matrix ``D``. Each non-zero diagonal entry is replaced by its
        inverse square root; zero diagonal entries stay zero. The argument
        itself is not modified.

    Returns
    -------
    numpy.ndarray, shape (n, n)
        The resulting Laplacian.
    """
    # Work on a float copy: the caller's matrix stays intact, and an
    # integer-typed input cannot truncate 1/sqrt(d) down to 0.
    d_inv_sqrt = np.array(d_mat, dtype=float)
    degrees = np.diagonal(d_inv_sqrt).copy()
    nonzero = degrees != 0
    # Skip zero degrees to avoid dividing by zero.
    degrees[nonzero] = 1.0 / np.sqrt(degrees[nonzero])
    np.fill_diagonal(d_inv_sqrt, degrees)

    laplacian = np.eye(weights.shape[0]) - d_inv_sqrt @ weights @ d_inv_sqrt
    return laplacian

def comp_affinity(data, slt_indices, other_indices, n_neighbors=8):
    """Build the bipartite weight and degree matrices for one view.

    Each non-salient sample is connected to its ``n_neighbors`` most similar
    salient samples (RBF kernel with ``rbf_kernel``'s default settings); the
    kept similarities are normalised so every non-salient row sums to 1.

    Parameters
    ----------
    data : numpy.ndarray, shape (n_samples, n_features)
        Samples of a single view.
    slt_indices : array-like of int
        Row indices of the salient samples.
    other_indices : array-like of int
        Row indices of the remaining (non-salient) samples.
    n_neighbors : int, default 8
        Number of salient samples each non-salient sample stays connected
        to. If there are no more than ``n_neighbors`` salient samples, all
        connections are kept.

    Returns
    -------
    weight : numpy.ndarray, shape (n_other + n_slt, n_other + n_slt)
        Symmetric block matrix ``[[0, Z], [Z.T, 0]]`` where ``Z`` is the
        normalised affinity. Rows/columns are ordered non-salient first,
        then salient.
    D_mat : numpy.ndarray, same shape as ``weight``
        Diagonal matrix holding the row sums of ``weight``.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is smaller than 1.
    """
    if n_neighbors < 1:
        raise ValueError("n_neighbors must be at least 1")

    slt = data[slt_indices]
    raw = data[other_indices]
    affinity = rbf_kernel(raw, slt)

    # Zero out everything except the n_neighbors largest entries of each row.
    n_drop = max(affinity.shape[1] - n_neighbors, 0)
    if n_drop > 0:
        weakest = np.argsort(affinity, axis=1)[:, :n_drop]
        np.put_along_axis(affinity, weakest, 0.0, axis=1)

    # Row-normalise; rows whose kept similarities sum to zero are left as
    # zeros instead of becoming NaN.
    row_sums = affinity.sum(axis=1, keepdims=True)
    np.divide(affinity, row_sums, out=affinity, where=row_sums > 0)

    n_other, n_slt = affinity.shape
    weight = np.block([
        [np.zeros((n_other, n_other)), affinity],
        [affinity.T, np.zeros((n_slt, n_slt))],
    ])

    # Degrees: row sums for the non-salient nodes, column sums for the
    # salient nodes.
    degrees = np.concatenate((affinity.sum(axis=1), affinity.sum(axis=0)))
    D_mat = np.diag(degrees)

    return weight, D_mat
    
def get_salient(data, n_slt, random_state=None):
    """Pick salient samples as the points closest to k-means centroids.

    All views are concatenated feature-wise, k-means with ``n_slt`` clusters
    is fitted on the result, and for every centroid the index of the
    nearest sample is selected.

    Parameters
    ----------
    data : sequence of numpy.ndarray
        One array per view, each with the same number of rows (samples).
    n_slt : int
        Number of k-means clusters, i.e. number of salient indices returned.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``KMeans`` so the selection can be made reproducible.

    Returns
    -------
    indices : numpy.ndarray, shape (n_slt,)
        Index of the sample nearest to each centroid. If two centroids share
        the same nearest sample that index appears twice; no de-duplication
        is performed.
    other_indices : numpy.ndarray
        Sorted indices of all samples that are not in ``indices``.
    """
    cat_data = np.hstack(data)
    kmeans = KMeans(n_clusters=n_slt, random_state=random_state)
    kmeans.fit(cat_data)

    # Only the nearest sample per centroid is needed, so argmin suffices.
    dists = cdist(kmeans.cluster_centers_, cat_data)
    indices = np.argmin(dists, axis=1)

    # setdiff1d returns a sorted result, so the order does not depend on
    # set iteration.
    all_indices = np.arange(data[0].shape[0])
    other_indices = np.setdiff1d(all_indices, indices)

    return indices, other_indices
    

In [21]:
def spectral_clustering(v_data, n_clusters, n_slt = 40, r = 0.5, n_iter = 10):
    """Multi-view spectral clustering driver (work in progress).

    Currently this selects salient points, builds one Laplacian per view and
    initialises ``alphas``. The iterative update step has not been written
    yet, so the function returns ``None``.

    Parameters
    ----------
    v_data : sequence of numpy.ndarray
        One array per view, rows are samples.
    n_clusters : int
        Requested number of clusters.
    n_slt : int, default 40
        Number of salient points passed to ``get_salient``.
    r : float, default 0.5
        Not used yet.
    n_iter : int, default 10
        Number of iterations of the (still empty) update loop.
    """
    # Select salient points
    slt_inds, other_inds = get_salient(v_data, n_slt)

    # Compute the weight/degree matrices and then the Laplacian of each view
    laplacians = list()
    for view_data in v_data:
        weight, d_mat = comp_affinity(view_data, slt_inds, other_inds)
        laplacians.append(comp_laplacian(weight, d_mat))

    # NOTE(review): sized by n_clusters; if these are meant to be per-view
    # weights the length should presumably be len(v_data) - confirm.
    alphas = np.ones((n_clusters,)) / n_clusters

    for _ in range(n_iter):
        # TODO: implement the iterative update. The loop body was empty in
        # the original cell, which made the definition a syntax error.
        pass

In [22]:
# Load the UCI multi-feature views, then min-max scale each view in place.
data, labels = load_UCImultifeature()
for view_idx, view in enumerate(data):
    data[view_idx] = normalize_mat(view)

In [23]:
# Run the multi-view spectral clustering routine, requesting 10 clusters.
clusters = spectral_clustering(data, n_clusters=10)

[[0.07243163 0.1744272  0.3924612  ... 0.01108647 0.00886918 0.00960828]
 [0.08943089 0.14264597 0.44863267 ... 0.00960828 0.01108647 0.00813008]
 [0.0849963  0.10421286 0.436068   ... 0.01034738 0.00960828 0.00443459]
 ...
 [0.24907613 0.22099039 0.62971175 ... 0.01108647 0.01256467 0.01552106]
 [0.18255728 0.19290466 0.64005913 ... 0.00960828 0.01108647 0.01108647]
 [0.2623799  0.28011826 0.64079823 ... 0.00886918 0.00886918 0.01478197]]
(40, 216)
(1960, 216)
[[ 1.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.          1.          0.         ...  0.          0.
   0.        ]
 [ 0.          0.          1.         ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ... 38.25157758  0.
   0.        ]
 [ 0.          0.          0.         ...  0.         62.87746909
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
  35.77040593]]
