# Spectral Clustering

In [2]:
from itertools import permutations
import numpy as np
import scipy as sp
import scipy.sparse.linalg
import scipy.cluster.vq

In [3]:
A = np.loadtxt('../data/processed/usps.csv', delimiter=',')

In [101]:
inds = A[:, -1] < 6
X = A[inds, :-2]
Y = A[inds, -1].astype(int)

In [102]:
k = len(np.unique(Y))
n, d = X.shape
n, d

(4240, 254)

In [103]:
mu = 0
for i in range(n):
    for j in range(n):
        mu += np.linalg.norm(X[i] - X[j]) ** 2
mu /= (n ** 2)
mu = 1 / mu

In [104]:
W = np.empty((n, n))
for i in range(n):
    for j in range(i,n):
        val = np.e ** (-mu * np.linalg.norm(X[i] - X[j]) ** 2)
        W[i, j] = val
        W[j, i] = val

In [105]:
ww = W.sum(axis=0)
D = np.diag(ww)
D_ = np.diag(1 / np.sqrt(ww))
L = np.identity(n) - D_.dot(W).dot(D_)

In [112]:
V, Z = sp.linalg.eigh(L, eigvals=(0, k))

In [113]:
Z_ = sp.cluster.vq.whiten(Z)
centroids, distortion = sp.cluster.vq.kmeans(Z_, k)

In [114]:
centroids, distortion

(array([[-10.45134293,  -0.21695769,   0.84091388,   0.77027301,
          -1.40197231,  -0.10260371],
        [-11.50246835,  -1.23630465,  -0.86321964,  -0.30219783,
           0.12554126,  -0.15860273],
        [ -9.081826  ,   1.55943018,  -1.38024109,   0.40384435,
          -0.19201859,   0.15687877],
        [ -9.94601123,   0.17976618,   0.61511397,  -0.26675593,
           0.20957478,   2.0628695 ],
        [-10.58665069,   0.56090408,   0.68268302,  -0.28238634,
           0.67397283,  -0.67570837]]), 1.3841207685089576)

In [109]:
y_hat = np.zeros(n, dtype=int)
for i in range(n):
    dists = np.array([np.linalg.norm(Z_[i] - centroids[c]) for c in range(k)])
    y_hat[i] = np.argmin(dists)

In [110]:
perms = []
for p in permutations(np.arange(1, k+1)):
    P = dict()
    for i in range(k):
        P[i] = p[i]
    perms.append(P)

In [111]:
accuracy = np.zeros(len(perms))
for i in range(len(perms)):
    yy = y_hat.copy()
    for key, val in perms[i].items():
        yy[y_hat == key] = val
    accuracy[i] = (Y == yy).sum() / n * 100
accuracy.max()

79.787735849056602