In [5]:
% matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

from sklearn import (datasets, decomposition, ensemble, manifold, random_projection)

In [21]:
def plot_digits(X, y):
    """Draw a 2-D embedding with every sample rendered as its class label.

    Each column of ``X`` is min-max scaled to [0, 1] and the first two
    columns are used as the x / y position of a small bold text label.

    Parameters
    ----------
    X : array-like, 2-D
        One row per sample; only columns 0 and 1 are plotted.
    y : sequence
        Label of each row of ``X``. It is printed with ``str()`` and also
        multiplied by 0.1 to pick a colour from ``plt.cm.Set1``, so it must
        be numeric.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    X = np.asarray(X, dtype=float)
    lowest = np.min(X, 0)
    span = np.max(X, 0) - lowest
    # A constant column has zero span; dividing by it would turn every
    # coordinate in that column into NaN. Map such columns to 0 instead.
    span = np.where(span == 0, 1.0, span)
    X = (X - lowest) / span

    fig, ax = plt.subplots(figsize=(12, 9))
    for i in range(X.shape[0]):
        ax.text(X[i, 0], X[i, 1], str(y[i]),
                color=plt.cm.Set1(0.1 * y[i]),
                fontdict={'weight': 'bold', 'size': 8})

    # The scaled coordinates carry no meaning, so hide the ticks.
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()
    
def nearest_points(A, B, k):
    """Return, for every row of ``B``, the indices of its nearest rows in ``A``.

    Squared Euclidean distances are computed with the expansion
    ``|a|^2 + |b|^2 - 2 a.b`` and sorted per row of ``B``.

    Parameters
    ----------
    A : array-like, shape (n_a, n_features)
        Reference points.
    B : array-like, shape (n_b, n_features)
        Query points.
    k : int
        Number of neighbours to keep.

    Returns
    -------
    ndarray of int, shape (n_b, min(k, n_a))
        Row ``j`` lists indices into ``A``, closest first, for ``B[j]``.
    """
    # Do the arithmetic in float64: with integer inputs (e.g. uint8 pixel
    # values) the squares and the dot product wrap around silently and the
    # resulting ordering is wrong.
    A = np.asarray(A, dtype=np.float64)
    B = np.asarray(B, dtype=np.float64)

    sq_norm_a = np.sum(A ** 2, axis=1)[:, np.newaxis]   # column, (n_a, 1)
    sq_norm_b = np.sum(B ** 2, axis=1)                  # row, (n_b,)
    sq_dist = sq_norm_a + sq_norm_b - 2.0 * np.dot(A, B.T)  # (n_a, n_b)

    # Sort down each column (one column per query point), keep the first k
    # rows, then transpose so that each query point gets its own row.
    return np.argsort(sq_dist, axis=0)[:k, :].T
    
def KNN(trainData, testData, trainClass, testClass, k):
    """Classify ``testData`` by k-nearest-neighbour vote and report the errors.

    Each test row receives the most frequent label among its ``k`` nearest
    training rows (as returned by ``nearest_points``). When several labels
    are equally frequent the smallest one wins, because ``np.argmax``
    returns the first maximum of the ``np.bincount`` histogram.

    Parameters
    ----------
    trainData : ndarray, shape (n_train, n_features)
    testData : ndarray, shape (n_test, n_features)
    trainClass : array-like of non-negative int, length n_train
        Labels of ``trainData`` (``np.bincount`` requires non-negative ints).
    testClass : array-like, length >= n_test
        True labels of ``testData``.
    k : int
        Number of neighbours that vote.

    Returns
    -------
    error : int
        Number of misclassified test rows.
    error_list : ndarray of int, shape (error,)
        Ascending row positions in ``testData`` that were misclassified.
        Always an integer array (also when empty), so it can be used
        directly as an index.

    Raises
    ------
    IndexError
        If ``testClass`` has fewer entries than ``testData`` has rows.
    """
    nearest = nearest_points(trainData, testData, k)
    length = testData.shape[0]

    train_labels = np.asarray(trainClass)
    test_labels = np.asarray(testClass)
    if test_labels.shape[0] < length:
        raise IndexError("testClass has fewer entries than testData has rows")
    test_labels = test_labels[:length]

    # Majority vote per test row.
    predicted = np.array(
        [np.argmax(np.bincount(train_labels[neighbours])) for neighbours in nearest],
        dtype=np.intp,
    )

    # Collect all mismatches at once instead of growing an array with
    # np.append inside the loop (quadratic, and it produced float indices).
    error_list = np.flatnonzero(predicted != test_labels)
    return int(error_list.size), error_list

def cross_validation(data, dataClass, k, n_folds=10, seed=None):
    """Estimate k-NN accuracy, in percent, with shuffled n-fold cross-validation.

    The samples are shuffled and split into ``n_folds`` folds whose sizes
    differ by at most one. Each fold is used once as the test set while the
    remaining folds form the training set, so every sample is tested exactly
    once.

    The previous implementation used folds of ``int(size / 10)`` samples,
    which left ``size % 10`` samples untested while still counting them as
    correct in the returned accuracy.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
    dataClass : array-like, length n_samples
        Label of each row of ``data``.
    k : int
        Number of neighbours passed on to ``KNN``.
    n_folds : int, default 10
        Number of folds; capped at ``n_samples`` so that no fold is empty.
    seed : int or None, default None
        Seed for the shuffle. ``None`` uses numpy's global random state, as
        the function did before this parameter existed.

    Returns
    -------
    float
        Percentage of samples classified correctly.

    Raises
    ------
    ValueError
        If there are fewer than two samples or ``n_folds`` is below 2.
    """
    data = np.asarray(data)
    dataClass = np.asarray(dataClass)
    size = data.shape[0]
    if size < 2:
        raise ValueError("cross_validation needs at least two samples")
    if n_folds < 2:
        raise ValueError("n_folds must be at least 2")
    n_folds = min(n_folds, size)

    rng = np.random if seed is None else np.random.RandomState(seed)
    indices = np.arange(size)
    rng.shuffle(indices)

    # array_split spreads the remainder over the first folds instead of
    # dropping it, so the folds partition the whole data set.
    folds = np.array_split(indices, n_folds)

    err = 0
    for i, test_idx in enumerate(folds):
        train_idx = np.concatenate(folds[:i] + folds[i + 1:])
        fold_err, _ = KNN(data[train_idx], data[test_idx],
                          dataClass[train_idx], dataClass[test_idx], k)
        err += fold_err

    # Every sample was tested exactly once, so size is the right denominator.
    return float(size - err) / size * 100

In [18]:
# Load scikit-learn's bundled digits dataset; the names below are reused by
# the later cells of this notebook.
digits = datasets.load_digits()
X = digits.data    # feature matrix, one row per sample
y = digits.target  # label of each row of X
N, d = X.shape     # N = number of rows (samples), d = number of columns (features)

In [19]:
# Fix the seed of every estimator that accepts one so that re-running the
# notebook reproduces the same embeddings (Isomap takes no random_state).
RANDOM_STATE = 0

# Reduce the digits to two dimensions with six different techniques.
X_pca = decomposition.PCA(n_components=2, svd_solver='auto',
                          random_state=RANDOM_STATE).fit_transform(X)
# NOTE(review): the recorded k-NN accuracy on X_kpca is about 9%, i.e. chance
# level for ten classes, which suggests gamma=10 is far too large for this
# data -- confirm and tune before drawing conclusions from this embedding.
X_kpca = decomposition.KernelPCA(n_components=2, kernel="rbf", gamma=10,
                                 random_state=RANDOM_STATE).fit_transform(X)
X_iso = manifold.Isomap(n_neighbors=30, n_components=2).fit_transform(X)
# n_neighbors is passed by keyword: recent scikit-learn releases make the
# estimator's constructor arguments keyword-only.
X_lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=2,
                                        random_state=RANDOM_STATE).fit_transform(X)
X_mds = manifold.MDS(n_components=2, n_init=1, max_iter=100,
                     random_state=RANDOM_STATE).fit_transform(X)
X_tsne = manifold.TSNE(n_components=2, init='pca',
                       random_state=RANDOM_STATE).fit_transform(X)

In [20]:
# Cross-validated k-NN accuracy (percent) on the raw features and on each
# 2-D embedding, for k = 1 .. 14. The labels keep their trailing space so
# the printed lines match the original layout exactly.
embeddings = [
    ("X: ", X),
    ("X_pca: ", X_pca),
    ("X_kpca: ", X_kpca),
    ("X_iso: ", X_iso),
    ("X_lle: ", X_lle),
    ("X_mds: ", X_mds),
    ("X_tsne: ", X_tsne),
]

for k in range(1, 15):
    print("k = ", k)
    for label, points in embeddings:
        print(label, cross_validation(points, y, k))
    print('')

k =  1
X:  98.88703394546467
X_pca:  58.48636616583194
X_kpca:  9.181969949916526
X_iso:  69.89426822481914
X_lle:  41.79187534780189
X_mds:  51.41903171953256
X_tsne:  98.94268224819143

k =  2
X:  98.60879243183082
X_pca:  57.7072899276572
X_kpca:  9.627156371730662
X_iso:  69.72732331663885
X_lle:  41.68057874234836
X_mds:  49.97217584863662
X_tsne:  98.94268224819143

k =  3
X:  98.8313856427379
X_pca:  60.322760155815246
X_kpca:  8.402893711741791
X_iso:  72.56538675570395
X_lle:  47.022815804117975
X_mds:  55.537006121313304
X_tsne:  98.94268224819143

k =  4
X:  98.60879243183082
X_pca:  62.15915414579855
X_kpca:  8.95937673900946
X_iso:  73.51140790205899
X_lle:  46.57762938230384
X_mds:  58.152476349471335
X_tsne:  99.05397885364496

k =  5
X:  98.77573734001113
X_pca:  63.38341680578742
X_kpca:  11.074012242626601
X_iso:  74.56872565386756
X_lle:  48.24707846410685
X_mds:  59.654980523094046
X_tsne:  98.94268224819143

k =  6
X:  98.49749582637729
X_pca:  64.66332776850307
X_