In [4]:
% matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

from sklearn import (datasets, decomposition, ensemble, manifold, random_projection)
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def plot_digits(X, y):
    """Scatter the samples of a 2-D embedding, drawing each one as its label.

    Parameters
    ----------
    X : ndarray
        Embedded samples, one row per sample; only the first two columns are
        plotted. Each column is min-max scaled to [0, 1] before plotting.
    y : sequence
        Label of each row of ``X``. ``str(label)`` is used as the marker text
        and ``0.1 * label`` selects the colour from the ``Set1`` colormap.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    col_min = np.min(X, 0)
    col_span = np.max(X, 0) - col_min
    # A constant column has zero span; dividing by it would turn every
    # coordinate on that axis into NaN. Dividing by 1 instead maps it to 0.
    col_span[col_span == 0] = 1
    X = (X - col_min) / col_span

    plt.figure(figsize=(12, 9))

    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 color=plt.cm.Set1(0.1*y[i]),
                 fontdict={'weight':'bold', 'size':8})

    # The scaled coordinates carry no meaning, so hide the tick marks.
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [8]:
# Load the scikit-learn digits dataset and unpack features and labels.
digits = datasets.load_digits()
X, y = digits.data, digits.target

# N is the number of rows (samples) of X, d the number of columns (features).
N, d = X.shape
N

1797

In [11]:
def crossVal(X, y, k, valtimes, test_size=300):
    """Estimate k-NN test error with repeated random hold-out splits.

    Every round shuffles the samples, holds out the last ``test_size`` rows as
    the test set, fits a ``KNeighborsClassifier`` with ``k`` neighbours on the
    remaining rows and counts the misclassified test samples.

    Shuffling uses NumPy's global random state (``np.random.shuffle``); call
    ``np.random.seed(...)`` beforehand for reproducible numbers.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : ndarray of shape (n_samples,)
        Labels, one per row of ``X``.
    k : int
        Number of neighbours used by the classifier.
    valtimes : int
        Number of shuffle/split/fit rounds to average over.
    test_size : int, default 300
        Number of samples held out for testing in each round.

    Returns
    -------
    float
        Mean *number* (a count, not a rate) of misclassified test samples
        per round.

    Raises
    ------
    ValueError
        If ``valtimes`` is smaller than 1 or ``test_size`` does not leave at
        least one training and one test sample.
    """
    if valtimes < 1:
        raise ValueError("valtimes must be at least 1")

    # Glue the labels on as a last column so one shuffle keeps rows and
    # labels aligned. np.append returns a new array: X and y stay untouched.
    data = np.append(X, y[:, np.newaxis], axis=1)
    if not 0 < test_size < data.shape[0]:
        raise ValueError("test_size must be between 1 and n_samples - 1")

    err = 0
    for _ in range(valtimes):
        np.random.shuffle(data)
        trainData = data[:-test_size, :-1]
        trainLabels = data[:-test_size, -1]
        testData = data[-test_size:, :-1]
        testLabels = data[-test_size:, -1]

        knn = KNeighborsClassifier(k)
        knn.fit(trainData, trainLabels)
        predictedLabels = knn.predict(testData)

        # Summing the boolean mask counts the mismatches directly; the old
        # `.astype(np.int)` cast fails on NumPy >= 1.24, where np.int is gone.
        err += np.sum(predictedLabels != testLabels)
    return err / valtimes

## Cross validation without dimensionality reduction

In [13]:
# Baseline: score k-NN on the raw features for odd k from 1 to 9,
# averaging the hold-out error count over 10 rounds per k.
for k in range(1, 10, 2):
    print(crossVal(X, y, k=k, valtimes=10))

3.3
3.9
3.7
4.9
5.1


## Cross validation for PCA

In [17]:
# Project the data onto two principal components, then score k-NN on the
# 2-D embedding for odd k from 1 to 15 (10 hold-out rounds per k).
# Note: the projection is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_pca = decomposition.PCA(
    n_components=2,
    svd_solver='full',
).fit_transform(X)

for k in range(1, 16, 2):
    print(crossVal(X_pca, y, k=k, valtimes=10))

125.6
123.0
109.0
109.3
109.5
105.2
103.8
105.2


## Cross validation for KPCA

In [18]:
# Kernel PCA with a degree-3 polynomial kernel, reduced to two components,
# then k-NN scored on the embedding for odd k from 1 to 15.
# Note: the embedding is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_kpca = decomposition.KernelPCA(
    n_components=2,
    kernel="poly",
    degree=3,
    gamma=10,
).fit_transform(X)

for k in range(1, 16, 2):
    print(crossVal(X_kpca, y, k=k, valtimes=10))

138.3
132.8
128.5
128.3
121.5
125.3
124.3
124.7


## Cross validation for Isomap

In [19]:
# Isomap embedding into two dimensions using 30-nearest-neighbour graphs,
# then k-NN scored on the embedding for odd k from 1 to 15.
# n_neighbors is passed by keyword: scikit-learn >= 1.0 rejects positional
# constructor arguments for this estimator.
# Note: the embedding is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_iso = manifold.Isomap(n_neighbors=30, n_components=2).fit_transform(X)
for k in range(1, 16, 2):
    print(crossVal(X_iso, y, k, 10))

91.6
81.1
73.4
71.5
71.1
69.9
69.9
75.1


## Cross validation for LLE

In [20]:
# Locally linear embedding into two dimensions with 30 neighbours per point,
# then k-NN scored on the embedding for odd k from 1 to 15.
# n_neighbors is passed by keyword: scikit-learn >= 1.0 rejects positional
# constructor arguments for this estimator.
# Note: the embedding is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_lle = manifold.LocallyLinearEmbedding(
    n_neighbors=30, n_components=2, reg=0.0001
).fit_transform(X)
for k in range(1, 16, 2):
    print(crossVal(X_lle, y, k, 10))

214.9
203.3
200.4
196.1
196.5
198.0
195.9
198.2


## Cross validation for MDS

In [21]:
# Multidimensional scaling into two dimensions, then k-NN scored on the
# embedding for odd k from 1 to 15.
# random_state pins the random initialisation so the embedding is the same
# on every run; the crossVal splits themselves are still random.
# Note: the embedding is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_mds = manifold.MDS(
    n_components=2, eps=1e-4, n_init=1, max_iter=100, random_state=0
).fit_transform(X)
for k in range(1, 16, 2):
    print(crossVal(X_mds, y, k, 10))

142.8
118.4
114.1
115.7
112.5
110.7
110.6
111.5


## Cross validation for t-SNE

In [22]:
# t-SNE embedding into two dimensions, then k-NN scored on the embedding
# for odd k from 1 to 15.
# random_state pins the random initialisation (init='random') so the
# embedding is the same on every run; the crossVal splits are still random.
# Note: the embedding is fit on all of X before crossVal splits off its
# test rows, so the held-out samples have already influenced the embedding.
X_tsne = manifold.TSNE(
    n_components=2, init='random', learning_rate=1000, random_state=0
).fit_transform(X)
for k in range(1, 16, 2):
    print(crossVal(X_tsne, y, k, 10))

5.2
4.1
4.4
3.6
4.4
4.8
5.5
4.4
