# In [None]:
import matplotlib.pyplot as plt

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.metrics.pairwise import rbf_kernel
import scipy.linalg as slin
import numpy as np


# Load the digits dataset that ships with scikit-learn.
digits = datasets.load_digits()

# The samples are 8x8 digit images stored in the `images` attribute, and the
# digit each image represents is stored in `target`.  (Images read from files
# with matplotlib.pyplot.imread would work the same way, provided they all
# share one size.)  Pair every image with its label and preview the first four
# in the top row of a 2x4 subplot grid.
images_and_labels = list(zip(digits.images, digits.target))
preview = images_and_labels[:4]
for position, (image, label) in enumerate(preview, start=1):
    # Subplot positions are 1-based; slots 1-4 form the top row.
    plt.subplot(2, 4, position)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)

# A classifier needs a (samples, features) matrix, so flatten every image
# into a single feature row.
n_images = len(digits.images)
data = digits.images.reshape((n_images, -1))

X, y = data, digits.target

# Reduce the task to binary classification: keep only the samples whose
# label is 0 or 1, preserving their original order.
binary_rows = [row for row, digit in enumerate(y) if digit in (0, 1)]
X_filtered = [X[row] for row in binary_rows]
y_filtered = [y[row] for row in binary_rows]
n_samples = len(X_filtered)

# Ordered (unshuffled) split: the first 80% of the kept samples are used for
# training and the remainder for testing.
n_train = int(n_samples * 0.8)
n_test = n_samples - n_train
X_train, X_test = X_filtered[:n_train], X_filtered[n_train:]
y_train, y_test = y_filtered[:n_train], y_filtered[n_train:]

# Hyperparameters.  They are defined before either classifier is built: the
# original order read `C` in the SVC constructor before assigning it, which
# raises NameError on a fresh run.
gamma = 0.05
weight_decay = 0.0001
# Regularisation strength shared by both models.  `n_train` is the number of
# rows of `L_train` below, so this is the same value the script computed
# previously from `len(L_train)`.
C = 1.0 / (n_train * weight_decay)

# Model 1: kernel SVM with an RBF kernel, fitted on the raw pixel features.
# NOTE(review): this SVC uses scikit-learn's default kernel width, not the
# `gamma / n_train` used for the explicit kernel matrix below -- confirm
# whether the two are meant to match.
rbf_classifier = svm.SVC(C=C, kernel='rbf', tol=1e-6, random_state=24, max_iter=5000)
rbf_classifier.fit(X_train, y_train)

# Model 2: linear SVM on features derived from the RBF kernel matrix.
# Compute the kernel matrix K over train and test samples together, take its
# lower Cholesky factor (K = L @ L.T), and slice out the train and test rows
# restricted to the first `n_train` columns.
X_stacked = np.vstack((X_train, X_test))
K = rbf_kernel(X_stacked, gamma=gamma / n_train)
L = slin.cholesky(K, lower=True)
L_train = L[:n_train, :n_train]
L_test = L[n_train:, :n_train]

classifier = svm.LinearSVC(
    C=C,
    loss='hinge',
    tol=1e-6,
    fit_intercept=True,
    random_state=24,
    max_iter=5000)

# Fit the linear model on the training rows of the Cholesky factor.
classifier.fit(L_train, y_train)

# Evaluate on the held-out test split.  The predictions come from the
# RBF-kernel SVC, so that same estimator is the one named in the report
# header (the header previously named the LinearSVC).  To score the LinearSVC
# instead, predict with `classifier.predict(L_test)` and name `classifier`
# in the header.
expected = y_test
predicted = rbf_classifier.predict(X_test)

print("Classification report for classifier %s:\n%s\n"
      % (rbf_classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
# Shape of the linear model's weight matrix next to the training-set size.
print(np.shape(classifier.coef_), n_train)

# Show the first four test samples with their predicted labels in the bottom
# row of the 2x4 grid.  The images are rebuilt from the flattened test rows:
# indexing `digits.images[n_train:]` would pull images from the unfiltered
# dataset, which do not correspond to the filtered 0/1 test samples.
test_images = np.asarray(X_test).reshape((-1,) + digits.images.shape[1:])
images_and_predictions = list(zip(test_images, predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
    plt.subplot(2, 4, index + 5)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Prediction: %i' % prediction)

plt.show()