# MNIST

Goal: train a classifier with 4% error in under 1 minute on a laptop.

In [1]:
import io
import urllib
import cvxpy as cp
import numpy as np
import numpy.linalg as LA

## Dataset

In [None]:
# Download the MNIST archive, read the whole HTTP response into memory, and
# hand it to np.load through an in-memory file object (np.load needs a
# seekable file, which the raw response is not).
# NOTE(review): `urllib.urlopen` is the Python 2 spelling of this call.
mnist = np.load(io.BytesIO(urllib.urlopen("http://epopt.s3.amazonaws.com/mnist.npz").read()))

## Random Fourier features

In [None]:
def error(x, y):
    """Return the fraction of positions at which `x` and `y` disagree.

    The count of matching entries is divided by `len(x)`, so the result is
    1 minus the accuracy of `x` against `y`.
    """
    num_matches = np.sum(x == y)
    total = float(len(x))
    return 1 - num_matches / total

def predict(X, theta):
    """Return +1/-1 labels from the sign of the linear scores `X.dot(theta)`.

    A strictly positive score maps to +1; zero and negative scores map to -1.
    The result is flattened to one dimension.
    """
    scores = X.dot(theta)
    # Boolean mask -> {0, 1} -> {0, 2} -> {-1, +1}.
    labels = (scores > 0) * 2 - 1
    return np.array(labels).ravel()

In [None]:
def pca(X, dim=50):
    """Project the rows of `X` onto their top `dim` principal components.

    Parameters
    ----------
    X : 2-D array, one sample per row. It is not modified.
    dim : number of components to keep (default 50). If `X` has fewer
        columns than `dim`, all components are kept.

    Returns
    -------
    Array with one row per sample and at most `dim` columns, ordered from
    the highest-variance component to the lowest.
    """
    # Centre into a new array so the caller's data is left untouched (this
    # also lets integer-typed input work).
    centered = X - X.mean(axis=0)
    _, D = LA.eigh(centered.T.dot(centered))
    # eigh returns eigenvalues in ascending order, so the leading principal
    # directions are the *last* columns of D; reverse before slicing.
    top = D[:, ::-1][:, :dim]
    return centered.dot(top)

def median_dist(X):
    """Estimate the median Euclidean distance between rows of `X`.

    Draws k = int(m**1.5) random row pairs (with replacement, so a row may
    be paired with itself) from the m rows of `X` and returns the element at
    index k // 2 of the sorted pair distances.

    Parameters
    ----------
    X : 2-D array, one sample per row.

    Returns
    -------
    The sampled median distance as a float scalar.

    Raises
    ------
    ValueError
        If `X` has no rows.
    """
    m = X.shape[0]
    if m == 0:
        raise ValueError("median_dist requires at least one row")
    k = int(m**1.5)
    I = np.random.randint(0, m, k)
    J = np.random.randint(0, m, k)

    # Compute the distances in vectorised chunks: far faster than one
    # Python-level norm call per pair, while keeping the temporary
    # difference matrix bounded in size for large k.
    chunk = 65536
    dists = np.empty(k)
    for start in range(0, k, chunk):
        stop = start + chunk
        diff = X[I[start:stop], :] - X[J[start:stop], :]
        dists[start:stop] = LA.norm(diff, axis=1)
    dists.sort()
    # Floor division keeps the index an integer on both Python 2 and 3.
    return dists[k // 2]

def random_features(X, n):
    """Map `X` to `n` random cosine (random Fourier) features.

    The data is first reduced with `pca`, then a length scale is estimated
    with `median_dist`. Each feature is cos(z . w + b) where w has Gaussian
    entries scaled by 1 / (sigma * sqrt(2)) and b is uniform on [0, 2*pi).

    Returns an array with one row per sample and `n` columns.
    """
    Z = pca(X)
    bandwidth = median_dist(Z)
    num_inputs = Z.shape[1]
    # Same division order as before so the weights are bit-identical.
    weights = np.random.randn(num_inputs, n) / bandwidth / np.sqrt(2)
    phases = np.random.uniform(0, 2*np.pi, n)
    projected = Z.dot(weights) + phases
    return np.cos(projected)

# Odd-vs-even digit classification by least squares.
# Dividing by 255 presumably rescales 8-bit pixel intensities to [0, 1].
X = mnist["X"] / 255.
# Labels: +1 for odd digits, -1 for even digits.
y = (mnist["Y"].ravel() % 2 == 1)*2-1
# Solve the normal equations X^T X theta = X^T y.
# NOTE(review): LA.solve raises LinAlgError when X^T X is singular (e.g. if
# some column of X is identically zero) -- confirm against the data, or
# switch to a regularised / lstsq solve.
theta = LA.solve((X.T.dot(X)), X.T.dot(y))
# Error measured on the same data the model was fitted to. Written as a
# single formatted string so the output is identical on Python 2 and 3.
print("Error: %s" % error(predict(X, theta), y))

## Loss functions

In [None]:
# Multiclass classification
def one_hot(y, k):
    """Return the dense one-hot encoding of the class labels `y`.

    Parameters
    ----------
    y : sequence of m class indices, each in range(k).
    k : number of classes (columns of the result).

    Returns
    -------
    An m-by-k dense matrix (as produced by `todense()`) with a 1 at
    (i, y[i]) for every row i and 0 elsewhere.
    """
    # Imported locally: `sp` is not provided by the file's top-level imports,
    # so the original call failed with a NameError.
    import scipy.sparse as sp

    m = len(y)
    return sp.coo_matrix((np.ones(m), (np.arange(m), y)), shape=(m, k)).todense()

def softmax_loss(Theta, X, y):
    """Build the softmax (multinomial logistic) loss as a cvxpy expression.

    For scores Z = X*Theta the loss is the sum over rows of
    log_sum_exp(Z[i, :]) minus the score of the labelled class y[i].
    `Theta.size` is read as a (rows, columns) pair; the column count is the
    number of classes.
    """
    num_classes = Theta.size[1]
    scores = X*Theta
    log_partition = cp.sum_entries(cp.log_sum_exp(scores, axis=1))
    # The one-hot mask keeps only each row's labelled-class score.
    labelled_score = cp.sum_entries(
        cp.mul_elemwise(one_hot(y, num_classes), scores))
    return log_partition - labelled_score

def multiclass_hinge_loss(Theta, X, y):
    """Build the multiclass hinge loss as a cvxpy expression.

    For scores Z = X*Theta, each row contributes
    max_j (Z[i, j] - Z[i, y[i]] + [j != y[i]]), and the row terms are summed.
    `Theta.size` is unpacked as a (rows, columns) pair; the column count is
    the number of classes.
    """
    _, num_classes = Theta.size
    labels = one_hot(y, num_classes)
    scores = X*Theta
    # Masking with the one-hot labels leaves only the labelled-class score in
    # each row; right-multiplying by an all-ones square matrix then copies
    # that row sum into every column.
    spread = np.ones((num_classes, num_classes))
    labelled_scores = cp.mul_elemwise(labels, scores)*spread
    # (1 - labels) adds a margin of 1 to every class except the labelled one.
    margins = scores - labelled_scores + (1-labels)
    return cp.sum_entries(cp.max_entries(margins, axis=1))

## Random features

In [None]:
# Imported here so this cell runs on its own: no earlier cell shown in the
# notebook brings `datasets` or `cross_validation` into scope.
# NOTE(review): `fetch_mldata` and `sklearn.cross_validation` belong to older
# scikit-learn releases; newer ones provide `fetch_openml` and
# `sklearn.model_selection` instead -- confirm the installed version.
from sklearn import cross_validation, datasets

# Fetch MNIST and split it into 60000 training samples (Xt, yt) and the
# remaining samples for testing (Xs, ys), with a fixed seed for repeatability.
mnist = datasets.fetch_mldata("MNIST original")
Xt, Xs, yt, ys = cross_validation.train_test_split(
    mnist["data"], mnist["target"], train_size=60000, random_state=0)

In [None]:
# Display the model's predictions on Xs, rounded to the nearest integer.
# NOTE(review): `lr` is only created and fitted in the next cell, so this
# cell works only after that one has been run.
np.round(lr.predict(Xs))

In [None]:
from sklearn import linear_model

def err(x, y):
    """Return 1 minus the share of entries of `x` that equal those of `y`.

    The share is the number of elementwise matches divided by `len(x)`.
    """
    agreement = np.sum(x == y) / float(len(x))
    return 1 - agreement

# Baseline: fit a cross-validated ridge regression directly on the digit
# labels, then round the real-valued predictions to the nearest integer and
# compare them with the held-out labels.
lr = linear_model.RidgeCV()
lr.fit(Xt, yt)
# Written as a single formatted string so the output is identical on
# Python 2 and 3 (the bare `print` statement is a syntax error on Python 3).
print("Error: %s" % err(ys, np.round(lr.predict(Xs))))