# Different Feature Representations of MNIST digits

In [None]:
import numpy as np

def load(filename):
    """Read a comma-separated data file and split it into features and labels.

    Every row is one example: the last column is taken as the label and all
    preceding columns as the feature values.

    Parameters
    ----------
    filename : anything ``np.loadtxt`` accepts as its first argument
        (e.g. the path of a comma-delimited text file).

    Returns
    -------
    X : 2-D array holding every column except the last.
    y : 1-D array holding the last column.
    """
    # ndmin=2 keeps the result two-dimensional even when the file has a
    # single row; without it loadtxt returns a 1-D array and the column
    # slicing below raises an IndexError.
    Xy = np.loadtxt(filename, delimiter=',', ndmin=2)
    return Xy[:, :-1], Xy[:, -1]

from sklearn.neighbors import KNeighborsClassifier
def knn_predscore(trainX, trainy, testX, testy, k, metric='euclidean'):
    """Fit a k-nearest-neighbours classifier and score it on the test set.

    trainX/trainy are used for fitting, testX/testy for scoring. `k` is the
    number of neighbours and `metric` the distance metric handed to
    KNeighborsClassifier. Returns whatever the fitted model's `score`
    method returns for the test data.
    """
    classifier = KNeighborsClassifier(n_neighbors=k, metric=metric)
    return classifier.fit(trainX, trainy).score(testX, testy)

from sklearn.linear_model import Perceptron
def perceptron_predscore(trainX, trainy, testX, testy):
    """Fit a Perceptron (n_iter=200) on the training set and score it on the test set.

    Returns whatever the fitted model's `score` method returns for
    testX/testy.
    """
    # NOTE(review): newer scikit-learn releases use `max_iter` in place of
    # `n_iter` -- confirm against the installed version before upgrading.
    classifier = Perceptron(n_iter=200)
    return classifier.fit(trainX, trainy).score(testX, testy)

Load the MNIST images as 784-dimensional vectors (each dimension is one pixel) as in PS1. Keep only the 7s and 9s -- this gives a binary classification problem.

In [None]:
# Read the complete training and testing files; each call returns the
# feature matrix and the label vector (last column of the file).
trainXraw, trainyraw = load('training.txt')
testXraw, testyraw = load('testing.txt')

In [None]:
c1 = 7
c2 = 9
trainX = trainXraw[(trainyraw == c1) | (trainyraw == c2), :]
trainy = trainyraw[(trainyraw == c1) | (trainyraw == c2)]

testX = testXraw[(testyraw == c1) | (testyraw == c2), :]
testy = testyraw[(testyraw == c1) | (testyraw == c2)]

print 'Loaded', trainy.size, 'training points and', testy.size, 'testing points'

In [None]:
# Baseline: score both classifiers on the unmodified pixel features (kNN with k=3).
print 'kNN accuracy on original feature rep is', knn_predscore(trainX, trainy, testX, testy, 3)
print 'Perceptron accuracy on original feature rep is', perceptron_predscore(trainX, trainy, testX, testy)

### Tangent: Visualizing w

The weight vector learned by the perceptron to distinguish 7 and 9 is itself a 784-dim vector, since the data lives in 784 dimensions. Just for fun, let us visualize it as an image!

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
from numpy.linalg import norm

# Fit a perceptron with fit_intercept=False, i.e. without a separate bias term.
model = Perceptron(n_iter=200, fit_intercept=False)
model.fit(trainX, trainy)

# First row of the learned coefficients, rescaled in place to unit Euclidean norm.
w = model.coef_[0]
w /= norm(w, 2)

# Show the weights as a 28x28 image; the colour scale spans the full range of w.
weightImage = w.reshape(28, 28)
plt.imshow(weightImage, interpolation='none', cmap=plt.get_cmap('jet'),
           vmin=w.min(), vmax=w.max())
plt.colorbar()

## Experiment 1: Center and Variance-Scale the Features

### 1a

Compute the mean for each of the 784 features.
Center the points by subtracting the mean.

$$f_{i, j}-\mu_j$$

In [None]:
# Column-wise mean over the training examples: one value per feature.
meanimg = trainX.mean(axis=0)

# Just for fun, draw the mean as a 28x28 grayscale image with the colour scale fixed to 0..255.
grayMap = plt.get_cmap('gray')
plt.imshow(meanimg.reshape(28, 28), interpolation='none', cmap=grayMap, vmin=0, vmax=255)

In [None]:
centeredTrainX = trainX-meanimg
centeredTestX = testX-meanimg # can't use means of test data; use training means
    
print 'kNN accuracy on centered feature rep is', knn_predscore(centeredTrainX, trainy, centeredTestX, testy, 3)
print 'Perceptron accuracy on centered feature rep is', perceptron_predscore(centeredTrainX, trainy, centeredTestX, testy)

### 1b

Compute the standard deviation of each feature over the training set and scale every feature by dividing it by its standard deviation.

$$\dfrac{f_{i, j}}{\sigma_j}$$

In [None]:
stds = np.std(trainX, axis=0)  # get variance on each column (feature)
stds[stds==0] = 1  # to prevent zero division errors
stdnormTrainX = trainX/stds
stdnormTestX = testX/stds # can't use variances of test data; use training variances

print 'kNN accuracy on normalized feature rep is', knn_predscore(stdnormTrainX, trainy, stdnormTestX, testy, 3)
print 'Perceptron accuracy on normalized feature rep is', perceptron_predscore(stdnormTrainX, trainy, stdnormTestX, testy)

censtdnormTrainX = centeredTrainX/stds
censtdnormTestX = centeredTestX/stds
print 'kNN accuracy on centered+normalized feature rep is', knn_predscore(censtdnormTrainX, trainy, censtdnormTestX, testy, 3)
print 'Perceptron accuracy on centered+normalized feature rep is', perceptron_predscore(censtdnormTrainX, trainy, censtdnormTestX, testy)

## Experiment 2: Example-Norm the Data-Points

Scale every data-point (row) by its Euclidean norm.

$$\dfrac{x}{||x||}$$

In [None]:
trainnorms = norm(trainX, axis=1, keepdims=True)
exnormTrainX = trainX/trainnorms
testnorms = norm(testX, axis=1, keepdims=True)
exnormTestX = testX/testnorms
print 'kNN accuracy on example-normed feature rep is', knn_predscore(exnormTrainX, trainy, exnormTestX, testy, 3)
print 'Perceptron accuracy on example-normed feature rep is', perceptron_predscore(exnormTrainX, trainy, exnormTestX, testy)

## Experiment 3: Simpler Feature Design

### Binarize the Feature Values

Make every feature value 0 or 1 by thresholding (remove grays).

In [None]:
# Threshold at 50: values above it become 1.0, everything else 0.0 (float arrays).
binTrainX = (trainX > 50).astype(float)
binTestX = (testX > 50).astype(float)

In [None]:
# Score both classifiers on the thresholded 0/1 features (kNN with k=3).
print 'kNN accuracy on binarized feature rep is', knn_predscore(binTrainX, trainy, binTestX, testy, 3)
print 'Perceptron accuracy on binarized feature rep is', perceptron_predscore(binTrainX, trainy, binTestX, testy)

### Region-Based Feature Representations

Here's one simple way of featurizing the image with a small number of dimensions.

Re-represent the data points as n-dimensional vectors consisting of the number of black pixels in n regions of the image.

Does this give us enough information for classification?

In [None]:
def numblack(X, regions):
    """Count the zero-valued entries of each example in consecutive column bands.

    The columns of `X` are split into `regions` contiguous bands; band i
    covers columns [dims*i//regions, dims*(i+1)//regions), so together the
    bands cover every column exactly once, even when the column count is not
    a multiple of `regions`.

    Parameters
    ----------
    X : 2-D array, one example per row.
    regions : number of column bands (must be at least 1).

    Returns
    -------
    Array of shape (number of rows of X, regions); entry [r, i] is the
    number of entries equal to 0 that row r has inside band i.
    """
    dims = X.shape[1]
    Xregions = []
    for i in range(regions):
        # The previous bounds used (i-1, i) for i >= 1, which repeated the
        # first band for i == 1 and never reached the last band.
        # `//` keeps the slice bounds integral on Python 2 and Python 3.
        start = dims * i // regions
        stop = dims * (i + 1) // regions
        Xregions.append(np.sum(X[:, start:stop] == 0, axis=1))
    return np.vstack(tuple(Xregions)).T
    
# Re-represent every example by its zero-pixel counts in 28 column bands
# (28 columns each for 784-column inputs).
blackTestX = numblack(testX, 28)
blackTrainX = numblack(trainX, 28)

In [None]:
print 'kNN accuracy on 1-dim feature rep is', sklearn_knn_predictscore(blackTrainX, trainy, blackTestX, testy, 3, 'euclidean')
print 'Perceptron accuracy on 1-dim feature rep is', perceptron_predscore(blackTrainX, trainy, blackTestX, testy)