## classifying tiny digit images with MLP from scratch

10-class recognition problem, one hidden layer, square loss
- implement full-batch vectorized gradient descent
- monitor train and test error
- compute accuracies
- play with parameters
- add biases
- change to softmax laeyr with logistic loss

In [1]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

# main parameters
iterNum = 5000
stepSize = 10.0
hiddenSize = 10
trainNum = 500
displayFlag = False

# Display plots inline 
%matplotlib inline

In [2]:
# compute sigmoid
def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output*(1-output)

In [3]:
# load and prepare dataset
digits = datasets.load_digits()
X_all = digits.data.T
y = digits.target
dataNum = X_all.shape[1]
labels = digits.target_names
outSize = labels.size 
inputSize = X_all.shape[0]

# building gt one-hot vectors 
Y_all = np.zeros((outSize,dataNum))
for n in range(dataNum):  
    Y_all[y[n],n] = 1 

# extract training and tet sets
testNum = dataNum - trainNum
X = X_all[:,:trainNum]
Y = Y_all[:,:trainNum]
X_test = X_all[:,trainNum:]
Y_test = Y_all[:,trainNum:]
print('%d training examples and %d test examples' % (trainNum,testNum))

# visualizing random samples
if displayFlag:
    tileSize = 10
    lst_strips = []
    for row in range(tileSize):
        lst_imgs = []
        ids = np.random.randint(0,dataNum,tileSize)
        for id in ids:
            lst_imgs.append(np.concatenate([digits.images[id],np.ones((8,2))],axis=1))
        strip = np.concatenate(lst_imgs, axis=1)
        lst_strips.append(np.concatenate([strip,np.ones((2,10*tileSize))],axis=0))
    tile = np.concatenate(lst_strips, axis=0)
    plt.figure(figsize=(4,4))
    plt.imshow(tile,interpolation='none',cmap='gray')
    plt.axis('off')
    plt.show()

500 training examples and 1297 test examples


In [4]:
# initialize weights randomly with mean 0
# np.random.seed(1)
W1 = np.random.randn(hiddenSize,inputSize)
W2 = np.random.randn(outSize,hiddenSize)

# visualize weights
if displayFlag:
    plt.figure(figsize=(4,4))
    plt.subplot(121)
    plt.imshow(W1,interpolation = 'none')
    plt.axis('off')
    plt.title('W1')
    plt.subplot(122)
    plt.imshow(W2,interpolation = 'none')
    plt.axis('off')
    plt.title('W2')

activation (no bias): $a_{\ell} = Wx_{\ell-1}$, output: $x_{\ell} = \sigma(a_{\ell})$  

square loss: $\frac{1}{2N} \sum_{n=1}^{N} \|x_2^{(n)}-y^{(n)}\|^2$

In [5]:
# compute batch loss and loss gradient  
def loss(pred,target): 
    return 0 # TBC
 
def lossGrad(pred,target):
    return 0 # TBC

In [6]:
trainError = np.zeros(iterNum)
testError = np.zeros(iterNum)

for iter in range(iterNum):

    # forward propagation on training and test data
    X0 = X
    A1 = np.dot(W1,X0)  
    X1 = sigmoid(A1)
    A2 = np.dot(W2,X1)
    X2 = sigmoid(A2)    
    
    X2_test = np.zeros(Y_test.shape) # TBC 
    
    # compute and print mean error on both datasets 
    trainError[iter] = np.mean(loss(X2,Y))
    testError[iter]  = np.mean(loss(X2_test,Y_test))
    if (iter% 1000) == 0 :
        print("Train/test error after %d iterations: %.4f / %.4f" % (iter,  trainError[iter], testError[iter]))
   
    # back propagation of training errors
    G2 = lossGrad(X2,Y)
    H2 = G2 * sigmoid_output_to_derivative(X2)
    G1 = np.dot(W2.T,H2)
    H1 = G1 * sigmoid_output_to_derivative(X1)
    
    dW1 = np.dot(H1,X0.T) / trainNum 
    dW2 = np.dot(H2,X1.T) / trainNum
    
    # Updating weights
    W2 -= stepSize * dW2
    W1 -= stepSize * dW1

Train/test error after 0 iterations: 0.0000 / 0.0000
Train/test error after 1000 iterations: 0.0000 / 0.0000
Train/test error after 2000 iterations: 0.0000 / 0.0000
Train/test error after 3000 iterations: 0.0000 / 0.0000
Train/test error after 4000 iterations: 0.0000 / 0.0000


In [7]:
# test accuracy
accuracy = float(0);
# TBC
print('Accuracy : %.4f' % accuracy)

Accuracy : 0.0000


In [8]:
# training and test plots
if displayFlag:
    plt.figure(figsize=(4,4))
    plt.plot(range(iterNum),trainError,'b',range(iterNum),testError,'r',)
    plt.show()

In [9]:
# visualize randomly picked test predictions
if displayFlag:
    sampleId = np.random.randint(0,testNum,5)
    for id in sampleId:
        if np.argmax(X2_test[:,id]) != y[id+trainNum] :
            colormap = 'hot'
        else: 
            colormap = 'gray'
        plt.figure(figsize=(1,1))
        plt.imshow(digits.images[id+trainNum],interpolation='none',cmap=colormap)
        plt.axis('off')
        plt.show()
        print('true: %d, predicted: %d' % (np.argmax(X2_test[:,id]) , y[id+trainNum]))