In [31]:
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from sklearn import datasets
from keras.optimizers import Adam
from keras.utils import np_utils
import numpy as np
import argparse
import cv2
 
# load the MNIST digits dataset
mnist = fetch_mldata('MNIST original')

# reshape the MNIST dataset from 784-dim vectors to 28 x 28 pixel images 
data = mnist.data.reshape((mnist.data.shape[0], 28, 28))

# construct the training and testing splits and scale the data to the range [0, 1.0]
(trainData, testData, trainLabels, testLabels) = train_test_split(data / 255.0, mnist.target.astype("int"), test_size=0.33)

trainData = trainData.reshape(trainData.shape[0], 28, 28)

testData = testData.reshape(testData.shape[0], 28, 28)
print(testData.shape)


(23100, 28, 28)


In [40]:
import numpy as np
from scipy.special import expit
import sys
n_output = 10
n_features = trainData.shape[1]
n_hidden = 50
l1 = 0.1
l2 = 0.0
epochs = 1000
eta = 0.001
alpha = 0.001
decrease_const = 0.00001
shuffle = True
minibatches = 1


In [41]:
def _initialize_weights():
    """Initialize weights with small random numbers."""
    w1 = np.random.uniform(-1.0, 1.0,size=n_hidden*(n_features + 1))
    w1 = w1.reshape(n_hidden, n_features + 1)
    w2 = np.random.uniform(-1.0, 1.0,size=n_output*(n_hidden + 1))
    w2 = w2.reshape(n_output, n_hidden + 1)
    return w1, w2

def _sigmoid(z):
    """Compute logistic function (sigmoid)
    Uses scipy.special.expit to avoid overflow
    error for very small input values z.

    """
    # return 1.0 / (1.0 + np.exp(-z))
    return expit(z)

def _sigmoid_gradient(z):
    """Compute gradient of the logistic function"""
    sg = _sigmoid(z)
    return sg * (1.0 - sg)

def _add_bias_unit(X, how='column'):
    """Add bias unit (column or row of 1s) to array at index 0"""
    if how == 'column':
        X_new = np.ones((X.shape[0], X.shape[1] + 1))
        X_new[:, 1:] = X
    elif how == 'row':
        X_new = np.ones((X.shape[0] + 1, X.shape[1]))
        X_new[1:, :] = X
    else:
        raise AttributeError('`how` must be `column` or `row`')
    return X_new

def _feedforward(X, w1, w2):
    a1 = _add_bias_unit(X, how='column')
    z2 = w1.dot(a1.T)
    a2 = _sigmoid(z2)
    a2 = _add_bias_unit(a2, how='row')
    z3 = w2.dot(a2)
    a3 = _sigmoid(z3)
    return a1, z2, a2, z3, a3

def _L2_reg(lambda_, w1, w2):
    """Compute L2-regularization cost"""
    return (lambda_/2.0) * (np.sum(w1[:, 1:] ** 2) + np.sum(w2[:, 1:] ** 2))

def _L1_reg(lambda_, w1, w2):
    """Compute L1-regularization cost"""
    return (lambda_/2.0) * (np.abs(w1[:, 1:]).sum() + np.abs(w2[:, 1:]).sum())

def _get_cost(y_enc, output, w1, w2):
    """Compute cost function."""
    term1 = -y_enc * (np.log(output))
    term2 = (1.0 - y_enc) * np.log(1.0 - output)
    cost = np.sum(term1 - term2)
    L1_term = _L1_reg(l1, w1, w2)
    L2_term = _L2_reg(l2, w1, w2)
    cost = cost + L1_term + L2_term
    return cost

def _get_gradient(a1, a2, a3, z2, y_enc, w1, w2):
    """ Compute gradient step using backpropagation."""
    # backpropagation
    sigma3 = a3 - y_enc
    z2 = _add_bias_unit(z2, how='row')
    sigma2 = w2.T.dot(sigma3) * _sigmoid_gradient(z2)
    sigma2 = sigma2[1:, :]
    grad1 = sigma2.dot(a1)
    grad2 = sigma3.dot(a2.T)

        # regularize
    grad1[:, 1:] += l2 * w1[:, 1:]
    grad1[:, 1:] += l1 * np.sign(w1[:, 1:])
    grad2[:, 1:] += l2 * w2[:, 1:]
    grad2[:, 1:] += l1 * np.sign(w2[:, 1:])

    return grad1, grad2

def predict(X):
    """Predict class labels"""
    if len(X.shape) != 2:
        raise AttributeError('X must be a [n_samples, n_features] array.\n'
                                 'Use X[:,None] for 1-feature classification,'
                                 '\nor X[[i]] for 1-sample classification')

    a1, z2, a2, z3, a3 = _feedforward(X, w1, w2)
    y_pred = np.argmax(z3, axis=0)
    return y_pred

def fit(X, y, print_progress=False):
    """ Learn weights from training data."""
    w1, w2 = _initialize_weights()
    cost_ = []
    X_data, y_data = X.copy(), y.copy()
    #y_enc = _encode_labels(y, n_output)

    delta_w1_prev = np.zeros(w1.shape)
    delta_w2_prev = np.zeros(w2.shape)

    for i in range(epochs):

        # adaptive learning rate
        eta /= (1 + decrease_const*i)

        if print_progress:
            sys.stderr.write('\rEpoch: %d/%d' % (i+1, epochs))
            sys.stderr.flush()

        if shuffle:
            idx = np.random.permutation(y_data.shape[0])
            X_data, y_enc = X_data[idx], y_enc[:, idx]

        mini = np.array_split(range(y_data.shape[0]), minibatches)
        for idx in mini:

            # feedforward
            a1, z2, a2, z3, a3 = _feedforward(X_data[idx],
                                                       w1,
                                                       w2)
            cost = _get_cost(y_enc=y_enc[:, idx],
                                      output=a3,
                                      w1=w1,
                                      w2=w2)
            cost_.append(cost)

                # compute gradient via backpropagation
            grad1, grad2 = _get_gradient(a1=a1, a2=a2,
                                                  a3=a3, z2=z2,
                                                  y_enc=y_enc[:, idx],
                                                  w1=w1,
                                                  w2=w2)

            delta_w1, delta_w2 = eta * grad1, eta * grad2
            w1 -= (delta_w1 + (alpha * delta_w1_prev))
            w2 -= (delta_w2 + (alpha * delta_w2_prev))
            delta_w1_prev, delta_w2_prev = delta_w1, delta_w2



In [42]:

# training
fit(trainData, trainLabels, print_progress=True)

UnboundLocalError: local variable 'eta' referenced before assignment