In [9]:
import pickle
import gzip
import numpy as np

def load_data():
    """Load the pickled MNIST data set from ``mnist.pkl.gz``.

    The file is opened relative to the current working directory.

    Returns:
        A tuple ``(training_data, validation_data, test_data)``.

        ``training_data`` is a tuple with two entries.  The first entry
        contains the actual training images: a numpy ndarray with 50,000
        entries, each of which is a numpy ndarray with 784 values
        representing the 28 * 28 = 784 pixels of a single MNIST image.
        The second entry is a numpy ndarray with 50,000 entries holding
        the digit values (0...9) for the corresponding images.

        ``validation_data`` and ``test_data`` are similar, except each
        contains only 10,000 images.

    This is a nice data format, but for use in neural networks it is
    helpful to modify it a little.  That is done in ``prepare_data()``.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load an mnist.pkl.gz obtained from a trusted source.
    # The context manager closes the file even if unpickling raises.
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Build the one-hot column vector for digit ``j``.

    The result is a (10, 1) array of zeros with a single 1.0 in row
    ``j``; it is the desired network output for a digit (0...9).
    """
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

def unison_shuffled_copies(a, b):
    """Return shuffled copies of ``a`` and ``b`` using one shared permutation.

    Both inputs must have the same length (checked with ``assert``) and
    support indexing by an integer array.  Element ``i`` of the first
    result still corresponds to element ``i`` of the second.
    """
    count = len(a)
    assert count == len(b)
    order = np.random.permutation(count)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

def _scale_images(images):
    """Return a float32 copy of ``images`` shaped (n, 1, 784), divided by 255."""
    # astype() returns a new array, so the in-place division below never
    # touches the caller's data.
    scaled = images.reshape(images.shape[0], 1, 784).astype('float32')
    # NOTE(review): if the pickle already stores pixels in [0, 1], this
    # division shrinks the inputs a second time — confirm against the file.
    scaled /= 255
    return scaled


def _one_hot_labels(labels):
    """Return an (n, 10) float32 array with a 1.0 in column ``labels[i]`` of row i."""
    # Row k of the identity matrix is the one-hot vector for digit k, so a
    # single fancy-index lookup encodes every label at once.
    return np.eye(10, dtype='float32')[np.asarray(labels)]


def prepare_data(data):
    """Convert the three MNIST splits into arrays ready for the network.

    Args:
        data: indexable of three ``(images, labels)`` pairs, in the order
            returned by ``load_data()``: training, validation, test.
            Each ``images`` array must be reshapeable to (n, 1, 784) and
            each ``labels`` array must hold integer digits 0...9.

    Returns:
        ``(X_train, X_test, X_test_2, y_train, y_test, y_test_2)`` where
        every ``X_*`` is a float32 array of shape (n, 1, 784) holding the
        input values divided by 255, and every ``y_*`` is a float32 array
        of shape (n, 10) holding one-hot encoded labels.  ``X_test`` /
        ``y_test`` come from ``data[1]`` and ``X_test_2`` / ``y_test_2``
        from ``data[2]``.
    """
    X_train, X_test, X_test_2 = (_scale_images(data[k][0]) for k in range(3))
    y_train, y_test, y_test_2 = (_one_hot_labels(data[k][1]) for k in range(3))
    return X_train, X_test, X_test_2, y_train, y_test, y_test_2

In [13]:
class Layer:
    """Fully-connected layer computing ``input @ weights + bias``."""

    def __init__(self, inputs, outputs):
        """Create a layer mapping ``inputs`` features to ``outputs`` features.

        Weights are standard-normal samples divided by ``sqrt(inputs)``;
        the bias is a (1, outputs) row of standard-normal samples.
        """
        self.weights = np.random.randn(inputs, outputs)/np.sqrt(inputs)
        self.bias = np.random.randn(1, outputs)

    def forward(self, input_data):
        """Store ``input_data`` for the backward pass and return the layer output."""
        self.input = input_data
        self.output = self.input @ self.weights + self.bias
        return self.output

    def backward(self, output_error, c):
        """Take one gradient-descent step and return the error for the previous layer.

        Args:
            output_error: gradient of the loss with respect to this
                layer's output, one row per sample passed to ``forward``.
            c: learning rate.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        # Must be computed before the weights are modified below.
        input_error = output_error @ self.weights.T
        weights_error = self.input.T @ output_error
        # Sum the per-sample gradients so the (1, outputs) bias can also be
        # updated from a mini-batch; for a single row the sum is that row.
        bias_error = np.atleast_2d(output_error).sum(axis=0, keepdims=True)
        self.weights -= c * weights_error
        self.bias -= c * bias_error
        return input_error

In [14]:
class ActivationFunction:
    """Network layer that applies an element-wise activation function."""

    def __init__(self, activation, activation_deriv):
        """Keep the activation callable and the callable for its derivative."""
        self.activation, self.activation_deriv = activation, activation_deriv

    def forward(self, input_data):
        """Remember ``input_data`` and return the activation applied to it."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward(self, output_error, c):
        """Return ``output_error`` scaled by the derivative at the stored input.

        ``c`` (the learning rate) is accepted so the signature matches
        ``Layer.backward``; it is not used because there is nothing to update.
        """
        slope = self.activation_deriv(self.input)
        return slope * output_error
    
def activ(x):
    """Hyperbolic-tangent activation, applied element-wise to ``x``."""
    squashed = np.tanh(x)
    return squashed

def derivative(x):
    """Derivative of tanh at ``x``: ``1 - tanh(x)**2``, element-wise."""
    tanh_x = np.tanh(x)
    return 1 - tanh_x ** 2

def loss_function(y_t, y_p):
    """Mean squared error between targets ``y_t`` and predictions ``y_p``.

    Returns the mean of ``(y_t - y_p) ** 2`` over all elements.
    """
    # Parentheses are required: ``**`` binds tighter than ``-``, so without
    # them only ``y_p`` would be squared.
    return np.mean((y_t - y_p) ** 2)

def derivative_loss(y_t, y_p):
    """Gradient of the mean squared error with respect to ``y_p``.

    Computes ``2 * (y_p - y_t) / y_t.size`` element-wise.
    """
    residual = y_p - y_t
    return 2 * residual / y_t.size

In [15]:
class Network:
    """Sequential stack of layers trained one sample at a time."""

    def __init__(self, loss, loss_der):
        """Start with no layers; keep the loss and its derivative callables."""
        self.layers = []
        self.loss, self.loss_der = loss, loss_der

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def _feed_forward(self, sample):
        """Pass ``sample`` through every layer in order and return the final output."""
        signal = sample
        for layer in self.layers:
            signal = layer.forward(signal)
        return signal

    def predict(self, input_data):
        """Return a list holding the network output for each item of ``input_data``."""
        return [self._feed_forward(sample) for sample in input_data]

    def train(self, X, y, loops, c):
        """Run ``loops`` passes over the data, updating the layers after every sample.

        Before each pass ``X`` and ``y`` are shuffled together.  For each
        sample the output is computed, ``loss_der(target, output)`` gives
        the initial error, and that error is propagated through the layers
        in reverse order with learning rate ``c``.
        """
        for _ in range(loops):
            X, y = unison_shuffled_copies(X, y)
            for sample, target in zip(X, y):
                gradient = self.loss_der(target, self._feed_forward(sample))
                for layer in self.layers[::-1]:
                    gradient = layer.backward(gradient, c)

In [5]:
# Load the (training, validation, test) tuple from the pickled MNIST file.
data = load_data()

In [10]:
# Reshape and scale the images and one-hot encode the labels of every split.
# Despite the names, X_test / y_test come from data[1] (the validation split)
# and X_test_2 / y_test_2 from data[2] (the test split).
X_train, X_test, X_test_2, y_train, y_test, y_test_2 = prepare_data(data)

In [22]:
# Seed NumPy's global RNG so the random weight initialisation in Layer and the
# per-pass shuffling in Network.train repeat on every Restart & Run All.
np.random.seed(42)

# 784 inputs -> 50 hidden units -> 10 outputs, with tanh after each dense layer.
net = Network(loss_function, derivative_loss)
net.add(Layer(784, 50))
net.add(ActivationFunction(activ, derivative))
net.add(Layer(50, 10))
net.add(ActivationFunction(activ, derivative))
# 30 passes over the training set with learning rate 0.1.
net.train(X_train, y_train, loops=30, c=0.1)

In [23]:
# Count correct predictions on X_test / y_test (the validation split, data[1]).
# Iterating over the arrays themselves avoids hard-coding the sample count.
verification = []
for sample, target in zip(X_test, y_test):
    # predict() returns a one-element list holding a (1, 10) output row.
    predicted = np.argmax(net.predict(sample)[0][0], axis=0)
    true = np.argmax(target, axis=0)
    verification.append(1 if predicted == true else 0)

print(sum(verification))


8354


In [24]:
# Count correct predictions on X_test_2 / y_test_2 (the test split, data[2]).
# Iterating over the arrays themselves avoids hard-coding the sample count.
verification_2 = []
for sample_2, target_2 in zip(X_test_2, y_test_2):
    # predict() returns a one-element list holding a (1, 10) output row.
    predicted_2 = np.argmax(net.predict(sample_2)[0][0], axis=0)
    true_2 = np.argmax(target_2, axis=0)
    verification_2.append(1 if predicted_2 == true_2 else 0)

print(sum(verification_2))


8254
