In [2]:
import functools
import numpy as np
import sklearn.metrics
import sklearn.datasets
import sklearn.model_selection
import matplotlib.pyplot as plt
import scipy.special

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [8]:
class MultiLayerPerceptron:
    """Fully connected feed-forward network with a sigmoid on every layer.

    Parameters
    ----------
    shape : sequence of int
        Number of units per layer, input layer first.
    weights : list of 2-D arrays, optional
        One matrix per layer transition. Matrix ``i`` may be either
        ``(shape[i + 1], shape[i])`` (no bias) or
        ``(shape[i + 1], shape[i] + 1)`` (last column is the bias weight).
        When omitted, bias-augmented matrices are drawn from ``U[0, 1)``
        using NumPy's global random state.
    """

    def __init__(self, shape, weights=None):
        self.shape = shape
        self.num_layers = len(shape)
        if weights is None:
            # The extra column holds the bias weight of each receiving unit.
            self.weights = [
                np.random.uniform(size=(self.shape[i + 1], self.shape[i] + 1))
                for i in range(self.num_layers - 1)
            ]
        else:
            self.weights = weights

    def run(self, data):
        """Propagate ``data`` through the network and return the output layer.

        Parameters
        ----------
        data : array-like
            Either a single sample of shape ``(shape[0],)`` or a batch of
            shape ``(n_samples, shape[0])``.

        Returns
        -------
        numpy.ndarray
            Output activations, shape ``(shape[-1],)`` for a single sample or
            ``(shape[-1], n_samples)`` for a batch (units along axis 0).
        """
        layer = np.asarray(data).T
        for i in range(self.num_layers - 1):
            W = np.asarray(self.weights[i])
            # A matrix with exactly one more column than the incoming layer
            # has units carries a bias column: feed it a constant-1 input.
            # Without this the default-initialised weights cannot be applied.
            if W.ndim == 2 and W.shape[1] == layer.shape[0] + 1:
                ones = np.ones((1,) + layer.shape[1:])
                layer = np.concatenate([layer, ones], axis=0)
            # sigmoid
            layer = scipy.special.expit(np.dot(W, layer))
        return layer


In [12]:
class Result:
    """Plain record describing the outcome of one optimisation run.

    The constructor stores its four arguments unchanged (no copying, no
    validation) under attributes of the same name:

    * ``best_particle``  - the best solution found
    * ``best_scores``    - history of best scores
    * ``accuracies``     - history of accuracies
    * ``num_iterations`` - number of iterations performed
    """

    def __init__(self, best_particle, best_scores, accuracies, num_iterations):
        # Solution first, then its score/accuracy histories, then the run length.
        self.best_particle = best_particle
        self.best_scores = best_scores
        self.accuracies = accuracies
        self.num_iterations = num_iterations

def dim_weights(shape):
    """Return the number of weights in a network of the given ``shape``.

    Each transition between consecutive layers contributes
    ``(units_in + 1) * units_out`` values; the ``+ 1`` accounts for one extra
    (bias) input per receiving unit. A shape with fewer than two layers
    yields 0.
    """
    return sum(
        (units_in + 1) * units_out
        for units_in, units_out in zip(shape[:-1], shape[1:])
    )

def eval_accuracy(weights, shape, X, y):
    """Count correct and wrong class predictions of a network on ``X``.

    A ``MultiLayerPerceptron`` is built from ``shape`` and ``weights`` and run
    on one sample at a time; the predicted class is the index of the largest
    output activation.

    Returns
    -------
    tuple
        ``(corrects, wrongs, predictions)`` where ``predictions`` is the list
        of predicted class indices in sample order.
    """
    network = MultiLayerPerceptron(shape, weights=weights)
    predictions = [np.argmax(network.run(X[k])) for k in range(len(X))]
    # Tally the hits; every sample that is not a hit is a miss.
    corrects = sum(1 for k, guess in enumerate(predictions) if guess == y[k])
    wrongs = len(predictions) - corrects
    return corrects, wrongs, predictions

def weights_to_vector(weights):
    """Flatten a list of weight matrices into a single 1-D float vector.

    Parameters
    ----------
    weights : sequence of array-like
        Weight matrices, in layer order.

    Returns
    -------
    numpy.ndarray
        All matrices flattened in row-major order and joined end to end.
        An empty sequence yields an empty float array.
    """
    # The empty float seed keeps the result a float vector (and makes the
    # empty-input case well defined), as the original accumulator did.
    # Iterating the list directly avoids the former off-by-one, which indexed
    # weights[len(weights)] and always raised IndexError.
    pieces = [np.asarray([])]
    pieces.extend(np.asarray(W).ravel() for W in weights)
    return np.concatenate(pieces)


def vector_to_weights(vector, shape):
    """Cut a flat parameter vector into one weight matrix per layer transition.

    Consecutive slices of ``vector`` are reshaped (row-major) into matrices of
    shape ``(shape[i + 1], shape[i])``; no bias column is included. Elements
    of ``vector`` beyond what the layers consume are ignored.

    Returns
    -------
    list of numpy.ndarray
        The matrices in layer order.
    """
    matrices = []
    offset = 0
    for units_in, units_out in zip(shape[:-1], shape[1:]):
        count = units_out * units_in
        chunk = vector[offset:offset + count]
        matrices.append(chunk.reshape((units_out, units_in)))
        offset += count
    return matrices

def eval_neural_network_via_vector(weights, shape, X, y):
    """Score a single flat weight vector by mean squared error.

    ``weights`` is converted to an array, split into layer matrices with
    ``vector_to_weights`` and loaded into a ``MultiLayerPerceptron``. The
    network output for ``X`` is compared against ``np.atleast_2d(y)``.

    Returns
    -------
    numpy.ndarray
        One-element float array holding the mean squared error.
    """
    # NOTE(review): the network output is passed to the metric exactly as
    # `run` returns it, so `y` must already have the matching layout - confirm
    # against callers.
    layer_matrices = vector_to_weights(np.array(weights), shape)
    network = MultiLayerPerceptron(shape, weights=layer_matrices)
    predicted = network.run(X)
    error = sklearn.metrics.mean_squared_error(np.atleast_2d(y), predicted)
    return np.append(np.asarray([]), error)

def eval_neural_network_via_weights(weights, shape, X, y):
    """Score several flat weight vectors by mean squared error.

    Parameters
    ----------
    weights : iterable of array-like
        One flat weight vector per candidate network.
    shape : sequence of int
        Layer sizes, passed to ``vector_to_weights`` and the network.
    X : array-like
        Input samples forwarded to ``MultiLayerPerceptron.run``.
    y : array-like
        Targets; compared as ``np.atleast_2d(y)`` against the network output.

    Returns
    -------
    numpy.ndarray
        1-D float array with one mean squared error per weight vector, in
        input order (empty when ``weights`` is empty).
    """
    # NOTE(review): the network output is passed to the metric exactly as
    # `run` returns it, so `y` must already have the matching layout - confirm
    # against callers.
    target = np.atleast_2d(y)  # loop-invariant, computed once
    errors = []
    for w in weights:
        layer_matrices = vector_to_weights(np.asarray(w), shape)
        # Hand the matrices to the constructor instead of building a randomly
        # initialised network and overwriting it: that wasted an allocation
        # per candidate and advanced the global RNG as a side effect.
        nn = MultiLayerPerceptron(shape, weights=layer_matrices)
        errors.append(sklearn.metrics.mean_squared_error(target, nn.run(X)))
    return np.asarray(errors, dtype=float)


def print_best_particle(best_particle):
    """Print a one-line report about a newly found best solution.

    ``best_particle`` must be indexable: element 0 is reported as the
    iteration number and element 1 as the mean squared error.
    """
    template = "New best weights found at iteration #{i} with mean squared error: {score}"
    iteration = best_particle[0]
    error = best_particle[1]
    print(template.format(i=iteration, score=error))

In [23]:
import numpy as np
import scipy.special
import sklearn.metrics

class MLPStochasticGradDescent:
    """Sigmoid multi-layer perceptron trained by mini-batch gradient descent.

    Weight matrix ``i`` has shape ``(shape[i + 1], shape[i])`` (no bias term).
    With one sample per row of ``a``, a layer is evaluated as
    ``expit(a @ W.T)``.

    Parameters
    ----------
    shape : sequence of int
        Units per layer, input layer first.
    learning_rate : float
        Step size applied to the summed mini-batch gradient.
    max_epochs : int
        Number of passes over the training set.
    batch_size : int
        Samples per weight update.
    print_epochs : bool
        When true, report error and accuracy after every epoch.
    """

    def __init__(self, shape, learning_rate=0.1, max_epochs=1000, batch_size=1, print_epochs=True):
        self.shape = shape
        self.num_layers = len(shape)
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.print_epochs = print_epochs
        self.weights = []

    def initialize_weights(self):
        """Replace ``self.weights`` with fresh matrices drawn from ``U[0, 1)``."""
        self.weights = [
            np.random.uniform(size=(self.shape[i + 1], self.shape[i]))
            for i in range(self.num_layers - 1)
        ]

    def forward_propagation(self, X):
        """Return the activations of every layer for the batch ``X``.

        Parameters
        ----------
        X : numpy.ndarray, shape ``(n_samples, shape[0])``

        Returns
        -------
        list of numpy.ndarray
            ``[X, a_1, ..., a_out]`` where ``a_k`` has shape
            ``(n_samples, shape[k])``.
        """
        activations = [X]
        for i in range(self.num_layers - 1):
            # Every layer uses W.T. The hidden layers previously multiplied by
            # W itself, which only ran when adjacent layers had equal width
            # and disagreed with the gradients in backward_propagation.
            z = np.dot(activations[-1], self.weights[i].T)
            activations.append(scipy.special.expit(z))
        return activations

    def backward_propagation(self, X, y, activations):
        """Apply one gradient-descent update to all weight matrices in place.

        Parameters
        ----------
        X : numpy.ndarray
            The input batch that produced ``activations``.
        y : numpy.ndarray
            Targets with the same shape as ``activations[-1]``.
        activations : list of numpy.ndarray
            Result of ``forward_propagation(X)``.
        """
        output = activations[-1]
        # Squared-error derivative times the sigmoid derivative a * (1 - a).
        delta = (output - y) * output * (1 - output)

        for i in range(self.num_layers - 2, 0, -1):
            gradient = np.dot(delta.T, activations[i])
            # Push the error back through the weights that produced this
            # forward pass, i.e. before this layer's update is applied.
            hidden_error = np.dot(delta, self.weights[i])
            self.weights[i] -= self.learning_rate * gradient
            delta = hidden_error * activations[i] * (1 - activations[i])

        self.weights[0] -= self.learning_rate * np.dot(delta.T, X)

    def get_score(self, X, y):
        """Return the mean squared error between ``y`` and the network output.

        ``y`` must have shape ``(n_samples, shape[-1])``.
        """
        y_pred = self.forward_propagation(X)[-1]
        return sklearn.metrics.mean_squared_error(y, y_pred)

    def train(self, X, y_onehot, y):
        """Train from freshly initialised weights and return a ``Result``.

        Parameters
        ----------
        X : numpy.ndarray, shape ``(n_samples, shape[0])``
            Training inputs.
        y_onehot : numpy.ndarray, shape ``(n_samples, shape[-1])``
            Training targets used for the updates and the error score.
        y : array-like, length ``n_samples``
            Integer class labels used only to compute accuracy.

        Returns
        -------
        Result
            ``best_particle`` is the trained weight list, ``best_scores`` a
            list of ``(update_count, mse)`` pairs (starting with the
            placeholder ``(0, 1)``), ``accuracies`` the training accuracy
            after each epoch and ``num_iterations`` equals ``max_epochs``.
        """
        self.initialize_weights()
        num_updates = 0
        accuracies = []
        best_scores = [(num_updates, 1)]
        if self.print_epochs:
            # Report the score itself, not the whole (iteration, score) pair.
            print_best_particle([num_updates, best_scores[-1][1]])
        labels = np.asarray(y)
        for epoch in range(self.max_epochs):
            # Visit the samples in a new random order every epoch.
            order = np.random.permutation(X.shape[0])
            X_shuffled = X[order]
            y_onehot_shuffled = y_onehot[order]
            for start in range(0, X.shape[0], self.batch_size):
                stop = start + self.batch_size
                X_batch = X_shuffled[start:stop]
                y_batch = y_onehot_shuffled[start:stop]
                activations = self.forward_propagation(X_batch)
                self.backward_propagation(X_batch, y_batch, activations)
                num_updates += 1

            score = self.get_score(X, y_onehot)
            # One vectorised pass over the training set.
            accuracy = float(np.mean(self.predict(X) == labels))
            best_scores.append((num_updates, score))
            if self.print_epochs:
                print_best_particle([num_updates, score])
                print("With accuracy: {accuracy}".format(accuracy=accuracy))
            accuracies.append(accuracy)

        return Result(
            best_particle=self.weights,
            best_scores=best_scores,
            accuracies=accuracies,
            num_iterations=self.max_epochs
        )

    def predict(self, X):
        """Return the predicted class index (arg-max output) for each row of ``X``."""
        activations = self.forward_propagation(X)
        return np.argmax(activations[-1], axis=1)


In [24]:
# Load the digits dataset bundled with scikit-learn (`load_digits`) and hold out 20 % for testing
num_classes = 10
mnist = sklearn.datasets.load_digits(n_class=num_classes)
X, X_test, y, y_test = sklearn.model_selection.train_test_split(
    mnist.data,
    mnist.target,
    test_size=0.2,
    random_state=42,
)

num_inputs = X.shape[1]

# One-hot encode the labels: row k of the identity matrix is the code of class k.
y_true = np.eye(num_classes)[y]
y_test_true = np.eye(num_classes)[y_test]

In [None]:
# Seed NumPy's global RNG so that weight initialisation and mini-batch
# shuffling (both drawn from np.random) repeat on every Restart & Run All,
# matching the seeded train/test split.
SEED = 42
np.random.seed(SEED)

RESULTS = []
# Normalize the input features (statistics are fitted on the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
# NOTE(review): this rebinds X_test to its scaled version, so re-running this
# cell without re-running the data-loading cell scales the test set twice.
X_test = scaler.transform(X_test)

# Convert target labels to one-hot encoding
y_train_onehot = np.eye(num_classes)[y]

# Define the MLP shape and hyperparameters
mlp_shape = [X_train.shape[1], 64, num_classes]
learning_rate = 0.01
max_epochs = 1000
print_epochs = False
batch_size = 10

# Instantiate and train the MLP
mlp = MLPStochasticGradDescent(
    shape=mlp_shape,
    learning_rate=learning_rate,
    batch_size=batch_size,
    max_epochs=max_epochs,
    print_epochs=print_epochs,
)
RESULTS.append(["Gradient Descent", mlp.train(X_train, y_train_onehot, y)])