In [19]:
import numpy as np

In [20]:
class MLPClassifier():
    """Fully connected sigmoid network trained with per-sample gradient descent.

    Every layer (hidden and output) uses the logistic sigmoid activation and the
    loss is the summed binary cross-entropy over the output units, so targets
    are expected as rows of 0/1 indicators (e.g. one-hot vectors).

    Parameters
    ----------
    hidden_layer_sizes : int or sequence of int
        Width of the hidden layer, or one width per hidden layer.
    alpha : float
        Learning rate multiplying every weight and bias gradient.
    max_iter : int
        Maximum number of passes over the training set.
    n_samples : int
        Number of consecutive training samples whose loss is averaged before
        the stopping test is evaluated.
    min_diff : float
        Training stops as soon as the mean loss over the last ``n_samples``
        samples drops below this value. Despite the name it is compared with
        the mean loss itself, not with a change in loss.
    random_state : int or None
        Seed for the initial weights. ``None`` (default) draws from numpy's
        global random state.
    """

    def __init__(self, hidden_layer_sizes, alpha=0.5, max_iter=100, n_samples=100, min_diff=1e-4,
                 random_state=None):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.alpha = alpha
        self.max_iter = max_iter
        self.n_samples = n_samples
        self.min_diff = min_diff
        self.random_state = random_state

    def fit(self, X, y):
        """Train the network on ``X`` (m x n features) and ``y`` (m x K targets).

        Returns ``self``. Raises ``ValueError`` if ``X`` or ``y`` is not
        two-dimensional or if their row counts differ.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2 or y.ndim != 2:
            raise ValueError("X and y must both be 2-D arrays (samples x features / samples x outputs)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of rows")

        m, n = X.shape
        K = y.shape[1]

        # Accept a single int (original behaviour) or one size per hidden layer.
        if np.isscalar(self.hidden_layer_sizes):
            hidden = [int(self.hidden_layer_sizes)]
        else:
            hidden = [int(s) for s in self.hidden_layer_sizes]
        self.topology = [n] + hidden + [K]
        self.L = len(self.topology)

        # With no seed, keep drawing from numpy's global generator as before.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        self.Theta = []
        self.b = []
        for s_in, s_out in zip(self.topology, self.topology[1:]):
            self.Theta.append(rng.randn(s_out, s_in))
            self.b.append(rng.randn(s_out))

        # Per-layer activation and error buffers, reused for every sample.
        self.A = [np.zeros(s_l) for s_l in self.topology]
        self.Delta = [np.zeros(s_l) for s_l in self.topology]  # self.Delta[0] is never used

        loss_sum = np.float64(0)
        count = 0
        for _ in range(0, self.max_iter):
            for i in range(0, m):
                self.feed_forward(X[i])
                self.back_prop(y[i])
                # The loss is read from the activations of the forward pass
                # above, i.e. before this sample's weight update.
                loss_sum += self.compute_loss(y[i])
                count += 1
                if count == self.n_samples:
                    if loss_sum / self.n_samples < self.min_diff:
                        return self
                    count = 0
                    loss_sum = np.float64(0)
        return self

    def predict(self, X):
        """Return the output-layer activations for every row of ``X``.

        The result has shape (rows of X, number of output units); take the
        argmax of a row to obtain a class label.
        """
        X = np.asarray(X, dtype=float)
        s_L = self.topology[self.L-1]
        m = X.shape[0]
        y = np.empty([m, s_L])
        for i in range(0, m):
            self.feed_forward(X[i])
            y[i][:] = self.A[self.L-1]
        return y

    def feed_forward(self, x_i):
        """Propagate one sample through the network, filling ``self.A``."""
        self.A[0][:] = x_i
        for l in range(0, self.L-1):
            z = self.Theta[l] @ self.A[l] + self.b[l]
            self.A[l+1][:] = self.g(z)

    # activation function
    def g(self, z):
        """Logistic sigmoid, with ``z`` clipped so ``np.exp`` cannot overflow."""
        return 1.0 / (1 + np.exp(-np.clip(z, -500, 500)))

    def back_prop(self, y_i):
        """Run one gradient step for target ``y_i``.

        Uses the activations left in ``self.A`` by the preceding
        ``feed_forward`` call.
        """
        last = self.L - 1
        # Output error for sigmoid units paired with the cross-entropy loss.
        self.Delta[last][:] = self.A[last] - y_i
        # Hidden-layer errors only; the input layer has no error term.
        for l in range(last - 1, 0, -1):
            gprime = self.A[l] * (1 - self.A[l])
            self.Delta[l][:] = gprime * (self.Theta[l].T @ self.Delta[l+1])
        # All deltas are computed from the old weights before any update.
        for l in range(0, last):
            # update weights
            self.Theta[l] -= self.alpha * np.outer(self.Delta[l+1], self.A[l])
            # update bias terms for every layer, including the output layer
            self.b[l] -= self.alpha * self.Delta[l+1]

    def compute_loss(self, y_i):
        """Binary cross-entropy of the current output activations against ``y_i``."""
        eps = 1e-12
        # Keep the activations strictly inside (0, 1) so log() stays finite.
        h = np.clip(self.A[self.L-1], eps, 1 - eps)
        return - (y_i @ np.log(h) + (1 - y_i) @ np.log(1 - h))

In [21]:
from sklearn import datasets, linear_model

# Load the scikit-learn digits dataset: feature matrix and label vector.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Keep the first 90% of the rows for training and the remainder for testing
# (a plain positional split, no shuffling).
n_samples = len(X_digits)
n_train = int(.9 * n_samples)
X_train, X_test = X_digits[:n_train], X_digits[n_train:]
y_train, y_test = y_digits[:n_train], y_digits[n_train:]

# Baseline for comparison: logistic regression with default settings,
# fitted on the training rows and scored on the held-out rows.
logistic = linear_model.LogisticRegression()
logistic.fit(X_train, y_train)
print('LogisticRegression score: %f' % logistic.score(X_test, y_test))

LogisticRegression score: 0.938889


In [22]:
# preprocessing
# Scale the features to make the training faster: divide by the largest value
# found in the training split (the test split reuses that same maximum).
x_max = np.max(X_train)
X_train_norm = X_train / x_max
X_test_norm = X_test / x_max

# Transform each label into a one-hot vector. Row k of the 10x10 identity
# matrix is the target for class k, so indexing the matrix with the whole label
# array encodes every sample at once instead of rebuilding it per sample.
y_train_vec = np.eye(10)[y_train]
y_test_vec = np.eye(10)[y_test]

In [23]:
# One hidden layer of 30 units, trained for at most 100 passes over the data.
mlp = MLPClassifier(
    hidden_layer_sizes=30,
    max_iter=100,
)

In [24]:
# Train on the scaled features and one-hot targets; fit() returns the estimator,
# which the notebook displays as this cell's output.
mlp.fit(X=X_train_norm, y=y_train_vec)

<__main__.MLPClassifier at 0x7f0da4906f28>

In [25]:
# compute accuracy
n_test = len(y_test)
h = mlp.predict(X_test_norm)
# The predicted class is the index of the largest output in each row; compare
# every row with its true label in one vectorized step and count the matches.
n_count = int(np.count_nonzero(np.argmax(h, axis=1) == y_test))
print('Test accuracy: ', n_count / n_test)

Test accuracy:  0.9444444444444444


In [26]:
# training accuracy
n_train = len(y_train)
h = mlp.predict(X_train_norm)
# The predicted class is the index of the largest output in each row; compare
# every row with its true label in one vectorized step and count the matches.
n_count = int(np.count_nonzero(np.argmax(h, axis=1) == y_train))
print('Training accuracy: ', n_count / n_train)

Training accurary:  1.0
