In [1]:
from sklearn.utils import shuffle

#from sklearn.datasets import fetch_mldata, fetch_openml
from sklearn.datasets import *
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import numpy as np

# Seed NumPy's global RNG. scikit-learn's train_test_split / shuffle use that
# global generator when no random_state is passed, so the splits are repeatable.
np.random.seed(34)

# fetch_mldata was removed in scikit-learn 0.22 (the mldata.org backend is gone).
# fetch_openml provides the MNIST digits instead; as_frame=False keeps the data
# as NumPy arrays rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Scale pixel intensities from [0, 255] down to [0, 1].
x_mnist = mnist.data.astype('float32') / 255.
# OpenML delivers the labels as strings: cast them to integers, then one-hot
# encode by picking the matching rows of a 10x10 identity matrix.
t_mnist = np.eye(10)[mnist.target.astype('int32')]

# Hold out 10,000 samples for testing, then 10,000 of the remainder for validation.
x_train_mnist, x_test_mnist, t_train_mnist, t_test_mnist = train_test_split(x_mnist, t_mnist, test_size=10000)
x_train_mnist, x_valid_mnist, t_train_mnist, t_valid_mnist = train_test_split(x_train_mnist, t_train_mnist, test_size=10000)

In [2]:
def relu(x):
    """Rectified linear unit: elementwise maximum of `x` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

def deriv_relu(x):
    """Derivative of ReLU: 1 where `x` is strictly positive, 0 elsewhere.

    The result is cast back to the dtype of `x`.
    """
    is_positive = x > 0
    return is_positive.astype(x.dtype)
def np_log(x):
    """Natural logarithm with the input floored at 1e-10.

    The floor keeps log(0) from producing -inf (and 0 * -inf from producing
    NaN in a cross-entropy sum).

    Note: the previous form, np.clip(x, 1e-10, x), used `x` itself as the upper
    bound. For any x below 1e-10 the upper bound wins, so the value was left
    unclipped and log(0) still returned -inf.

    Args:
        x: array-like of values to take the log of.

    Returns:
        Array of log(max(x, 1e-10)), elementwise.
    """
    return np.log(np.maximum(x, 1e-10))

In [3]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Args:
        x: array whose axis 1 holds the scores to normalise.

    Returns:
        Array of the same shape in which every row sums to 1.
    """
    # Subtracting the row maximum does not change the result but keeps exp()
    # from overflowing. Build a new array instead of `x -= ...` so the caller's
    # array (e.g. a layer's cached pre-activation) is not modified.
    shifted = x - x.max(axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

def deriv_softmax(x):
    """Elementwise softmax derivative s * (1 - s), where s = softmax(x).

    This is the diagonal of the softmax Jacobian only; cross terms between
    different outputs are not included.

    Args:
        x: 2-D array of scores, as accepted by softmax.

    Returns:
        Array of the same shape as `x`.
    """
    # Evaluate softmax once and reuse it. The previous code called softmax()
    # with no argument, which raised TypeError on every call.
    s = softmax(x)
    return s * (1 - s)

In [4]:
class Dense:
    """Fully connected layer: y = function(x @ W + b).

    The layer caches its last input and pre-activation during the forward pass
    so that the backward pass can compute the error signal and the gradients.
    """

    def __init__(self, in_dim, out_dim, function, deriv_function):
        """Create the layer.

        Args:
            in_dim: number of input features (rows of W).
            out_dim: number of output units (columns of W, length of b).
            function: activation applied to the pre-activation.
            deriv_function: derivative of `function`, evaluated on the
                pre-activation during back-propagation.
        """
        # Weights start uniformly in [-0.08, 0.08); biases start at zero.
        self.W = np.random.uniform(low=-0.08, high=0.08,
                                   size=(in_dim, out_dim)).astype('float64')
        self.b = np.zeros(out_dim).astype('float64')
        self.function = function
        self.deriv_function = deriv_function

        # Forward-pass cache: last input and last pre-activation.
        self.x = None
        self.u = None

        # Error signal of this layer; set by b_prop() or assigned directly
        # by the caller for the output layer.
        self.delta = None

        # Gradients filled in by compute_grad().
        self.dW = None
        self.db = None

        # Split offsets used by set_params() to cut a flat vector into W and b.
        self.params_idxs = np.cumsum([self.W.size, self.b.size])

    def __call__(self, x):
        """Forward pass: cache `x` and the pre-activation, return the activation."""
        self.x = x
        self.u = np.matmul(self.x, self.W) + self.b
        return self.function(self.u)

    def b_prop(self, delta, W):
        """Back-propagate an error signal into this layer.

        Args:
            delta: error signal of the following layer (the one closer to the output).
            W: weight matrix of that following layer.

        Returns:
            This layer's error signal, also stored as `self.delta`.
        """
        self.delta = self.deriv_function(self.u) * np.matmul(delta, W.T)
        return self.delta

    def compute_grad(self):
        """Compute batch-averaged gradients `dW` and `db` from `x` and `delta`."""
        batch_size = self.delta.shape[0]

        self.dW = np.matmul(self.x.T, self.delta) / batch_size
        # Multiplying by a vector of ones sums delta over the batch axis.
        self.db = np.matmul(np.ones(batch_size), self.delta) / batch_size

    def get_params(self):
        """Return W (flattened) followed by b as one new 1-D array."""
        return np.concatenate([self.W.ravel(), self.b], axis=0)

    def set_params(self, params):
        """Load W and b from a flat vector laid out as in get_params().

        The values are copied. Without the copy, W and b would be views into
        `params`, and later in-place updates of the layer (`layer.W -= ...`)
        would silently modify the caller's array as well.
        """
        # np.split yields [W part, b part, remainder]; the remainder is dropped.
        _W, _b = np.split(params, self.params_idxs)[:-1]
        self.W = _W.reshape(self.W.shape).copy()
        self.b = _b.copy()

    def get_grads(self):
        """Return dW (flattened) followed by db as one new 1-D array."""
        return np.concatenate([self.dW.ravel(), self.db], axis=0)

In [5]:
def f_props(layers, x):
    """Forward pass: feed `x` through every layer in order and return the result.

    Each element of `layers` is called with the output of the previous one;
    with no layers, `x` is returned as is.
    """
    activation = x
    for apply_layer in layers:
        activation = apply_layer(activation)
    return activation

In [6]:
def b_props(layers, delta):
    """Back-propagate `delta` through `layers` and compute every layer's gradients.

    Args:
        layers: sequence of layers in forward order. Each layer must provide
            `W`, `b_prop(delta, W)` and `compute_grad()`, and accept assignment
            to `delta`.
        delta: error signal of the output layer (train_mst passes y - t).

    Returns:
        None. Results are left on the layers by `compute_grad()`.
    """
    # Weights of the layer handled in the previous iteration, i.e. the layer
    # one step closer to the output. Unused for the output layer itself.
    next_W = None

    for i, layer in enumerate(reversed(layers)):
        if i == 0:
            # Output layer: its error signal is given directly.
            layer.delta = delta
        else:
            # Hidden layer: pull the error signal back through the next layer's weights.
            delta = layer.b_prop(delta, next_W)
        layer.compute_grad()

        next_W = layer.W

In [7]:
def update_params(layers, eps):
    """Apply one gradient-descent step to every layer.

    Each layer's `W` and `b` are decreased in place by `eps` times the
    corresponding gradient (`dW`, `db`).
    """
    for dense in layers:
        weight_step = eps * dense.dW
        dense.W -= weight_step
        bias_step = eps * dense.db
        dense.b -= bias_step

In [8]:
# Network layout: 784 -> 100 -> 100 -> 10, with ReLU on the two hidden layers
# and softmax on the output. Layers are built in forward order.
layers = [
    Dense(n_in, n_out, activation, deriv_activation)
    for n_in, n_out, activation, deriv_activation in (
        (784, 100, relu, deriv_relu),
        (100, 100, relu, deriv_relu),
        (100, 10, softmax, deriv_softmax),
    )
]

In [9]:
def train_mst(x, t, eps=0.01):
    """Run one training step on the module-level `layers` and return the loss.

    Args:
        x: input batch passed to the first layer.
        t: targets with the same shape as the network output.
        eps: learning rate handed to update_params.

    Returns:
        Cross-entropy -sum(t * log(y)) per row, averaged over the batch,
        measured before the parameters are updated.
    """
    # Forward pass
    y = f_props(layers, x)

    # Loss
    per_sample_loss = np.sum(-t * np_log(y), axis=1)
    cost = per_sample_loss.mean()

    # Backward pass: y - t is used as the output layer's error signal
    b_props(layers, y - t)

    # Parameter update
    update_params(layers, eps)

    return cost

In [10]:
def valid_mst(x, t):
    """Evaluate the module-level `layers` on (x, t) without updating them.

    Args:
        x: input batch passed to the first layer.
        t: targets with the same shape as the network output.

    Returns:
        Tuple (cost, y): the batch-averaged cross-entropy and the network output.
    """
    # Forward pass
    y = f_props(layers, x)

    # Loss
    per_sample_loss = np.sum(-t * np_log(y), axis=1)

    return per_sample_loss.mean(), y

In [11]:
# Train for 10 epochs, reporting validation cost and accuracy after each one.
for epoch in range(10):
    # Reshuffle the training samples (inputs and targets together) every epoch.
    x_train_mnist, t_train_mnist = shuffle(x_train_mnist, t_train_mnist)
    # Online learning: one parameter update per training sample.
    for x, t in zip(x_train_mnist, t_train_mnist):
        # [None, :] adds a leading axis so each sample is passed as a batch of one.
        cost = train_mst(x[None, :], t[None, :], eps=0.01)
    
    # Evaluate on the validation split; this overwrites the last training cost.
    cost, y_pred = valid_mst(x_valid_mnist, t_valid_mnist)
    # argmax over axis 1 turns one-hot targets and network outputs into class indices.
    accuracy = accuracy_score(t_valid_mnist.argmax(axis=1), y_pred.argmax(axis=1))
    print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(epoch + 1, cost, accuracy))

EPOCH: 1, Valid Cost: 0.143, Valid Accuracy: 0.959
EPOCH: 2, Valid Cost: 0.137, Valid Accuracy: 0.958
EPOCH: 3, Valid Cost: 0.105, Valid Accuracy: 0.969
EPOCH: 4, Valid Cost: 0.113, Valid Accuracy: 0.967
EPOCH: 5, Valid Cost: 0.117, Valid Accuracy: 0.967
EPOCH: 6, Valid Cost: 0.100, Valid Accuracy: 0.973
EPOCH: 7, Valid Cost: 0.118, Valid Accuracy: 0.973
EPOCH: 8, Valid Cost: 0.107, Valid Accuracy: 0.974
EPOCH: 9, Valid Cost: 0.121, Valid Accuracy: 0.971
EPOCH: 10, Valid Cost: 0.122, Valid Accuracy: 0.974
