In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [2]:
# Load the scikit-learn digits dataset. The displayed repr below shows that the
# returned object carries (among others) the 'data', 'target', 'frame' and
# 'feature_names' entries.
dataset = load_digits()
dataset

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
 

In [3]:
# Flattened feature matrix: one row per sample, one column per pixel.
dataset.data.shape

(1797, 64)

In [4]:
# The same samples kept as 2-D images (samples, rows, columns).
dataset.images.shape

(1797, 8, 8)

In [5]:
# One integer class label per sample.
dataset.target.shape

(1797,)

In [6]:
X = dataset.data
labels = dataset.target  # integer class ids, kept under their own name

# one-hot: row k of the identity matrix is the encoding of class k, so the
# matrix must have max(label) + 1 rows. Deriving the width from the labels
# avoids hard-coding the number of classes.
n_classes = int(labels.max()) + 1
y = np.eye(n_classes)[labels]

In [7]:
# Hold out 20% of the samples for testing. A fixed random_state makes the
# shuffle, and therefore the split itself, repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1437, 64), (360, 64), (1437, 10), (360, 10))

In [8]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Evaluated as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that
    large-magnitude negative inputs do not overflow inside ``exp`` (the naive
    form computes exp(-x) directly, which overflows once -x exceeds ~709).

    Args:
        x: scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, np.negative(x)))


def softmax(X, axis=-1):
    """Numerically stable softmax along one axis.

    The maximum along ``axis`` is subtracted before exponentiating; this leaves
    the result unchanged mathematically but keeps ``exp`` from overflowing on
    large logits. Normalisation is done along ``axis`` only, so a batch of
    several rows yields one probability distribution per row. For a 1-D vector
    or a single (1, n) row this matches normalising over the whole array.

    Args:
        X: scalar or array of logits.
        axis: axis holding the class scores (default: last axis).

    Returns:
        Array of the same shape as ``X`` whose entries along ``axis`` sum to 1.
    """
    X = np.asarray(X)
    if X.size == 0:
        # Nothing to normalise; return an empty float array of the same shape.
        return np.exp(X)
    if X.ndim == 0:
        # A scalar has no axes to reduce over; reduce over "everything".
        axis = None

    shifted = X - np.max(X, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)


def cross_entropy_error(Y_pred, Y_gt):
    """Summed cross-entropy between predictions and ground-truth targets.

    A small constant is added to the predictions before taking the log so that
    a predicted probability of exactly 0 does not produce -inf.

    Args:
        Y_pred: predicted probabilities.
        Y_gt: target values, broadcastable against ``Y_pred``.

    Returns:
        The negated sum of ``Y_gt * log(Y_pred + 1e-7)`` over all elements.
    """
    epsilon = 1e-7
    log_probabilities = np.log(Y_pred + epsilon)
    weighted = Y_gt * log_probabilities
    return -np.sum(weighted)


def root_mean_squired_error(Y_pred, Y_gt):
    """Root of the mean squared difference between predictions and targets.

    The mean is taken over every element of the difference, so the result is a
    single scalar regardless of the input shape.
    """
    residual = Y_pred - Y_gt
    mean_square = np.mean(np.square(residual))
    return np.sqrt(mean_square)

In [9]:
# Optimisation schedule: number of passes over the training set and the
# step size used for every weight update.
epochs, lr = 80, 0.001

# Layer widths. Input and output sizes are read off the training arrays
# (y_train is one-hot encoded, so its column count is the number of classes);
# the two hidden widths are fixed.
D_in, D_out = X_train.shape[1], y_train.shape[1]
H1_layer, H2_layer = 128, 32

In [10]:
# Seed NumPy's legacy global RNG before the first random draw so that the
# initial parameters are identical on every Restart & Run All.
np.random.seed(42)

# Weight matrices, one per layer, drawn from a standard normal.
# Shapes follow (fan_in, fan_out) so a (1, fan_in) row can be right-multiplied.
W1 = np.random.randn(D_in, H1_layer)
W2 = np.random.randn(H1_layer, H2_layer)
W3 = np.random.randn(H2_layer, D_out)

In [11]:
# One (1, width) bias row per layer, drawn from a standard normal.
# The generator is consumed left to right, so the draws happen in the
# order b1, b2, b3.
b1, b2, b3 = (
    np.random.randn(1, width) for width in (H1_layer, H2_layer, D_out)
)

In [12]:
def forward_pass(sample):
    """Run one flattened sample through the three-layer network.

    Reads the current notebook-level parameters W1/b1, W2/b2 and W3/b3 at call
    time, so it always uses the most recently updated weights.

    Args:
        sample: 1-D feature vector of length D_in.

    Returns:
        (row, out1, out2, out3): the input reshaped to a (1, D_in) row, the two
        sigmoid hidden activations, and the softmax output of shape (1, D_out).
    """
    row = sample.reshape(1, -1)

    # layer 1
    net1 = row @ W1 + b1
    out1 = sigmoid(net1)

    # layer 2
    net2 = out1 @ W2 + b2
    out2 = sigmoid(net2)

    # layer 3
    net3 = out2 @ W3 + b3
    out3 = softmax(net3)

    return row, out1, out2, out3


for epoch in range(epochs):

    # train: one parameter update per sample
    Y_pred_train = []
    # Loop names are deliberately not `x` / `y`, so the notebook-level label
    # matrix `y` is not overwritten by the last sample.
    for x_i, y_i in zip(X_train, y_train):

        # forward
        row, out1, out2, y_pred = forward_pass(x_i)
        Y_pred_train.append(y_pred)

        # back propagation
        # layer 3
        # 2 * (y_pred - y_i) is used directly as the gradient w.r.t. net3 (no
        # softmax Jacobian is applied). That is twice the softmax/cross-entropy
        # logit gradient, while the loss reported below is RMSE.
        error = -2 * (y_i - y_pred)
        grad_W3 = out2.T @ error
        grad_b3 = error

        # layer 2 (uses W3 before it is updated; out * (1 - out) is the sigmoid derivative)
        error = error @ W3.T * out2 * (1 - out2)
        grad_W2 = out1.T @ error
        grad_b2 = error

        # layer 1 (uses W2 before it is updated)
        error = error @ W2.T * out1 * (1 - out1)
        grad_W1 = row.T @ error
        grad_b1 = error

        # update (only after every gradient has been computed)
        # layer 1
        W1 = W1 - lr * grad_W1
        b1 = b1 - lr * grad_b1

        # layer 2
        W2 = W2 - lr * grad_W2
        b2 = b2 - lr * grad_b2

        # layer 3
        W3 = W3 - lr * grad_W3
        b3 = b3 - lr * grad_b3

    # Training metrics use the predictions collected while the weights were
    # still changing during this epoch.
    Y_pred_train = np.array(Y_pred_train).reshape(-1, D_out)
    loss_train = root_mean_squired_error(Y_pred_train, y_train)
    acc_train = np.mean(np.argmax(Y_pred_train, axis=1) == np.argmax(y_train, axis=1))

    # test: forward only, with the weights as they stand after this epoch
    Y_pred_test = []
    for x_i in X_test:
        _, _, _, y_pred = forward_pass(x_i)
        Y_pred_test.append(y_pred)

    Y_pred_test = np.array(Y_pred_test).reshape(-1, D_out)
    loss_test = root_mean_squired_error(Y_pred_test, y_test)
    acc_test = np.mean(np.argmax(Y_pred_test, axis=1) == np.argmax(y_test, axis=1))

    print('loss train:', loss_train, 'accuracy train:', acc_train)
    print('loss test:', loss_test, 'accuracy test:', acc_test)

print('train completed!')


loss train: 0.3267476797305985 accuracy train: 0.14335421016005567
loss test: 0.30643636749018344 accuracy test: 0.18888888888888888
loss train: 0.29107603787540515 accuracy train: 0.2755741127348643
loss test: 0.28272570207302755 accuracy test: 0.34444444444444444
loss train: 0.26813765797568906 accuracy train: 0.43075852470424497
loss test: 0.26805103589837087 accuracy test: 0.41388888888888886
loss train: 0.2535795475806087 accuracy train: 0.5337508698677801
loss test: 0.25818283051880875 accuracy test: 0.49166666666666664
loss train: 0.2422366093569188 accuracy train: 0.5845511482254697
loss test: 0.24909336172838795 accuracy test: 0.5361111111111111
loss train: 0.23200894232562289 accuracy train: 0.6318719554627696
loss test: 0.24157926011824918 accuracy test: 0.5833333333333334
loss train: 0.22228775628394223 accuracy train: 0.6694502435629784
loss test: 0.23561516032508567 accuracy test: 0.6166666666666667
loss train: 0.21369206967523385 accuracy train: 0.7132915796798887
loss t