In [1]:
import sys
sys.path.append('../Modules')

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
%matplotlib inline
import NeuralNet as nn

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import accuracy_score

# Fetch the 'mnist_784' dataset from OpenML as (features, labels).
# as_frame=False asks for NumPy arrays: recent scikit-learn releases return a
# pandas DataFrame for this dataset by default, and a DataFrame has no
# `.reshape`, so the reshape below would fail.
x, label = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Normalise every sample by its own largest value (row-wise maximum).
# A row whose maximum is 0 (a completely blank image) would divide by zero,
# so such rows are divided by 1 instead and simply stay all-zero.
pixel_max = np.max(x, axis=1, keepdims=True)
pixel_max[pixel_max == 0] = 1
x = x / pixel_max

# Turn each flat 784-value row into a 28 x 28 x 1 array
# (presumably height, width, channels -- confirm against nn.convolve2d).
x = x.reshape(-1, 28, 28, 1)

# Hold out 10% of the samples for the final evaluation. The split is random
# and unseeded, so it differs between runs.
x_train, x_test, label_train, label_test = train_test_split(x, label, test_size=0.1)

# One-hot encode the training labels; only the training targets are encoded,
# the test labels are compared as integers later on.
y_train = LabelBinarizer().fit_transform(label_train)

In [2]:
class ConvolutionalNeuralNetwork(nn.Network):
    """Small CNN: two convolution + max-pooling stages, then two dense layers.

    The last dense layer yields 10 logits that parameterise a categorical
    distribution stored on the instance as ``self.py``.
    """

    def __init__(self):
        """Create and register the weights and biases of all four layers.

        Weights are drawn from a zero-mean normal with scale 0.1 truncated at
        two standard deviations (a=-2, b=2); every bias starts at 0.1.
        """
        weight_dist = st.truncnorm(a=-2, b=2, scale=0.1)
        # Built in the same order as the keyword arguments are handed to the
        # base class, so the sequence of random draws is w1, w2, w3, w4.
        params = {
            "w1": weight_dist.rvs((5, 5, 1, 20)),
            "b1": np.zeros(20) + 0.1,
            "w2": weight_dist.rvs((5, 5, 20, 20)),
            "b2": np.zeros(20) + 0.1,
            "w3": weight_dist.rvs((4 * 4 * 20, 500)),
            "b3": np.zeros(500) + 0.1,
            "w4": weight_dist.rvs((500, 10)),
            "b4": np.zeros(10) + 0.1,
        }
        super().__init__(**params)

    def __call__(self, x, y=None):
        """Run the forward pass and return ``self.py.mu.value``.

        Parameters
        ----------
        x : input batch handed to the first convolution; the notebook feeds
            arrays reshaped to (-1, 28, 28, 1).
        y : optional targets, forwarded as ``data`` to the categorical
            distribution; omitted when only predictions are needed.

        Returns
        -------
        The ``mu.value`` of the categorical distribution -- presumably the
        per-class probabilities (callers take an argmax over the last axis).
        """
        # First convolution stage followed by 2x2 max pooling.
        conv1 = nn.relu(nn.convolve2d(x, self.w1) + self.b1)
        pooled1 = nn.max_pooling2d(conv1, (2, 2), (2, 2))

        # Second convolution stage followed by 2x2 max pooling.
        conv2 = nn.relu(nn.convolve2d(pooled1, self.w2) + self.b2)
        pooled2 = nn.max_pooling2d(conv2, (2, 2), (2, 2))

        # Flatten to one row per sample; 4 * 4 * 20 matches the rows of w3.
        flat = pooled2.reshape(-1, 4 * 4 * 20)
        hidden = nn.relu(flat @ self.w3 + self.b3)

        logits = hidden @ self.w4 + self.b4
        self.py = nn.random.Categorical(logit=logits, data=y)
        return self.py.mu.value

# Build the network and an Adam optimizer over its parameters (rate 1e-3).
model = ConvolutionalNeuralNetwork()
optimizer = nn.optimizer.Adam(model, 1e-3)

# Train on shuffled mini-batches of 50 samples until the optimizer has
# performed 1000 updates; a new permutation is drawn for every pass.
training_done = False
while not training_done:
    shuffled = np.random.permutation(len(x_train))
    for start in range(0, len(x_train), 50):
        model.clear()
        batch_idx = shuffled[start: start + 50]
        x_batch = x_train[batch_idx]
        y_batch = y_train[batch_idx]

        prob = model(x_batch, y_batch)
        log_likelihood = model.log_pdf()

        # Every 100 updates, report accuracy and log likelihood on the
        # current batch (measured before this batch's update is applied).
        if optimizer.n_iter % 100 == 0:
            true_classes = np.argmax(y_batch, axis=-1)
            predicted_classes = np.argmax(prob, axis=-1)
            accuracy = accuracy_score(true_classes, predicted_classes)
            print(
                "step {:04d}".format(optimizer.n_iter),
                "accuracy {:.2f}".format(accuracy),
                "Log Likelihood {:g}".format(log_likelihood.value),
                sep=", ",
            )

        log_likelihood.backward()
        optimizer.update()

        if optimizer.n_iter == 1000:
            # Leave the batch loop; the flag also ends the outer loop.
            training_done = True
            break
    
# Predict the held-out samples in mini-batches of 50 and collect the most
# probable class index of every sample.
label_pred = []
for i in range(0, len(x_test), 50):
    label_pred.append(np.argmax(model(x_test[i: i + 50]), axis=-1))

# Join the per-batch results with np.concatenate rather than
# np.asarray(...).ravel(): when len(x_test) is not a multiple of 50 the last
# batch is shorter, and a list of unequal-length arrays cannot be stacked into
# one regular array. An empty test set yields an empty prediction array.
label_pred = np.concatenate(label_pred) if label_pred else np.asarray(label_pred)

# The labels are converted to integers so they can be compared with the
# predicted class indices.
print("accuracy (test):", accuracy_score(np.array(list(map(int,label_test))), label_pred))

step 0000, accuracy 0.12, Log Likelihood -144.249
step 0100, accuracy 0.92, Log Likelihood -15.8367
step 0200, accuracy 0.92, Log Likelihood -14.7236
step 0300, accuracy 0.98, Log Likelihood -2.44036
step 0400, accuracy 0.98, Log Likelihood -3.99714
step 0500, accuracy 0.94, Log Likelihood -5.68415
step 0600, accuracy 1.00, Log Likelihood -0.457031
step 0700, accuracy 0.98, Log Likelihood -5.73995
step 0800, accuracy 0.98, Log Likelihood -10.6341
step 0900, accuracy 0.98, Log Likelihood -1.96297
accuracy (test): 0.98
