In [1]:
from __future__ import division, print_function
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix, classification_report

import renom as rm
from renom.optimizer import Sgd

In [2]:
# data_path must point to the directory containing the mldata folder.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; migrate to fetch_openml('mnist_784') when upgrading scikit-learn.
data_path = "../dataset"
mnist = fetch_mldata('MNIST original', data_home=data_path)

X = mnist.data
y = mnist.target

# Rescale the image data to the range 0 ~ 1.
X = X.astype(np.float32)
X /= X.max()        # X = X / 255 (presumably; the maximum pixel value is used)

# Hold out 10% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Convert the labels to a one-hot representation.
# The binarizer is fitted once, on the training labels, and reused for the test
# labels so that both matrices share the same class-to-column mapping (fitting
# a second binarizer on y_test could yield a different number or order of
# columns if a class were missing from the test split).
label_binarizer = LabelBinarizer().fit(y_train)
labels_train = label_binarizer.transform(y_train).astype(np.float32)
labels_test = label_binarizer.transform(y_test).astype(np.float32)

# Training data size.
N = len(X_train)

In [14]:
class Mnist(rm.Model):
    """Two-layer fully connected network: Dense(100) -> ReLU -> Dense(10)."""

    def __init__(self):
        super(Mnist, self).__init__()
        self._layer1 = rm.Dense(100)
        self._layer2 = rm.Dense(10)

    def forward(self, x):
        """Return the output of the second dense layer for the input ``x``."""
        hidden = rm.relu(self._layer1(x))
        return self._layer2(hidden)

    # Weight regularization (weight decay), the key point of this notebook.
    def weight_decay(self):
        """Return the sum of the squared weights of both dense layers."""
        penalty_layer1 = rm.sum(self._layer1.params.w**2)
        penalty_layer2 = rm.sum(self._layer2.params.w**2)
        return penalty_layer1 + penalty_layer2

In [15]:
# Choose the neural network: build an instance of the Mnist model defined above.
network = Mnist()

In [None]:
# Hyper parameters
batch = 64
epoch = 10
# Coefficient applied to the weight-decay penalty (sum of squared weights).
weight_decay_rate = 0.0001

optimizer = Sgd(lr=0.1)

learning_curve = []
test_learning_curve = []

# Number of full mini-batches per epoch; trailing samples that do not fill a
# whole batch are skipped in that epoch.
n_batches = N // batch

# Whether the model defines a penalty term does not change during training,
# so the check is done once rather than on every iteration.
use_weight_decay = hasattr(network, "weight_decay")

for i in range(epoch):
    # Visit the training samples in a fresh random order every epoch.
    perm = np.random.permutation(N)
    loss = 0
    for j in range(n_batches):
        batch_idx = perm[j * batch:(j + 1) * batch]
        train_batch = X_train[batch_idx]
        response_batch = labels_train[batch_idx]

        # The computational graph is only generated for this block:
        with network.train():
            batch_loss = rm.softmax_cross_entropy(network(train_batch), response_batch)
            if use_weight_decay:
                batch_loss += weight_decay_rate * network.weight_decay()

        # Back propagation
        grad = batch_loss.grad()

        # Update
        grad.update(optimizer)

        # Changing type to ndarray is recommended.
        # Note: the accumulated value includes the weight-decay penalty.
        loss += batch_loss.as_ndarray()

    # Mean training loss over the mini-batches of this epoch.
    train_loss = loss / n_batches

    # Validation (cross entropy only; no weight-decay penalty is added here).
    test_loss = rm.softmax_cross_entropy(network(X_test), labels_test).as_ndarray()
    test_learning_curve.append(test_loss)
    learning_curve.append(train_loss)
    print("epoch %03d train_loss:%f test_loss:%f" % (i, train_loss, test_loss))

195.87354
195.86139
195.87341
195.9146
196.04312
196.25327
196.48964
196.74301
197.031
197.35686
197.70244
198.05582
198.46622
198.85257
199.2575
199.73383
200.17578
200.60805
201.0354
201.4445
201.86548
202.28018
202.69522
203.07346
203.45836
203.81638
204.21092
204.56862
204.90506
205.23668
205.59102
205.88065
206.19029
206.46939
206.66516
206.89592
207.19144
207.48701
207.78896
208.09464
208.35316
208.59268
208.83783
209.10747
209.31432
209.56683
209.81416
210.0503
210.26991
210.52052
210.72598
210.93213
211.18051
211.40427
211.66946
211.94518
212.19891
212.44171
212.61519
212.77332
212.93396
213.1592
213.36533
213.55188
213.75095
213.89888
214.0719
214.26486
214.43706
214.60725
214.81223
215.01282
215.16475
215.34445
215.50964
215.65518
215.83313
215.99167
216.16591
216.3083
216.47029
216.59569
216.71782
216.81946
216.84358
216.97562
217.10828
217.27847
217.45502
217.55766
217.6843
217.81535
217.96292
218.12466
218.2678
218.36824
218.49748
218.61018
218.72438
218.89293
219.01865
21

258.10016
258.13596
258.19244
258.21448
258.2618
258.3101
258.28598
258.32928
258.39975
258.4627
258.4845
258.5376
258.60696
258.65323
258.687
258.74374
258.76126
258.8169
258.8885
258.93744
258.961
258.99734
259.00998
259.03394
259.0534
258.9652
258.96466
258.96768
258.96732
259.00064
259.03586
259.10776
259.13577
259.1349
259.1675
259.22977
259.2556
259.28326
259.2827
259.31262
259.3702
259.4115
259.4643
259.49728
259.56085
259.6094
259.64517
259.70944
259.73456
259.74518
259.78308
259.8052
259.8112
259.8319
259.85184
259.83118
259.84387
259.89377
259.9527
259.9632
259.93542
259.94678
259.99142
260.01456
260.05807
260.1272
260.12518
260.15347
260.1526
260.09247
260.10892
260.08344
260.13068
260.18756
260.2491
260.309
260.36813
260.43903
260.4771
260.52588
260.55832
260.61307
260.5944
260.6139
260.64502
260.68073
260.7093
260.78467
260.85852
260.85974
260.84995
260.87817
260.91168
260.84854
260.85995
260.89722
260.92682
260.9518
261.0174
261.04828
261.05557
261.0788
261.0798
261.1021


In [None]:
# The predicted class is the index of the largest network output per sample.
test_output = network(X_test).as_ndarray()
predictions = np.argmax(test_output, axis=1)

# Confusion matrix and classification report.
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

# Learning curve: per-epoch training and test loss on the same axes.
fig, ax = plt.subplots()
for curve, curve_label in ((learning_curve, "train"), (test_learning_curve, "test")):
    ax.plot(curve, linewidth=3, label=curve_label)
ax.set_title("Learning curve")
ax.set_ylabel("error")
ax.set_xlabel("epoch")
ax.legend()
ax.grid()
plt.show()