In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import Variable, optimizers, Chain, cuda
import data
import pickle
import io

In [9]:
# Load MNIST through the project-local `data` helper and cast to the dtypes
# used for training.  Pixels are cast to float32 *before* dividing by 255 so
# the division itself happens in float32.
# NOTE(review): dividing by 255 presumably maps 8-bit pixels into [0, 1] --
# confirm against data.load_mnist_data().
mnist = data.load_mnist_data()
x_all = mnist["data"].astype(np.float32) / 255
y_all = mnist["target"].astype(np.int32)

# The first 60000 rows are used for training, everything after them for testing.
x_train = x_all[:60000]
x_test = x_all[60000:]

In [10]:
class Model(Chain):
    """Stacked autoencoder that is pre-trained greedily, one layer at a time.

    Encoder links ``l1`` .. ``l5`` shrink the input
    784 -> 400 -> 100 -> 49 -> 16 -> 3.  Every encoder ``lN`` has a matching
    decoder ``lNb`` that maps its output back to its input size; a decoder is
    only used while its own layer is being pre-trained.  ``l6`` is the output
    layer applied at the end of :meth:`finetune`.
    """

    def __init__(self):
        super(Model, self).__init__(
            l1=L.Linear(784, 400),
            l1b=L.Linear(400, 784),
            l2=L.Linear(400, 100),
            l2b=L.Linear(100, 400),
            l3=L.Linear(100, 49),
            l3b=L.Linear(49, 100),
            l4=L.Linear(49, 16),
            l4b=L.Linear(16, 49),
            l5=L.Linear(16, 3),
            l5b=L.Linear(3, 16),
            # finetune() referenced self.l6 but it was never registered, so
            # the call could only fail with AttributeError.
            # NOTE(review): 10 outputs assumes one unit per MNIST digit
            # class -- confirm the intended size.
            l6=L.Linear(3, 10))

    def _autoencoder_pairs(self):
        """Return the (encoder, decoder) pairs ordered from layer 1 to 5."""
        return (
            (self.l1, self.l1b),
            (self.l2, self.l2b),
            (self.l3, self.l3b),
            (self.l4, self.l4b),
            (self.l5, self.l5b),
        )

    def __call__(self, x, train=True, layer=0):
        """Run the encoder up to ``layer`` and reconstruct that layer's input.

        Args:
            x: Variable holding the network input (its ``.data`` is read).
            train: Dropout switch.  It is applied to the layer being trained,
                to any layer above it, and to the decoder output; layers
                below ``layer`` always run with dropout disabled.
            layer: 1-based index of the autoencoder layer to train/evaluate.

        Returns:
            For ``layer`` in 1..5 a tuple ``(reconstruction, target)`` where
            ``target`` is the detached input of encoder ``layer`` and
            ``reconstruction`` is the decoder output for it.
            For any other ``layer`` value only the detached input of the last
            encoder is returned.
        """
        h = x
        for depth, (encoder, decoder) in enumerate(self._autoencoder_pairs(), 1):
            # Re-wrap the raw array in a new Variable; otherwise Chainer keeps
            # the functions chained to the layers below this one.
            x = Variable(h.data)
            use_dropout = train if depth >= layer else False
            h = F.dropout(F.relu(encoder(x)), train=use_dropout)
            if layer == depth:
                return F.dropout(decoder(h), train=train), x
        # NOTE(review): this returns the *input* of l5 (16 units), not the
        # 3-unit code `h` computed from it.  Kept as in the original --
        # confirm whether `h` was intended.
        return x

    def finetune(self, x, train=True):
        """Forward ``x`` through l1 .. l5 and the output layer l6.

        The original body applied ``self.l1`` five times (a size mismatch from
        the second call on) and ignored ``train``.  Each layer is now used
        once, in order.

        Args:
            x: Input accepted by ``l1`` (784 features per sample).
            train: When false, dropout is disabled everywhere.  When true
                (the default) dropout is active on l3 .. l6 only, matching
                the per-layer flags of the original code.

        Returns:
            Output of ``l6`` after ReLU and dropout.
        """
        schedule = (
            (self.l1, False),
            (self.l2, False),
            (self.l3, True),
            (self.l4, True),
            (self.l5, True),
            (self.l6, True),
        )
        h = x
        for link, layer_uses_dropout in schedule:
            h = F.dropout(F.relu(link(h)), train=train and layer_uses_dropout)
        return h

    def dump(self):
        """Pickle the encoder links l1 .. l5 to ``l1.pkl`` .. ``l5.pkl``.

        Files are written to the current working directory and are closed
        as soon as each link has been written.
        """
        for name in ("l1", "l2", "l3", "l4", "l5"):
            with io.open(name + ".pkl", "wb") as out:
                pickle.dump(getattr(self, name), out)

In [11]:
# Build the stacked autoencoder that the following cells train and evaluate.
model = Model()

In [12]:
# Array module used when building input batches; NumPy here, i.e. CPU arrays.
# NOTE(review): presumably meant to be swapped for a GPU array module
# (e.g. cuda.cupy) when training on a GPU -- confirm.
xp=np

In [13]:
# Training schedule: mini-batch size, number of training samples drawn from
# per epoch, and number of epochs spent on each layer.
batchsize = 100
datasize = 60000
epochs = 1

# Momentum SGD over all parameters of `model`.
optimizer = optimizers.MomentumSGD(momentum=0.9, lr=0.01)
optimizer.setup(model)

In [14]:
# Greedy layer-wise pre-training: autoencoder layers 1..5 are trained one
# after another, each for `epochs` passes over the training set.
for layer in range(1, 6):
    # Register the model with the optimizer again before every layer.
    # NOTE(review): presumably this resets the momentum state -- confirm.
    optimizer.setup(model)
    for j in range(epochs):
        # Visit the training samples in a new random order each epoch.
        indexes = np.random.permutation(datasize)
        for i in range(0, datasize, batchsize):
            x = Variable(xp.asarray(x_train[indexes[i: i+batchsize]]))
            model.zerograds()
            # model() returns (reconstruction, target); `x` is rebound to the
            # detached input of the layer currently being trained.
            img,x = model(x, layer=layer)
            loss = F.mean_squared_error(img, x)
            loss.backward()
            optimizer.update()
    # Reconstruction error of the current layer on the whole test split, with
    # train=False.  Each layer is scored against its own input, so the values
    # printed for different layers are not directly comparable.
    x = Variable(xp.asarray(x_test))
    img, x = model(x, train=False, layer=layer)
    loss = F.mean_squared_error(img, x)
    # `j` is the index of the last epoch run above.
    print("layer:", layer, j, loss.data)
    # Re-write l1.pkl .. l5.pkl after every layer.
    model.dump()

layer: 1 0 0.10275529325008392
layer: 2 0 0.0005041405675001442
layer: 3 0 7.114500476745889e-05
layer: 4 0 8.6892832769081e-06
layer: 5 0 1.0151077276532305e-06
