In [1]:
import pickle
import numpy as np
from numpy.random import *

In [2]:
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# with exactly this directory layout. Consider a configurable data directory.
save_file = '/Users/komei0727/workspace/robot_intelligence/data/mnist.pkl'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(save_file, 'rb') as f:
    dataset = pickle.load(f)

# The pickle holds a 4-tuple: training images/labels and test images/labels.
train_img,train_label,test_img,test_label = dataset

# Scale pixel values into the range 0..1 (assumes raw values are 0..255) and
# store both splits as float32 so training and evaluation use the same dtype.
train_img = (train_img / 255).astype(np.float32)
test_img = (test_img / 255).astype(np.float32)

# Encode the labels as one-hot rows: row k of the 10x10 identity matrix is the
# one-hot vector for class k, so indexing it with the label list does the job.
train_label = [int(x) for x in train_label]
train_label_one_hot = np.identity(10)[train_label].astype(np.float32)
test_label = [int(x) for x in test_label]
test_label_one_hot = np.identity(10)[test_label].astype(np.float32)

In [3]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    Args:
        x: scalar or NumPy array.

    Returns:
        Value(s) of the same shape as ``x``.
    """
    denominator = np.exp(-x) + 1
    return 1 / denominator

def softmax(x):
    """Numerically stable softmax over the last axis.

    Each row (last-axis slice) is normalised independently, so both a single
    sample of shape ``(1, n)`` / ``(n,)`` and a batch of shape ``(batch, n)``
    yield probabilities that sum to 1 per sample.

    Args:
        x: scalar or array of logits.

    Returns:
        Array of the same shape as ``x`` holding the softmax probabilities.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; return an empty floating-point array.
        return np.exp(x)
    # A 0-d input has no axis to reduce over, so reduce over everything.
    axis = -1 if x.ndim > 0 else None
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing to inf (which would produce inf/inf = NaN).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)

def sigmoid_dash(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    Args:
        x: the already-activated value ``sigmoid(a)`` (not the pre-activation).

    Returns:
        ``x * (1 - x)``, element-wise.
    """
    complement = 1 - x
    return complement * x

def cross_entropy(y, t):
    """Cross-entropy loss between predicted probabilities and one-hot targets.

    Args:
        y: predicted probabilities, shape ``(batch, classes)`` or ``(classes,)``.
        t: target distribution (e.g. one-hot), same shape as ``y``.

    Returns:
        ``np.float32`` scalar: the per-sample loss ``-sum(t * log(y))`` averaged
        over the batch. For a single sample this is simply that sample's loss.
    """
    y = np.atleast_2d(y)
    t = np.atleast_2d(t)
    # Clamp probabilities away from zero: log(0) is -inf, and 0 * -inf is NaN,
    # which would poison the loss whenever a zero-target class has probability 0.
    eps = 1e-12
    per_sample = -np.sum(t * np.log(np.maximum(y, eps)), axis=-1)
    return np.float32(np.mean(per_sample))

In [4]:
# Functionality shared by every layer.
class Layer(object):
    """Base class holding a layer's parameters, their gradients and a learning rate.

    Attributes:
        params: dict mapping a parameter name to its array.
        grads: dict mapping the same names to the corresponding gradient arrays.
        lr: step size used by ``update``.
    """

    def __init__(self, lr=0.01):
        self.lr = lr
        self.params = {}
        self.grads = {}

    def update(self):
        """Apply one gradient-descent step: ``param <- param - lr * grad``."""
        for name, value in self.params.items():
            self.params[name] = value - self.lr * self.grads[name]

    def zerograd(self):
        """Reset every gradient to zeros matching its parameter's shape and dtype."""
        for name, value in self.params.items():
            self.grads[name] = np.zeros(value.shape, value.dtype)
            
# Container that runs forward and backward propagation through a stack of layers.
class Sequential:
    """Ordered stack of layers.

    Every layer is expected to provide ``forward``, ``backward``, ``update``
    and ``zerograd`` methods; this class simply calls them in the right order.
    """

    def __init__(self, layers=None):
        """Create the container.

        Args:
            layers: optional list of layers to start with. When omitted, a new
                empty list is created for this instance.
        """
        # A literal ``[]`` default is evaluated once and shared by every
        # instance created without arguments, so layers added to one model
        # would leak into the next. Build the list per instance instead.
        self.layers = [] if layers is None else layers

    def addlayer(self, layer):
        """Append a layer to the end of the stack."""
        self.layers.append(layer)

    def forward(self, x):
        """Feed ``x`` through the layers first-to-last and return the final output."""
        for l in self.layers:
            x = l.forward(x)
        return x

    def backward(self, y):
        """Propagate the gradient ``y`` through the layers last-to-first."""
        for l in reversed(self.layers):
            y = l.backward(y)
        return y

    def update(self):
        """Ask every layer to apply its parameter update."""
        for l in self.layers:
            l.update()

    def zerograd(self):
        """Ask every layer to reset its gradients to zero."""
        for l in self.layers:
            l.zerograd()

class LinearLayer(Layer):
    """Fully connected layer computing ``x @ W + b``.

    ``W`` has shape ``(input_dim, output_dim)`` and ``b`` has shape
    ``(1, output_dim)``; inputs are row vectors of shape ``(batch, input_dim)``.
    """

    def __init__(self, input_dim, output_dim, lr=0.01):
        """Allocate and initialise the parameters.

        Args:
            input_dim: number of input features.
            output_dim: number of output features.
            lr: learning rate forwarded to ``Layer``.
        """
        super(LinearLayer, self).__init__(lr)
        # Weights drawn from a normal distribution with std sqrt(1/input_dim)
        # (Xavier-style initialisation).
        self.params['W'] = np.random.normal(loc = 0.0, scale = np.sqrt(1.0/input_dim), size = (input_dim, output_dim)).astype(np.float32)
        # Biases start at zero.
        self.params['b'] = np.zeros(shape = (1, output_dim), dtype = np.float32)

    def forward(self, x):
        """Return ``x @ W + b`` and remember ``x`` for the backward pass."""
        self.x = x
        return np.dot(x, self.params['W']) + self.params['b']

    def backward(self, y):
        """Store parameter gradients and return the gradient w.r.t. the input.

        Args:
            y: gradient of the loss w.r.t. this layer's output,
                shape ``(batch, output_dim)``.

        Returns:
            Gradient w.r.t. the layer input, shape ``(batch, input_dim)``.
        """
        self.grads['W'] = np.dot(self.x.T, y)
        # Sum over the batch axis so the gradient keeps b's (1, output_dim) shape.
        self.grads['b'] = np.sum(y, axis=0, keepdims=True)
        # Chain rule through the affine map: dL/dx = dL/dy @ W^T. Returning the
        # already-reduced gradient lets any layer type sit below this one.
        return np.dot(y, self.params['W'].T)


class SigmoidLayer(Layer):
    """Element-wise sigmoid activation; it has no trainable parameters."""

    def __init__(self):
        super(SigmoidLayer, self).__init__()

    def forward(self, x):
        """Return ``sigmoid(x)`` and cache the activation for the backward pass."""
        z = sigmoid(x)
        self.z = z
        return z

    def backward(self, y):
        """Scale the incoming gradient by the sigmoid derivative at the cached output.

        Args:
            y: gradient w.r.t. this layer's output, same shape as the activation.

        Returns:
            Gradient w.r.t. this layer's input, same shape as ``y``.
        """
        return y * sigmoid_dash(self.z)
    
class Classfier:
    """Softmax classifier wrapped around a model.

    The model must provide ``zerograd``, ``forward``, ``backward`` and
    ``update``; its forward output is treated as unnormalised class scores.
    """

    def __init__(self, model):
        self.model = model

    def update(self, x, t):
        """Run one training step on the sample ``x`` with target ``t``.

        Args:
            x: input row vector(s) fed to the model.
            t: one-hot target with the same shape as the model output.

        Returns:
            The cross-entropy loss measured before the parameter update, so
            callers can monitor training (previously it was computed and dropped).
        """
        self.model.zerograd()
        y = self.model.forward(x)
        prob = softmax(y)
        loss = cross_entropy(prob, t)
        # For softmax followed by cross-entropy, the gradient of the loss with
        # respect to the scores is simply (probabilities - targets).
        self.model.backward(prob - t)
        self.model.update()
        return loss

    def test(self, x):
        """Return the class probabilities of the first sample in ``x``.

        Returns:
            Array of shape ``(1, classes)``.
        """
        y = softmax(self.model.forward(x))
        return np.array([y[0]])

In [5]:
# Seed NumPy's global generator so the weight initialisation below (and any
# later draws from the same generator) is reproducible across kernel restarts.
SEED = 0
np.random.seed(SEED)

# 784 inputs -> 1000 sigmoid hidden units -> 10 class scores.
# The layers are passed as a fresh list so that re-running this cell always
# builds a clean three-layer model instead of appending to a previous one.
model = Sequential([
    LinearLayer(784, 1000),
    SigmoidLayer(),
    LinearLayer(1000, 10),
])
classifier = Classfier(model)

In [6]:
N_ROUNDS = 100          # number of train-then-evaluate rounds
TRAIN_PER_ROUND = 200   # training samples used in each round
TEST_PER_ROUND = 100    # test samples scored in each round

for i in range(N_ROUNDS):
    # Pick a fresh random subset of each split by index. This keeps images and
    # labels aligned, adapts to the actual dataset sizes, and avoids copying
    # the full arrays every round just to use a few hundred rows.
    train_idx = np.random.permutation(len(train_img))[:TRAIN_PER_ROUND]
    test_idx = np.random.permutation(len(test_img))[:TEST_PER_ROUND]

    # Train one sample at a time; each sample is wrapped into a (1, n) batch.
    for j in train_idx:
        x = np.array([train_img[j]])
        t = np.array([train_label_one_hot[j]])
        classifier.update(x, t)

    # Count how many of the sampled test images are classified correctly.
    count = 0
    for j in test_idx:
        x = np.array([test_img[j]])
        prob = classifier.test(x)
        if np.argmax(prob[0]) == np.argmax(test_label_one_hot[j]):
            count += 1

    # Report the round number followed by the accuracy on the sampled test set.
    print(i+1)
    print(count/len(test_idx))

1
0.13
2
0.14
3
0.62
4
0.38
5
0.26
6
0.71
7
0.67
8
0.67
9
0.7
10
0.7
11
0.65
12
0.78
13
0.74
14
0.79
15
0.81
16
0.82
17
0.72
18
0.74
19
0.6
20
0.81
21
0.83
22
0.85
23
0.85
24
0.83
25
0.86
26
0.79
27
0.82
28
0.77
29
0.84
30
0.86
31
0.91
32
0.86
33
0.81
34
0.87
35
0.85
36
0.82
37
0.85
38
0.83
39
0.87
40
0.83
41
0.86
42
0.88
43
0.9
44
0.82
45
0.77
46
0.85
47
0.85
48
0.88
49
0.93
50
0.83
51
0.8
52
0.87
53
0.89
54
0.84
55
0.81
56
0.82
57
0.87
58
0.89
59
0.91
60
0.9
61
0.87
62
0.92
63
0.81
64
0.83
65
0.88
66
0.85
67
0.9
68
0.89
69
0.91
70
0.9
71
0.88
72
0.86
73
0.93
74
0.86
75
0.91
76
0.87
77
0.89
78
0.87
79
0.89
80
0.82
81
0.87
82
0.92
83
0.87
84
0.94
85
0.9
86
0.92
87
0.84
88
0.91
89
0.82
90
0.84
91
0.85
92
0.83
93
0.88
94
0.89
95
0.86
96
0.9
97
0.91
98
0.89
99
0.84
100
0.86
