In [1]:
import numpy as np
from sklearn.utils import shuffle

def sigmoid(x):
    return 1/(1+np.exp(-x))
def diff_sigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp)
def diff_softmax(x):
    return softmax(x)*(np.ones(x.shape)-softmax(x))

def tanh(x):
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
def diff_tanh(x):
    return 1-tanh(x)**2

In [2]:
class Layer:
    def __init__(self,in_dim,out_dim,function,diff_function):
        #Xavier
        self.W = np.random.uniform(
                                    low=-np.sqrt(6./(in_dim+out_dim)), 
                                    high=np.sqrt(6./(in_dim+out_dim)), 
                                    size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)
        self.function = function
        
        self.diff_function = diff_function
        self.u     = None
        self.delta = None

    #foward propagation
    def fprop(self,x):
        u = np.dot(x,self.W)+self.b
        z = self.function(u)
        self.u = u
        return z

    #back propagation
    def bprop(self,delta,W):
        delta = self.diff_function(self.u)*np.dot(delta,W.T)
        self.delta = delta
        return delta

#ネットワーク全体の順伝播
def fprops(layers, x):
    z = x
    for layer in layers:
        z = layer.fprop(z)
    return z

#ネットワーク全体の誤差逆伝播
def bprops(layers, delta):
    for i,layer in enumerate(layers[::-1]):
        if i==0:
            layer.delta = delta
        else:
            delta = layer.bprop(delta,_W)
        _W = layer.W

In [3]:
#train関数とtest関数
def train(X,d,eps=0.01):
    #forward propagation
    y = fprops(layers,X)
        
    #cost function & delta
    cost = np.sum(-d*np.log(y)-(1-d)*np.log(1-y))
    delta = y-d
    
    #back propagation
    bprops(layers,delta)

    #update parameters
    z = X

    for layer in layers:
        layer.delta = np.atleast_2d(layer.delta)
        z = np.atleast_2d(z)
        dW = np.dot(z.T, layer.delta)
        db = np.dot(np.ones(len(z)),layer.delta)

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

        z = layer.fprop(z)
        
    #train cost
    y = fprops(layers,X)
    cost = np.sum(-d*np.log(y)-(1-d)*np.log(1-y))
    
    return cost

def test(X):
    #test cost
    y = fprops(layers,X)
    #cost = np.sum(-d*np.log(y)-(1-d)*np.log(1-y))
    #return cost,y
    return y

In [4]:
# 出力yはone-of-k表現
# 最終層の活性化関数はsoftmax関数，誤差関数は多クラス交差エントロピー
# 最終層のデルタは教科書参照

import matplotlib.pyplot as plt
import numpy

from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import fetch_mldata

mnist = fetch_mldata('MNIST original')

X, y = shuffle(mnist.data, mnist.target)
X = X / 255.0
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

# 教師信号の数字を1-of-K表記に変換
labels_train = LabelBinarizer().fit_transform(train_y)
#labels_test = LabelBinarizer().fit_transform(test_y)

layers = [Layer(28*28,100,tanh,diff_tanh),
          Layer(100,10,softmax,diff_softmax)]

#パラメータの更新 学習


for epoch in range(1):
    #print epoch
    #online learning
    #train_X, train_y = shuffle(train_X, labels_train)
    for x,y in zip(train_X,labels_train):
        cost = train(x,y)
#cost,pred_y = test(test_X,test_y)
    print cost

# テストデータを用いて予測精度を計算
predictions = []
for i in range(test_X.shape[0]):
    o = test(test_X[i])
    predictions.append(np.argmax(o))
print confusion_matrix(test_y, predictions)
print classification_report(test_y, predictions)


2.58632390323e-05
[[1326    0   49    9    1   31    9    5    5    0]
 [   0 1518   16    0    6    6    3    8    3    1]
 [   2    3 1320    6    6    4    3    8   13    0]
 [   1    2   41 1290    1   40    2   16   23    6]
 [   2    0   24    1 1322    3   10   11    5   16]
 [   3    0    5   10    7 1172    7    3    9    5]
 [   3    0   32    3   16   24 1310    0    2    0]
 [   2    6   16    4   10    4    0 1433    1   16]
 [   5   31   69   31   22   67    5    8 1127    9]
 [   8    8   11   16   69   15    0   61   18 1140]]
             precision    recall  f1-score   support

        0.0       0.98      0.92      0.95      1435
        1.0       0.97      0.97      0.97      1561
        2.0       0.83      0.97      0.90      1365
        3.0       0.94      0.91      0.92      1422
        4.0       0.91      0.95      0.93      1394
        5.0       0.86      0.96      0.91      1221
        6.0       0.97      0.94      0.96      1390
        7.0       0.92    