In [1]:
import numpy as np
from sklearn.utils import shuffle

def sigmoid(x):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: a number or numpy array of pre-activations.

    Returns:
        1 / (1 + exp(-x)), with the same shape as ``x``.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)
def diff_sigmoid(x):
    """Derivative of the logistic sigmoid, evaluated element-wise at ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    s = sigmoid(x)
    return s * (1 - s)

def softmax(x):
    """Softmax over the last axis of ``x``.

    Args:
        x: array of logits with at least one dimension. A 1-D vector is
            treated as a single sample; for 2-D input every row is
            normalised independently.

    Returns:
        Array with the same shape as ``x`` whose entries along the last axis
        are positive and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (inf / inf = nan).
    shifted = x - np.max(x, axis=-1, keepdims=True)
    weights = np.exp(shifted)
    # Normalise along the class axis only, so batched input does not get
    # divided by the sum over the whole batch.
    return weights / np.sum(weights, axis=-1, keepdims=True)
def diff_softmax(x):
    """Element-wise s * (1 - s), where s = softmax(x).

    This is only the diagonal of the softmax Jacobian; the cross terms
    between different outputs are not included.

    Args:
        x: numpy array of logits (must expose ``.shape``).
    """
    s = softmax(x)
    return s * (np.ones(x.shape) - s)

def tanh(x):
    """Hyperbolic tangent, applied element-wise.

    Delegates to ``np.tanh`` rather than forming
    (exp(x) - exp(-x)) / (exp(x) + exp(-x)) by hand: the explicit ratio
    overflows to inf / inf = nan once |x| is large, whereas ``np.tanh``
    saturates cleanly at +/-1.

    Args:
        x: a number or array of pre-activations.

    Returns:
        tanh(x), with the same shape as ``x``.
    """
    return np.tanh(x)
def diff_tanh(x):
    """Derivative of tanh, evaluated element-wise at ``x``: 1 - tanh(x)^2."""
    t = tanh(x)
    return 1 - t ** 2

In [2]:
class Layer:
    """Fully connected layer computing ``function(x . W + b)``.

    Attributes:
        W: weight matrix of shape (in_dim, out_dim), drawn with Xavier
            uniform initialisation.
        b: bias vector of length out_dim, initialised to zeros.
        function: activation applied to the pre-activation.
        diff_function: derivative of the activation, called on the
            pre-activation during back-propagation.
        u: pre-activation stored by the latest ``fprop`` call (None before).
        delta: error term stored by the latest ``bprop`` call (None before).
    """

    def __init__(self, in_dim, out_dim, function, diff_function):
        # Xavier initialisation: uniform on [-limit, limit).
        limit = np.sqrt(6. / (in_dim + out_dim))
        self.W = np.random.uniform(low=-limit, high=limit,
                                   size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)

        self.function = function
        self.diff_function = diff_function

        # Filled in by fprop / bprop.
        self.u = None
        self.delta = None

    def fprop(self, x):
        """Forward pass: return ``function(x . W + b)`` and cache the pre-activation in ``self.u``."""
        pre_activation = np.dot(x, self.W) + self.b
        activation = self.function(pre_activation)
        self.u = pre_activation
        return activation

    def bprop(self, delta, W):
        """Backward pass for this layer.

        Args:
            delta: error term of the layer that follows this one.
            W: weight matrix of the layer that follows this one.

        Returns:
            ``diff_function(self.u) * (delta . W^T)``; the same value is
            stored in ``self.delta``.
        """
        slope = self.diff_function(self.u)
        back_signal = np.dot(delta, W.T)
        self.delta = slope * back_signal
        return self.delta

# Forward propagation through the whole network
def fprops(layers, x):
    """Feed ``x`` through every layer in order and return the final output.

    Each layer's ``fprop`` is called with the previous layer's output.
    With an empty ``layers`` sequence, ``x`` is returned unchanged.
    """
    activation = x
    for stage in layers:
        activation = stage.fprop(activation)
    return activation

# Error back-propagation through the whole network
def bprops(layers, delta):
    """Propagate ``delta`` from the last layer back to the first.

    The last layer simply stores ``delta`` as its own error term. Every
    earlier layer calls ``bprop`` with the error term and the weight matrix
    of the layer that follows it. Results are left on each layer's ``delta``
    attribute; nothing is returned.
    """
    following_W = None
    for depth, layer in enumerate(reversed(layers)):
        if depth:
            delta = layer.bprop(delta, following_W)
        else:
            # Output layer: the caller already supplies its error term.
            layer.delta = delta
        following_W = layer.W

In [3]:
# train and test functions
def train(X,d,eps=0.01):
    """Run one gradient-descent step on the global ``layers`` and return the cost.

    Args:
        X: input sample(s). A 1-D vector is promoted to a batch of one for
            the parameter update.
        d: target values; combined element-wise with the network output.
        eps: learning rate.

    Returns:
        sum(-d*log(y) - (1-d)*log(1-y)) evaluated on ``X`` after the
        parameters have been updated.
    """
    # Forward pass; every layer caches its pre-activation in layer.u.
    y = fprops(layers,X)

    # Output-layer error term, then back-propagate so that every layer
    # holds its own delta.
    delta = y-d
    bprops(layers,delta)

    # Update parameters layer by layer, carrying the layer input ``z`` along.
    z = X
    for layer in layers:
        layer.delta = np.atleast_2d(layer.delta)
        z = np.atleast_2d(z)

        # Compute this layer's output with its weights from before the update.
        # The deltas from bprops belong to the pre-update forward pass, so the
        # next layer's gradient must use the activations from that same pass,
        # not ones recomputed with already-updated weights.
        z_next = layer.fprop(z)

        dW = np.dot(z.T,layer.delta)        # shape (in_dim, out_dim), same as W
        db = np.sum(layer.delta,axis=0)     # sum of deltas over the batch

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

        z = z_next

    # Training cost after the update (this also refreshes layer.u for X).
    # NOTE(review): this is the element-wise (binary) cross-entropy. If the
    # output layer is a softmax, the loss matching delta = y - d would be
    # -sum(d*log(y)) -- confirm which one is intended.
    y = fprops(layers,X)
    cost = np.sum(-d*np.log(y)-(1-d)*np.log(1-y))

    return cost

def test(X):
    """Return the output of the global ``layers`` network for input ``X``.

    Only a forward pass is performed; no cost is computed.
    """
    return fprops(layers, X)

In [None]:
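# The output y uses a one-of-K (one-hot) representation.
# The final layer's activation is softmax; the error function is multi-class cross-entropy.
# See the textbook for the final-layer delta.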

import matplotlib.pyplot as plt
import numpy

from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.datasets import fetch_mldata

# Load MNIST, scale pixel values into [0, 1] and hold out 20% for testing.
mnist = fetch_mldata('MNIST original')

X, y = shuffle(mnist.data, mnist.target)
X = X / 255.0
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

# Convert the digit labels to 1-of-K (one-hot) targets.
labels_train = LabelBinarizer().fit_transform(train_y)
labels_test = LabelBinarizer().fit_transform(test_y)

# 784 -> 100 (tanh) -> 10 (softmax)
layers = [Layer(28*28,100,tanh,diff_tanh),
          Layer(100,10,softmax,diff_softmax)]

# Parameter updates (training)
for epoch in range(1):
    # Online learning: reshuffle once per epoch. The samples, the raw labels
    # and the one-hot targets are shuffled together so that they stay
    # aligned; shuffling train_X alone would pair every sample with some
    # other sample's label.
    train_X, train_y, labels_train = shuffle(train_X, train_y, labels_train)
    # Loop names are distinct from X / y so the dataset arrays are not clobbered.
    for sample, target in zip(train_X, labels_train):
        train(sample, target)

# Measure prediction accuracy on the test data.
predictions = []
for i in range(test_X.shape[0]):
    # Keep a leading batch axis of 1; the arg-max over the outputs is the class index.
    o = test(test_X[np.newaxis,i])
    predictions.append(np.argmax(o))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(confusion_matrix(test_y, predictions))
print(classification_report(test_y, predictions))
