In [1]:
import numpy as np
from sklearn.utils import shuffle

In [2]:
# Multilayer perceptron
# Layer class

class Layer:
    """One fully connected layer: an affine map followed by an activation.

    Attributes:
        W: weight matrix of shape (in_dim, out_dim), drawn uniformly from
           [-sqrt(6/(in_dim+out_dim)), +sqrt(6/(in_dim+out_dim))] (Xavier).
        b: bias vector of length out_dim, initialised to zeros.
        function: activation applied to the pre-activation in fprop.
        diff_function: callable applied to the cached pre-activation in bprop.
        u: pre-activation cached by the most recent fprop call (None until then).
        delta: error signal stored by bprop (None until then).
    """

    def __init__(self,in_dim,out_dim,function,diff_function):
        # Xavier initialisation: symmetric uniform range set by the fan-in + fan-out.
        bound = np.sqrt(6./(in_dim+out_dim))
        self.W = np.random.uniform(low=-bound, high=bound, size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)

        self.function = function
        self.diff_function = diff_function

        # Filled in by fprop / bprop respectively.
        self.u     = None
        self.delta = None

    # forward propagation
    def fprop(self,x):
        """Return function(x.W + b) and cache the pre-activation in self.u."""
        pre_activation = np.dot(x,self.W)+self.b
        activated = self.function(pre_activation)
        self.u = pre_activation
        return activated

    # back propagation
    def bprop(self,delta,W):
        """Compute, store and return this layer's delta.

        `delta` and `W` are the delta and weight matrix of the layer that
        follows this one; the result is diff_function(self.u) * (delta . W^T).
        """
        slope = self.diff_function(self.u)
        propagated = np.dot(delta,W.T)
        self.delta = slope*propagated
        return self.delta

In [3]:
# Forward propagation through the whole network
def fprops(layers, x):
    """Feed `x` through every layer in order and return the final output.

    Each layer's `fprop` receives the previous layer's output; with an empty
    `layers` sequence the input is returned unchanged.
    """
    activation = x
    for current in layers:
        activation = current.fprop(activation)
    return activation

# Error back-propagation through the whole network
def bprops(layers, delta):
    """Propagate the output-layer `delta` backwards, storing a delta on every layer.

    The last layer simply receives `delta`; every earlier layer calls its own
    `bprop` with the delta and weight matrix of the layer that follows it.
    Nothing is returned; results live in each layer's `delta` attribute.
    """
    back_to_front = layers[::-1]
    if len(back_to_front) == 0:
        return

    # Output layer: its delta is supplied by the caller.
    output_layer = back_to_front[0]
    output_layer.delta = delta
    downstream_W = output_layer.W

    # Remaining layers, walking towards the input.
    for hidden in back_to_front[1:]:
        delta = hidden.bprop(delta, downstream_W)
        downstream_W = hidden.W

In [4]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    denominator = 1+np.exp(-x)
    return 1/denominator
def diff_sigmoid(x):
    """Derivative of the logistic function at x: s * (1 - s) with s = sigmoid(x)."""
    s = sigmoid(x)
    return s*(1-s)

def softmax(x):
    """Row-wise softmax of `x`, normalising along axis 1.

    The maximum of each row is subtracted before exponentiating. Softmax is
    invariant to that shift, and it keeps np.exp from overflowing to inf
    (which would otherwise yield inf/inf = nan for large logits).

    Args:
        x: array with at least two dimensions; axis 1 indexes the classes.

    Returns:
        Array of the same shape whose entries along axis 1 sum to 1.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)
def diff_softmax(x):
    """Element-wise p * (1 - p) with p = softmax(x).

    Only the per-element term is computed; cross terms between different
    classes are not included.
    """
    probabilities = softmax(x)
    complement = np.ones(x.shape)-probabilities
    return probabilities*complement

def tanh(x):
    """Element-wise hyperbolic tangent.

    Delegates to np.tanh rather than evaluating
    (e^x - e^-x) / (e^x + e^-x) directly: the explicit formula overflows to
    inf/inf = nan once |x| exceeds roughly 710, whereas np.tanh saturates
    cleanly at +/-1.
    """
    return np.tanh(x)
def diff_tanh(x):
    """Derivative of tanh at x, computed as 1 - tanh(x)^2."""
    t = tanh(x)
    return 1-t**2

In [5]:
# Dataset setup and network definition

# XOR: the target is 1 exactly when the two inputs differ.
train_X = np.array([[0, 1],
                    [1, 0],
                    [0, 0],
                    [1, 1]])
train_y = np.array([[1],
                    [1],
                    [0],
                    [0]])

# The same four samples are used for evaluation.
test_X = train_X
test_y = train_y

# 2 inputs -> 3 hidden units -> 1 output, sigmoid activations throughout.
layers = [
    Layer(2, 3, sigmoid, diff_sigmoid),
    Layer(3, 1, sigmoid, diff_sigmoid),
]

In [6]:
# train and test functions
def train(X,d,eps=1):
    """Apply one gradient-descent step to the global `layers`; return the new cost.

    Args:
        X: input batch, one sample per row.
        d: targets, broadcast-compatible with the network output.
        eps: learning rate (step size) applied to both weights and biases.

    Returns:
        The summed binary cross-entropy of the network on (X, d), evaluated
        after the parameters have been updated.
    """
    # Forward propagation. The input seen by every layer is recorded so the
    # gradients below use exactly the activations that produced the deltas.
    layer_inputs = []
    z = X
    for layer in layers:
        layer_inputs.append(z)
        z = layer.fprop(z)
    y = z

    # Output-layer delta. y - d is the gradient of the cross-entropy cost
    # below w.r.t. the pre-activation when the output activation is sigmoid.
    delta = y-d

    # Back propagation: stores a delta on every layer.
    bprops(layers,delta)

    # Update parameters. All gradients come from the recorded forward-pass
    # inputs; re-running fprop on an already-updated layer would feed the next
    # layer's gradient with activations inconsistent with its delta.
    for layer, layer_input in zip(layers, layer_inputs):
        dW = np.dot(layer_input.T,layer.delta)
        db = np.sum(layer.delta,axis=0)

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

    # Training cost after the update.
    y = fprops(layers,X)
    cost = np.sum(-d*np.log(y)-(1-d)*np.log(1-y))

    return cost

def test(X,d):
    """Evaluate the global `layers` on (X, d) without changing any parameters.

    Returns:
        A (cost, y) pair: the summed binary cross-entropy and the network
        output for X.
    """
    predictions = fprops(layers,X)
    per_element = -d*np.log(predictions)-(1-d)*np.log(1-predictions)
    total_cost = np.sum(per_element)
    return total_cost,predictions

In [7]:
# Parameter updates (training)
# epoch
for epoch in range(100):
    # Online learning: reshuffle, then update on one sample at a time.
    train_X, train_y = shuffle(train_X, train_y)
    for x,y in zip(train_X,train_y):
        # np.newaxis turns each 1-D sample into a batch with a single row.
        train(x[np.newaxis,:],y[np.newaxis,:])
    cost,pred_y = test(test_X,test_y)
# Function-call form prints the same text on Python 2 and also parses on Python 3.
print(pred_y)

[[ 0.97001731]
 [ 0.96789737]
 [ 0.02930604]
 [ 0.04483363]]


In [None]:
# The output y uses a one-of-K (one-hot) representation.
# The final layer's activation is softmax; the error function is multi-class cross-entropy.
# For the final layer's delta, see the textbook.

import matplotlib.pyplot as plt
import numpy

from sklearn.utils import shuffle
# NOTE(review): sklearn.cross_validation and fetch_mldata appear to be legacy
# scikit-learn APIs -- confirm they exist in the target environment.
from sklearn.cross_validation import train_test_split
from sklearn.metrics import f1_score

from sklearn.datasets import fetch_mldata
# Load the 'MNIST original' dataset, passing the current directory as data_home.
mnist = fetch_mldata('MNIST original', data_home='.')

# Shuffle samples and labels together.
X, y = shuffle(mnist.data, mnist.target)
# Scale the features by 1/255 (presumably mapping 8-bit pixel values into [0, 1] -- confirm).
X = X / 255.0
# Hold out 20% of the data for testing. This rebinds train_X / test_X /
# train_y / test_y, which earlier cells used for the XOR data.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)