In [2]:
## アヤメの種類を学習するCNNの実装
## 誤差をバッチごと累積して重みを更新する
import numpy as np
from chainer import cuda, Function, gradient_check,\
    Variable, optimizers, serializers, utils
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L

In [3]:
from sklearn import datasets

## アヤメに関する4次元x150個のデータ
iris = datasets.load_iris()
## 入力データ
X = iris.data.astype(np.float32)
## 訓練データ
Y = iris.target
## 入力データサイズ
N = Y.size

print ("shape of iris.data : ", iris.data.shape)

shape of iris.data :  (150, 4)


In [4]:
## アヤメは3種類に分類されるので訓練データ3N次元
Y2 = np.zeros(3*N).reshape(N,3).astype(np.float32)
for i in range(N):
## 正解の種類は1.0, それ以外の2種類は0.0とする
    Y2[i,Y[i]] = 1.0

In [5]:
## 奇数noデータを訓練データ,偶数noデータを検証用に設定
index = np.arange(N)
xtrain =  X[index[index % 2 != 0],:]
ytrain = Y2[index[index % 2 != 0],:]
xtest  =  X[index[index % 2 == 0],:]
yans   =  Y[index[index % 2 == 0]]

In [6]:
## 4x6x3のCNNモデル定義
class IrisChain(Chain):
    def __init__(self):
        super(IrisChain, self).__init__(
            l1 = L.Linear(4,6),
            l2 = L.Linear(6,3),
        )
        
    ## callで誤差関数を定義
    def __call__(self, x, y):
        return F.mean_squared_error(self.fwd(x), y)
    
    ## 順伝搬はcallと別に定義
    def fwd(self, x):
        h1 = F.sigmoid(self.l1(x))
        h2 = self.l2(h1)
        return h2

In [7]:
## CNNモデル初期化と最適化手法設定
model = IrisChain()
optimizer = optimizers.SGD()
optimizer.setup(model)

In [10]:
## ミニバッチを作成して学習開始
n = 74     # 訓練データ数
bs = 25    # バッチサイズ
for j in range(5000):
    accum_loss = None
    ## 入力データをランダムに入れ換えてバッチ化
    sffindx = np.random.permutation(n)
    for i in range(0, n, bs):
        x = Variable(xtrain[sffindx[i:(i+bs) if (i+bs)<n else n]])
        y = Variable(ytrain[sffindx[i:(i+bs) if (i+bs)<n else n]])
        model.zerograds()
        loss = model(x,y)
        ## バッチサイズごとに誤差を積算する
        accum_loss = loss if accum_loss is None else accum_loss + loss
    loss.backward()
    optimizer.update()

In [11]:
## 検証用データxtで学習結果をテスト
xt = Variable(xtest, volatile='on')
yy = model.fwd(xt)
ans = yy.data
nrow, ncol = ans.shape

In [12]:
ok = 0
for i in range(nrow):
    ## 確率が最大の結果をclsに出力
    cls = np.argmax(ans[i,:])
    print (ans[i,:], cls)
    if cls == yans[i]:
        ok += 1

print ("correct rate : ", ok, "/", nrow, "=", (ok*1.0)/nrow)

[ 1.01038098 -0.02295017  0.01524439] 0
[ 0.99811667  0.00662816 -0.00486591] 0
[ 1.01528001 -0.01880449  0.01912084] 0
[ 1.00482774  0.01505142  0.0036808 ] 0
[ 0.96302927  0.05005854 -0.03422555] 0
[ 1.01507831 -0.0420292   0.02631649] 0
[ 0.97660446  0.00554806 -0.01784101] 0
[ 1.03194702 -0.04493427  0.03923669] 0
[ 1.03034925 -0.01445496  0.03023872] 0
[ 1.01104689 -0.05175972  0.02958629] 0
[ 0.98694873 -0.03172249  0.00794801] 0
[ 1.02954769  0.01639771  0.01592061] 0
[ 0.96446419  0.01931661 -0.00645229] 0
[ 0.99246478  0.00618362  0.00253645] 0
[ 1.00509357 -0.02639735  0.01103058] 0
[ 0.96622992  0.02182168 -0.01847574] 0
[ 1.02928793 -0.03819835  0.0401285 ] 0
[ 0.97735029 -0.00233483 -0.01153341] 0
[ 1.01377583 -0.0414952   0.0193471 ] 0
[ 0.98201042  0.03456974 -0.022751  ] 0
[ 1.01511598 -0.00643277  0.01290843] 0
[ 0.9973712   0.02349782 -0.0080094 ] 0
[ 0.99961472 -0.0105474   0.02087966] 0
[ 1.01583004 -0.02929157  0.02732   ] 0
[ 1.01526022 -0.03695452  0.02548078] 0
