# Iris データを 多層ニューラルネットワークで分類

- ニューラルネットの最終層（softmax への入力） には relu を用いない（活性化関数を二重に用いていることになる）
- 今回に限っては　データの標準化 によってあまり大きな違いは見られなかった
    - むしろ過学習してしまう傾向？
 
- batchnormalization と　dropout を実装

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline

from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
from sklearn.datasets import load_iris

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import Chain, Variable
from chainer import optimizers

from tqdm import tqdm



In [2]:
Iris = load_iris()
x = Iris.data.astype(np.float32)
t = Iris.target.astype(np.int32)

std = StandardScaler()
#x = std.fit_transform(x)

# stratify を指定することで，元データの正解ラベル比率を維持して訓練データとテストデータに分割してくれる
x_train, x_test, t_train, t_test = train_test_split(x, t, test_size=0.3, random_state=0, stratify=t)

In [8]:
# 値を最小値から最大値が 0~1 に収まるようにスケーリング
mms = MinMaxScaler()
mms.fit(x_train)

x_train_mms = mms.transform(x_train)
x_test_mms = mms.transform(x_test)



In [3]:
# ニューラルネットを定義

class IrisNN(Chain):
    def __init__(self, hidden=[100, 100, 100], train=True):
        
        initializer = chainer.initializers.HeNormal()
        
        super(IrisNN, self).__init__(
    
        l1 = L.Linear(4, hidden[0], initialW=initializer),
        l2 = L.Linear(hidden[0], hidden[1], initialW=initializer),
        l3 = L.Linear(hidden[1], hidden[2], initialW=initializer),
        l4 = L.Linear(hidden[2], 3, initialW=initializer)
        
        )
        self.train = train
        self.dr = 0.5
    
    def set_train_state(self, train):
        
        self.train = train
    
    def set_dropout_ration(self, ratio):
        self.dr = ratio
    
    def __call__(self, x):

        h = F.relu(self.l1(x))
        h = F.dropout(F.relu(self.l2(h)), train = self.train, ratio = self.dr)
        h = F.dropout(F.relu(self.l3(h)), train = self.train, ratio = self.dr)
        h = self.l4(h)   # softmax に渡す最後の層では rerlu を用いないことに注意
        
        return h
    

In [5]:
# Classifier, Trainer を用いて学習を実行

from chainer import iterators, training, datasets
from chainer.training import extensions

model = L.Classifier(IrisNN()) # 識別関数は デフォルトで softmax cross entropy
opt = optimizers.Adam(alpha= 0.001)
opt.use_cleargrads()
opt.setup(model)

# trainer で用いることが出来るように学習セットを作成 ([特徴量], ラベル)　の tuple にした後に， 学習，テスト用に分ける
# 第二引数は，　第一返り値のサイズ（学習セットのサイズ）

train, test = datasets.split_dataset_random(datasets.TupleDataset(x,t), 100)

# train-llop毎に用いるバッチサイズ，　エポック毎にシャッフルするか指定
train_iter = iterators.SerialIterator(train, batch_size=10, shuffle=True)
test_iter = iterators.SerialIterator(test, batch_size=1, repeat=False, shuffle=False)

# Trainign の際のバッチの与え方，　Optimizer(更新則)を与える
updater = training.StandardUpdater(train_iter, opt)

# 何エポック学習を行うか指定
trainer = training.Trainer(updater, (100, 'epoch'), out='result')


# trainer の拡張？を設定

trainer.extend(extensions.Evaluator(test_iter, model))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())

trainer.run()

epoch       main/accuracy  validation/main/accuracy
[J1           0.35           0.42                      
[J2           0.3            0.46                      
[J3           0.52           0.46                      
[J4           0.54           0.5                       
[J5           0.57           0.58                      
[J6           0.53           0.66                      
[J7           0.59           0.58                      
[J8           0.59           0.64                      
[J9           0.64           0.72                      
[J10          0.77           0.74                      
[J     total [#####.............................................] 10.00%
this epoch [..................................................]  0.00%
       100 iter, 10 epoch / 100 epochs
       inf iters/sec. Estimated time to finish: 0:00:00.
[4A[J11          0.69           0.66                      
[J12          0.58           0.64                      
[J13          0.72

In [7]:
from chainer import iterators, training, datasets
from chainer.training import extensions


train, test = datasets.split_dataset_random(datasets.TupleDataset(x,t), 100)
train_iter = iterators.SerialIterator(train, batch_size=10, shuffle=True)
test_iter = iterators.SerialIterator(test, batch_size=10, repeat=False, shuffle=True)


model = L.Classifier(IrisNN()) # 識別関数は デフォルトで softmax cross entropy
opt = optimizers.Adam()
opt.setup(model)

# trainer の モデル評価に用いる際には， dropout を off にした状態で計算させる
eval_model = model.copy() # モデルのコピーでは，重みはコピー前と共有　→　model が学習したら eval_modelの重みも変化
eval_model.predictor.set_train_state(False)

updater = training.StandardUpdater(train_iter, opt)
trainer = training.Trainer(updater, (100, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, eval_model))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())

trainer.run()

epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy
[J1           8.97831     3.83923               0.32           0.26                      
[J2           7.1831      1.16099               0.32           0.32                      
[J3           5.23253     0.527264              0.39           0.74                      
[J4           4.17998     0.560301              0.41           0.74                      
[J5           2.64144     0.371556              0.45           0.74                      
[J6           2.04245     0.212314              0.57           1                         
[J7           1.82452     0.276353              0.55           0.76                      
[J8           1.01005     0.336582              0.69           0.74                      
[J9           1.47368     0.3536                0.59           0.74                      
[J10          0.993669    0.307442              0.68           0.76                      
[J 

In [8]:
x

array([[ 5.0999999 ,  3.5       ,  1.39999998,  0.2       ],
       [ 4.9000001 ,  3.        ,  1.39999998,  0.2       ],
       [ 4.69999981,  3.20000005,  1.29999995,  0.2       ],
       [ 4.5999999 ,  3.0999999 ,  1.5       ,  0.2       ],
       [ 5.        ,  3.5999999 ,  1.39999998,  0.2       ],
       [ 5.4000001 ,  3.9000001 ,  1.70000005,  0.40000001],
       [ 4.5999999 ,  3.4000001 ,  1.39999998,  0.30000001],
       [ 5.        ,  3.4000001 ,  1.5       ,  0.2       ],
       [ 4.4000001 ,  2.9000001 ,  1.39999998,  0.2       ],
       [ 4.9000001 ,  3.0999999 ,  1.5       ,  0.1       ],
       [ 5.4000001 ,  3.70000005,  1.5       ,  0.2       ],
       [ 4.80000019,  3.4000001 ,  1.60000002,  0.2       ],
       [ 4.80000019,  3.        ,  1.39999998,  0.1       ],
       [ 4.30000019,  3.        ,  1.10000002,  0.1       ],
       [ 5.80000019,  4.        ,  1.20000005,  0.2       ],
       [ 5.69999981,  4.4000001 ,  1.5       ,  0.40000001],
       [ 5.4000001 ,  3.

In [27]:
# 学習を実行
# trainer 無し の場合はバッチ学習，　学習結果の評価を自分で実装する必要がある

model = IrisNN()
opt = optimizers.Adam(alpha= 0.001)
opt.setup(model)

n_epoch = 20
batchsize = 10
datasize = len(y_train)

for i in tqdm(xrange(n_epoch)):
    indices = np.random.permutation(datasize)
    
    for j in xrange(0, datasize, batchsize):
        x_tmp = Variable(x_train_mms[indices[j : j+batchsize]])
        t_tmp = Variable(t_train[indices[j : j+batchsize]])
        
        model.cleargrads()
        
        y = model(x_tmp)
        loss = F.softmax_cross_entropy(y, t_tmp)
        loss.backward()
        opt.update()
        

NameError: name 'y_train' is not defined

In [54]:
# ランダムフォレストで学習させる

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rfc = RandomForestClassifier()
rfc.fit(x_train, t_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [55]:
pred = rfc.predict(x_test)
print(accuracy_score(pred, t_test))

0.977777777778


In [10]:
import chainer

#chainer.get_device(0)

In [None]:
chainer.

AttributeError: 'dict' object has no attribute 'a'