In [82]:
import numpy as np
import pandas as pd
import chainer
import chainer.functions as F
import chainer.links as L
from chainer.cuda import to_cpu
from sklearn.metrics import accuracy_score, confusion_matrix
import urllib.request

import my_libs.network as my_net
import my_libs.preprocess as my_process
from my_libs.load_data import load_new_dataset
from run_evaluation import learn_network_model

### モデルの学習

In [5]:
# Options handed to learn_network_model for this run.
setting = dict(
    epoch=2,
    batch_size=32,
    use_gpu=True,
    fixed_base_w=True,
)

# Train my_net.VGG on data prepared by Processing_11; the result is kept as
# vgg1 and later passed to model_predict.
vgg1 = learn_network_model(
    my_process.Processing_11(),
    my_net.VGG(),
    setting,
)

epoch       main/accuracy  validation/main/accuracy  main/loss   validation/main/loss  elapsed_time
1           0.936508       0.9625                    0.192835    0.0926208             150.564       
2           0.96371        0.97                      0.108717    0.0894009             299.423       
Preprocess             Processing_11
Model                            VGG
Elapsed time                 299.423
Validation accuracy             0.97
batch_size                        32
epoch                              2
fixed_base_w                    True
use_gpu                         True
dtype: object


In [89]:
# Options handed to learn_network_model (same values as the other runs).
setting = dict(
    epoch=2,
    batch_size=32,
    use_gpu=True,
    fixed_base_w=True,
)

# Train the my_net.VGG_3 variant with the same preprocessing; kept as vgg2.
vgg2 = learn_network_model(
    my_process.Processing_11(),
    my_net.VGG_3(),
    setting,
)

epoch       main/accuracy  validation/main/accuracy  main/loss   validation/main/loss  elapsed_time
1           0.900794       0.94375                   9.82045     0.820429              147.112       
2           0.953125       0.965                     0.476862    0.285522              293.962       
Preprocess             Processing_11
Model                          VGG_3
Elapsed time                 293.962
Validation accuracy            0.965
batch_size                        32
epoch                              2
fixed_base_w                    True
use_gpu                         True
dtype: object


In [90]:
# Options handed to learn_network_model (same values as the other runs).
setting = dict(
    epoch=2,
    batch_size=32,
    use_gpu=True,
    fixed_base_w=True,
)

# Train the my_net.VGG_2 variant with the same preprocessing; kept as vgg3.
vgg3 = learn_network_model(
    my_process.Processing_11(),
    my_net.VGG_2(),
    setting,
)

epoch       main/accuracy  validation/main/accuracy  main/loss   validation/main/loss  elapsed_time
1           0.874504       0.96125                   8.19846     0.11324               150.002       
2           0.944556       0.96                      0.153258    0.102175              299.649       
Preprocess             Processing_11
Model                          VGG_2
Elapsed time                 299.649
Validation accuracy             0.96
batch_size                        32
epoch                              2
fixed_base_w                    True
use_gpu                         True
dtype: object


### testデータで推測してみる

In [91]:
# Load the three dataset splits; per the original note each item is a
# (path, label) tuple.
train, valid, test = load_new_dataset('../new_dataset/dataset/data/')
# Show the size of every split as the cell output.
tuple(len(split) for split in (train, valid, test))

(2000, 800, 1000)

In [2]:
def get_target_label(tuple_dataset):
    """Return the element at index 1 of every item in ``tuple_dataset``.

    Args:
        tuple_dataset: Iterable of indexable items; index 1 of each item is
            taken as its label.

    Returns:
        list: The collected labels, in iteration order.
    """
    labels = []
    for sample in tuple_dataset:
        labels.append(sample[1])
    return labels

def model_predict(tuple_dataset, model, gpu_id=0):
    """Predict a class index for every sample of ``tuple_dataset``.

    Each sample is pushed through ``model.predictor`` one at a time (batch of
    size 1) and the argmax of the output is recorded.

    Args:
        tuple_dataset: Iterable of ``(image, label)`` pairs; the label is
            ignored here.
        model: Object exposing ``predictor``, ``xp``, ``to_gpu`` and
            ``to_cpu`` (presumably a ``chainer.links.Classifier`` -- confirm).
        gpu_id: Device id passed to ``model.to_gpu``. A negative value or
            ``None`` keeps the model on its current device.

    Returns:
        list: One predicted class index per sample, in dataset order.
    """
    use_gpu = gpu_id is not None and gpu_id >= 0
    if use_gpu:
        model.to_gpu(gpu_id)

    predicted = []
    try:
        # The sample is fetched *outside* the config block on purpose, so any
        # dataset-side transform runs under the caller's configuration.
        for img, _label in tuple_dataset:
            batch = model.xp.asarray(np.array([img]))

            # 'enable_backprop' was previously misspelled ('enable_backprp'),
            # which left backprop enabled and built a graph on every call.
            with chainer.using_config('train', False), \
                    chainer.using_config('enable_backprop', False):
                scores = model.predictor(batch)

            predicted.append(np.argmax(to_cpu(scores.data)))
    finally:
        # Always hand the model back on the CPU, even if prediction failed.
        if use_gpu:
            model.to_cpu()
    return predicted

def evaluate_predict(target_labels, predicted):
    """Print the accuracy and the confusion matrix of a prediction.

    Args:
        target_labels: Ground-truth labels.
        predicted: Predicted labels, aligned with ``target_labels``.

    Returns:
        None. Both metrics are printed.
    """
    # Score against the labels that were passed in; the previous version read
    # the global ``test_t`` and silently ignored ``target_labels``.
    print(accuracy_score(target_labels, predicted))
    print(confusion_matrix(target_labels, predicted))

In [3]:
# transform
preprocess = my_process.Processing_11()
train = preprocess.transform(train)
valid = preprocess.transform(valid)
test = preprocess.transform(test)

train_t = get_target_label(train)
valid_t = get_target_label(valid)
test_t = get_target_label(test)

NameError: name 'my_process' is not defined

In [1]:
# アンサンブル前のモデルのテストデータ精度
predicted = model_predict(test, vgg1)
evaluate_predict(test_t, predicted)

NameError: name 'model_predict' is not defined

### Ensemble学習
Test-time augmentation (TTA)

In [111]:
%%time
# Number of repeated prediction passes over the test set.
TTA_N = 30
# Running per-sample sum of the predicted class indices.
# NOTE(review): this only helps if repeated passes differ, presumably via
# random augmentation in the transformed dataset -- confirm.
predict_sum = np.zeros(len(test_t))
for i in range(TTA_N):
    predict_sum = predict_sum + np.asarray(model_predict(test, vgg1))
    print('%s/%s cumputed.' % (i + 1, TTA_N))

# Majority vote: class 1 when more than half of the passes predicted 1.
predict_tta = np.greater(predict_sum / TTA_N, 0.5).astype(int)

  " Skipping tag %s" % (size, len(data), tag))


1/30 cumputed.
2/30 cumputed.
3/30 cumputed.
4/30 cumputed.
5/30 cumputed.
6/30 cumputed.
7/30 cumputed.
8/30 cumputed.
9/30 cumputed.
10/30 cumputed.
11/30 cumputed.
12/30 cumputed.
13/30 cumputed.
14/30 cumputed.
15/30 cumputed.
16/30 cumputed.
17/30 cumputed.
18/30 cumputed.
19/30 cumputed.
20/30 cumputed.
21/30 cumputed.
22/30 cumputed.
23/30 cumputed.
24/30 cumputed.
25/30 cumputed.
26/30 cumputed.
27/30 cumputed.
28/30 cumputed.
29/30 cumputed.
30/30 cumputed.
CPU times: user 13min 41s, sys: 3min 53s, total: 17min 34s
Wall time: 17min 34s


In [112]:
# Score the 30-pass TTA vote against the test labels.
evaluate_predict(target_labels=test_t, predicted=predict_tta)

0.96
[[468  32]
 [  8 492]]


In [125]:
%%time
# Same TTA vote as the 30-pass run, with fewer passes to compare cost/accuracy.
TTA_N = 10
predict_sum = np.zeros(len(test_t))
for i in range(TTA_N):
    predict_sum = predict_sum + np.asarray(model_predict(test, vgg1))
    print('%s/%s cumputed.' % (i + 1, TTA_N))

# Majority vote over the passes, then score it.
predict_tta = np.greater(predict_sum / TTA_N, 0.5).astype(int)
evaluate_predict(target_labels=test_t, predicted=predict_tta)

  " Skipping tag %s" % (size, len(data), tag))


1/10 cumputed.
2/10 cumputed.
3/10 cumputed.
4/10 cumputed.
5/10 cumputed.
6/10 cumputed.
7/10 cumputed.
8/10 cumputed.
9/10 cumputed.
10/10 cumputed.
0.959
[[472  28]
 [ 13 487]]
CPU times: user 4min 31s, sys: 1min 18s, total: 5min 50s
Wall time: 5min 50s


Random seed average

In [96]:
def make_stacked_predict(tuple_dataset, model_list):
    """Build a table holding one prediction column per model.

    Args:
        tuple_dataset: Dataset forwarded unchanged to ``model_predict``.
        model_list: Iterable of models; the model at position ``i`` fills
            column ``'model_<i>'``.

    Returns:
        pandas.DataFrame: One row per sample, one column per model.
    """
    # Run every model first, remembering the column each result belongs to.
    named_columns = [
        ('model_%s' % position, model_predict(tuple_dataset, member))
        for position, member in enumerate(model_list)
    ]

    # Assemble the frame column by column, in model order.
    predicts = pd.DataFrame()
    for column_name, column_values in named_columns:
        predicts[column_name] = column_values
    return predicts

In [16]:
# "Random seed average": run the same model five times over the test set.
# NOTE(review): the five columns only differ if model_predict is
# non-deterministic for this dataset (e.g. random augmentation) -- confirm.
model_list = [vgg1] * 5
# Stored as predict_df because the averaging cell below reads that name; the
# previous name (train_predicts) was never read and left predict_df undefined.
predict_df = make_stacked_predict(test, model_list)

  " Skipping tag %s" % (size, len(data), tag))


In [19]:
# Row-wise mean of the per-run predictions, thresholded at 0.5 (majority vote).
avg_predict = predict_df.mean(axis=1).gt(0.5).astype(int)
evaluate_predict(target_labels=test_t, predicted=avg_predict)

0.953
[[466  34]
 [ 13 487]]


#### Stacking

In [48]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [33]:
%%time
# Five passes of the same model provide the stacking features.
model_list = [vgg1] * 5

# One prediction table per split, built in train -> valid -> test order.
train_predicts_x, valid_predicts_x, test_predicts_x = (
    make_stacked_predict(split, model_list) for split in (train, valid, test)
)

  " Skipping tag %s" % (size, len(data), tag))


In [50]:
# stackedモデルの学習
stk_model = LinearRegression()
stk_model.fit(train_predicts_x, train_t)

# stackedモデルの予測(valid)
valid_stk_pred = (stk_model.predict(valid_predicts_x) > 0.5).astype(int)
print(accuracy_score(valid_t, valid_stk_pred))

# stackedモデルの予測(test)
test_stk_pred = (stk_model.predict(test_predicts_x) > 0.5).astype(int)
print(accuracy_score(test_t, test_stk_pred))

0.9825
0.958


3モデルStacking

In [97]:
%%time
# Stack the three differently-structured networks trained above.
model_list = [vgg1, vgg2, vgg3]

# One prediction table per split, built in train -> valid -> test order.
train_predicts_x, valid_predicts_x, test_predicts_x = (
    make_stacked_predict(split, model_list) for split in (train, valid, test)
)

  " Skipping tag %s" % (size, len(data), tag))


CPU times: user 5min 7s, sys: 1min 29s, total: 6min 36s
Wall time: 6min 36s


In [109]:
# stackedモデルの学習
stk_model = RandomForestRegressor()
stk_model.fit(train_predicts_x, train_t)

# stackedモデルの予測(valid)
valid_stk_pred = (stk_model.predict(valid_predicts_x) > 0.5).astype(int)
print(accuracy_score(valid_t, valid_stk_pred))

# stackedモデルの予測(test)
test_stk_pred = (stk_model.predict(test_predicts_x) > 0.5).astype(int)
print(accuracy_score(test_t, test_stk_pred))

0.98
0.937


In [110]:
# Test Average
avg_predict = (test_predicts_x.mean(axis=1) > 0.5).astype(int)
evaluate_predict(test_t, avg_predict)

0.937
[[481  19]
 [ 44 456]]
