# ニューラルネットワークモデルの作成

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
import keras

Using TensorFlow backend.


EDAで作成したデータファイル（``data_7.csv``）から特徴量を読み込む

In [3]:
df_7 = pd.read_csv('data_7.csv')
df_7.describe()

Unnamed: 0,Id,age,domain1_var1,domain1_var2,domain2_var1,domain2_var2,IC_01,IC_07,IC_05,IC_16,...,CBN(13)_vs_DMN(94),CBN(18)_vs_DMN(94),CBN(4)_vs_DMN(94),CBN(7)_vs_DMN(94),CBN(18)_vs_CBN(13),CBN(4)_vs_CBN(13),CBN(7)_vs_CBN(13),CBN(4)_vs_CBN(18),CBN(7)_vs_CBN(18),CBN(7)_vs_CBN(4)
count,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,...,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0,5877.0
mean,15909.667007,50.034068,51.502462,59.30438,47.328355,51.91008,0.005368,0.009237,0.01062,0.000895,...,-0.126329,0.340097,0.126801,0.299151,0.49532,0.578637,0.461299,0.19764,0.768528,0.354929
std,3411.775315,13.539881,9.801768,10.957016,11.087953,11.799972,0.004585,0.004162,0.003571,0.003587,...,0.254345,0.180884,0.238343,0.201886,0.197955,0.268321,0.254933,0.299296,0.193878,0.182111
min,10001.0,14.257265,15.769168,1.021874,0.991172,0.815285,-0.015894,-0.007958,-0.00224,-0.013459,...,-0.932657,-0.584421,-0.709769,-0.559527,-0.686442,-0.467751,-0.639171,-1.142909,-0.471138,-0.323693
25%,12961.0,40.129361,45.397852,53.133474,40.225097,44.586221,0.002445,0.006437,0.008172,-0.001451,...,-0.302299,0.230182,-0.02681,0.173736,0.374122,0.424559,0.307198,0.022054,0.637085,0.243677
50%,15925.0,50.427747,51.847306,60.052535,47.811205,52.572032,0.005512,0.009205,0.010567,0.000786,...,-0.111208,0.357885,0.146985,0.316375,0.49722,0.579708,0.476942,0.247063,0.755655,0.362175
75%,18886.0,59.580851,57.892677,66.521451,55.024768,59.843566,0.008443,0.012035,0.012972,0.003207,...,0.061984,0.469599,0.300593,0.443144,0.615462,0.738983,0.624984,0.4133,0.881732,0.471903
max,21754.0,84.491113,81.32558,94.702874,82.164478,94.509903,0.022888,0.027168,0.024085,0.022613,...,0.650448,0.852686,0.780373,0.912523,1.513935,2.123638,1.562309,1.102878,1.857374,1.282488


ニューラルネットワークを使用するので、特徴量および予測値の両者を標準化する。  
またfMRIデータ（相関係数）は数が多く、これに対する過学習を避けるため標準化したうえでさらに500で割り、特徴量の影響を軽減する。

In [4]:
X = df_7.iloc[:, 6:].values
y = df_7.iloc[:, 1:6].values

# train/validationに分割
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=1, train_size=0.75)

# 標準化
sc_x = StandardScaler()
sc_y = StandardScaler()
sc_x.fit(X_train)
sc_y.fit(y_train)
X_train_std = sc_x.transform(X_train)
X_val_std = sc_x.transform(X_val)
y_train_std = sc_y.transform(y_train)
y_val_std = sc_y.transform(y_val)

# fMRIデータ（86列目以降）は500で除す
X_train_tsf = X_train_std.copy()
X_train_tsf[:, 86:] = X_train_std[:, 86:]/500
X_val_tsf = X_val_std.copy()
X_val_tsf[:, 86:] = X_val_std[:, 86:]/500

Kerasを用いてモデル定義（ノード数：input:1463→720→360→180→output:5）

In [23]:
def build_model():
    model = tf.keras.Sequential([
            tf.keras.layers.Dense(720, activation = tf.nn.relu, input_shape=(1457,)),
            tf.keras.layers.Dense(360, activation = tf.nn.relu),
            tf.keras.layers.Dense(180, activation = tf.nn.relu),
            tf.keras.layers.Dense(5, activation = tf.keras.activations.linear)])
    return model

K-foldを使用し、5通りのtrain/validationで学習

In [25]:
# K-fold
kfold = KFold(n_splits=5).split(X_std, y_std)
model = build_model()
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', 
                                           min_delta=0, 
                                           patience=20, 
                                           verbose=1, 
                                           mode='auto')

for train_index, test_index in kfold:
    X_train, X_val = X_std[train_index], X_std[test_index]
    y_train, y_val = y_std[train_index], y_std[test_index]

    model.summary()
    model.compile(loss='mean_absolute_error',
                  optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
                  metrics=['mean_absolute_error'])
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        batch_size=100,
                        epochs=25,
                        callbacks=[early_stop],
                        verbose=1)

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 720)               1049760   
_________________________________________________________________
dense_41 (Dense)             (None, 360)               259560    
_________________________________________________________________
dense_42 (Dense)             (None, 180)               64980     
_________________________________________________________________
dense_43 (Dense)             (None, 5)                 905       
Total params: 1,375,205
Trainable params: 1,375,205
Non-trainable params: 0
_________________________________________________________________
Train on 4701 samples, validate on 1176 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/2

Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 00022: early stopping
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 720)               1049760   
_________________________________________________________________
dense_41 (Dense)             (None, 360)               259560    
_________________________________________________________________
dense_42 (Dense)             (None, 180)               64980     
_________________________________________________________________
dense_43 (Dense)             (None, 5)                 905       
Total params: 1,375,205
Trainable params: 1,375,205
Non-trainable params: 0
_________________________________________________________________
Train on 4702 samples, validate on 1175 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Ep

In [20]:
# 評価
preds = model.predict(X_tsf)
preds = sc_y.inverse_transform(preds)
scores = np.sum(np.abs(y - preds), axis=0) / np.sum(preds, axis=0)
scores

array([0.12352451, 0.10939322, 0.10925011, 0.13513068, 0.12834675])

モデル保存