In [174]:
# Run identifier (file numbering); used to name the saved model file.
number = 'model204'

In [154]:
import pandas as pd
import keras
from tqdm.autonotebook import tqdm
from matplotlib import pyplot as plt
import numpy as np
from keras.models import Sequential
from keras import layers 
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import csv
import optuna
# Set pandas' `display.max_columns` option to 200.
pd.options.display.max_columns = 200

In [155]:
# Load the source table from the CSV file into a DataFrame.
data = pd.read_csv('./datas/data2.csv')

In [156]:
# Columns that are added up row by row into the `sum_precipitation` feature.
# NOTE(review): presumably one precipitation reading per observation site —
# confirm against the data source.
precipitation_columns = [
    'haregamine',
    'seikaen',
    'kagamiko',
    'kanazawa',
    'osawa',
    'shiyakusho',
    'izuminohoikuen',
    'tamagawahoikuen',
    'hakubutsukan',
    'kohigashihoikuen',
    'yonezawahoikuen',
    'kashiwabarakominkan',
    'kirigamine',
    'shirakabako',
    'kitayatsugatake',
    'okutateshina',
    'minoto',
    'haramura',
    'toyokanko',
    'tokyu',
    'alpico',
    'village',
    'mitsuinomori',
    'kajima',
]
data['sum_precipitation'] = data[precipitation_columns].sum(axis=1)

In [157]:
# Columns that are added up row by row into the `sum_suii` feature.
# NOTE(review): presumably water-level sensor IDs ("suii") — confirm against
# the data source.
suii_columns = [
    'd00010150ee',
    'd0001003b30',
    'd0001003cd5',
    'd0001014057',
    'd0001014058',
    'd0001014149',
    'd00010142a8',
    'd00010150ff',
    'd0001015154',
    'd0001015161',
    'd000101516f',
    'd000101518a',
    'd0001015192',
    'd00010151cf',
    'd00010151f4',
    'd0001015220',
    'd0001015272',
    'd00010152a0',
    'd00010152c3',
    'd000101530c',
    'd0001014224',
]
data['sum_suii'] = data[suii_columns].sum(axis=1)

In [158]:
# Keep only the `date` column, the `zeniba` series and the two aggregated sums.
selected_columns = ['date', 'zeniba', 'sum_precipitation', 'sum_suii']
edited_data = data.loc[:, selected_columns]

In [159]:
# Leave out the leading `date` column and convert the remaining columns
# to a float array.
feature_columns = edited_data.columns[1:]
np_data = edited_data[feature_columns].to_numpy(dtype=float)

In [160]:
# Rescale every column of np_data to the [0, 1] range; the fitted scaler is
# kept in `mmscaler`.
# NOTE(review): the scaler is fitted on every row, including rows that are
# later served by the validation and test generators. Consider fitting on the
# training rows only — confirm whether this is intentional.
mmscaler = MinMaxScaler(feature_range=(0, 1))
np_data = mmscaler.fit_transform(np_data)

In [161]:
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=1):
    """Endlessly yield ``(samples, targets)`` batches of sliding windows.

    Parameters
    ----------
    data : 2-D array, indexed as ``data[row][column]``
        Source rows. Column 0 is used as the prediction target.
    lookback : int
        Number of rows before the anchor row spanned by each window.
    delay : int
        Offset, in rows, from the anchor row to the target row.
    min_index, max_index : int
        Anchor rows are drawn from ``[min_index + lookback, max_index)``.
        If ``max_index`` is None it becomes ``len(data) - delay - 1``.
    shuffle : bool
        If true, anchor rows are sampled at random (with replacement);
        otherwise they are visited in order.
    batch_size : int
        Number of anchor rows per batch.
    step : int
        Stride between the rows taken inside a window.

    Yields
    ------
    samples : ndarray, shape (batch, ceil(lookback / step), data.shape[-1])
    targets : ndarray, shape (batch,)
        ``data[row + delay][0]`` for each anchor row.

    Raises
    ------
    ValueError
        If the index range leaves no anchor row to draw from.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    first_row = min_index + lookback
    if first_row >= max_index:
        # Without this check the sequential branch would yield empty batches forever.
        raise ValueError(
            'no usable rows: min_index + lookback (%d) must be smaller than max_index (%d)'
            % (first_row, max_index))

    # range(row - lookback, row, step) produces ceil(lookback / step) rows.
    # Sizing the buffer with lookback // step broke whenever step did not
    # divide lookback; the two agree when it does.
    timesteps = len(range(0, lookback, step))

    i = first_row
    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            # Wrap around before a batch that would reach max_index; the
            # trailing partial batch is therefore skipped.
            if i + batch_size >= max_index:
                i = first_row
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows), timesteps, data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            samples[j] = data[range(row - lookback, row, step)]
            targets[j] = data[row + delay][0]

        yield samples, targets

In [162]:
# Hyperparameters.
# -- window geometry, counted in rows of the data array
lookback = 70  # rows of history covered by each input window
step = 1       # stride inside a window (author's note: sample every 10 minutes)
delay = 12     # rows ahead of the window for the target (author's note: predict 2 hours ahead)
# -- training loop sizes
batch_size = 64
steps_per_epoch = 75
# Earlier fixed values, kept for reference only:
#   lr = 0.001, recurrent_dropout1 = 0.5, recurrent_dropout2 = 0.5

In [163]:
# Row boundaries of the split: training uses rows up to train_max, validation
# the rows between train_max + 1 and val_max, testing everything after val_max.
train_max = 8000
val_max = 10000


# Training generator: randomly sampled windows.
train_gen = generator(np_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=train_max,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)

# Validation generator: sequential windows.
val_gen = generator(np_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=train_max + 1,
                    max_index=val_max,
                    step=step,
                    batch_size=batch_size)

# Test generator: sequential windows up to the end of the data.
test_gen = generator(np_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=val_max + 1,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

# Number of batches to draw from val_gen to cover the validation range once.
val_steps = (val_max - (train_max + 1) - lookback) // batch_size

# Number of batches to draw from test_gen to cover the test range once.
# With max_index=None, generator() stops at len(np_data) - delay - 1, so those
# trailing rows must be left out of the count. Otherwise the step count can
# exceed what the generator holds and it wraps around, repeating early batches.
test_max = len(np_data) - delay - 1
test_steps = (test_max - (val_max + 1) - lookback) // batch_size

In [164]:
# Training callbacks: early stopping plus best-model checkpointing.

# File that receives the saved model.
checkpoint_path = 'models/' + number + '.h5'

# Watch the validation loss and stop training once it has not improved
# for 30 epochs.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)

# Save the model to checkpoint_path, keeping only the best one by val_loss.
best_checkpoint = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                  monitor='val_loss',
                                                  save_best_only=True)

callbacks_list = [early_stopping, best_checkpoint]

In [183]:
count_trial = 1


def create_model(trial):
    """Optuna objective: build, train and score one two-layer LSTM.

    Samples the learning rate, the unit counts of both LSTM layers and both
    recurrent dropout rates from ``trial``, trains the model on ``train_gen``
    with ``val_gen`` as validation data, and reports the result.

    Parameters
    ----------
    trial : optuna trial object
        Used only through its ``suggest_*`` methods.

    Returns
    -------
    float
        The smallest ``val_loss`` recorded in the training history.
    """
    # Print and advance a running trial counter as a simple progress log.
    global count_trial
    print(count_trial)
    count_trial += 1

    # Release the graph and state left behind by the previous trial's model;
    # otherwise memory use grows with every one of the study's trials.
    keras.backend.clear_session()

    # Learning rate, sampled on a log scale.
    lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)

    # Number of units in each LSTM layer.
    hl1 = trial.suggest_int('hidden_layer1', 2, 128)
    hl2 = trial.suggest_int('hidden_layer2', 2, 128)

    # Recurrent dropout rates. The parameter names keep their original
    # spelling ('reccurent') so they match the keys of results already recorded.
    rd1 = trial.suggest_uniform('reccurent_dropout1', 0.0, 0.8)
    rd2 = trial.suggest_uniform('reccurent_dropout2', 0.0, 0.8)

    model = Sequential()
    model.add(layers.LSTM(hl1,
                          recurrent_dropout=rd1,
                          return_sequences=True,
                          input_shape=(None, np_data.shape[-1])))
    model.add(layers.LSTM(hl2,
                          recurrent_dropout=rd2,
                          return_sequences=False))
    model.add(layers.Dense(1))

    rmsprop = RMSprop(lr=lr)
    model.compile(optimizer=rmsprop, loss='mse')
    # NOTE(review): callbacks_list (and therefore the checkpoint file) is
    # shared by every trial — confirm that the checkpoint is meant to track
    # the best model across the whole study rather than per trial.
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=500,
                                  callbacks=callbacks_list,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  verbose=0)

    return min(history.history['val_loss'])

In [184]:
# Create a study that minimises the objective (Optuna's default direction,
# spelled out here) and run 100 trials of create_model.
study = optuna.create_study(direction='minimize')
study.optimize(create_model, n_trials=100)

[I 2020-09-18 14:06:49,489] A new study created in memory with name: no-name-453cc813-9d55-4bea-9d24-846476c4f308


1


KeyboardInterrupt: 

In [167]:
# Hyperparameters of the best trial found by the study.
best_params = study.best_params

In [168]:
# Objective value (lowest validation loss returned by create_model) of the best trial.
best_value = study.best_value

In [171]:
# Show the best hyperparameters and the best objective value, one per line.
print(best_params, best_value, sep='\n')

{'learning_rate': 2.6620440776826038e-05, 'hidden_layer1': 110, 'hidden_layer2': 24, 'reccurent_dropout1': 0.6586103171064326, 'reccurent_dropout2': 0.3082568929314497}
0.001780390040948987


In [170]:
# Append the best parameters and the best value as one row of the results CSV.
# The csv module requires files it writes to be opened with newline='';
# without it, platforms that translate '\n' end up with blank lines between rows.
with open('results/search_by_optuna.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([best_params, best_value])