# Keras によるモデル開発とパラメータチューニング (Hyperdrive)

Keras を利用したモデル学習を行います。犬、猫の画像を区別する画像認識モデルを構築し、[Hyperdrive](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters) によるハイパーパラメータチューニングも実施します。

## Azure Machine Learning Python SDK インポート

In [1]:
# `from azureml.core import ...` binds only the listed names, not the
# `azureml` package itself, so the package must be imported explicitly for
# `azureml.core.VERSION` to resolve in a fresh kernel.
import azureml.core
from azureml.core import Workspace, Dataset, Experiment

# Print the installed SDK version so the notebook output records it.
print(azureml.core.VERSION)

1.0.65


## Azure Machine Learning Workspace への接続

In [2]:
# Identifiers of the Azure Machine Learning workspace to connect to.
subscription_id = "9c0f91b8-eb2f-484c-979c-15848c098a6b"
resource_group = "dllab-test"
workspace_name = "azureml"

# Workspace handle; later cells use it to look up the compute target,
# the experiment and the dataset.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

## 計算環境 Machine Learning Compute のアタッチ

In [3]:
from azureml.core.compute import ComputeTarget
# Look up the compute target named "gpu-clst" in the workspace.
compute_target = ComputeTarget(workspace=workspace, name="gpu-clst")

## 実験名の設定

In [4]:
# Experiment under which both the baseline run and the Hyperdrive run are
# submitted.
experiment = Experiment(workspace, "keras_catdog_hyperdrive")

## 学習データ

In [5]:
# Dataset registered in the workspace under the name "test"; it is passed to
# the estimators as the named input "catdog".
catdog = Dataset.get_by_name(workspace=workspace, name="test")

## モデル学習コード

In [6]:
import os

# Local folder that receives the training script and is later passed to the
# estimators as their source directory. Created if missing; an existing
# folder is left untouched.
project_folder = "./keras_hyperdrive"
os.makedirs(name=project_folder, exist_ok=True)

In [7]:
%%writefile {project_folder}/keras_dogcat.py

import numpy as np
import tensorflow as tf

from PIL import Image
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.preprocessing.image import array_to_img
from tensorflow.python.keras.preprocessing.image import img_to_array
from tensorflow.python.keras.preprocessing.image import load_img
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Input, Dense
from tensorflow.python.keras.models import Model
import os
from azureml.core import Run
from azureml.core import Workspace, Dataset
from keras.utils import plot_model
import argparse

print("## START Script ##")

# Hyperparameters are read from the command line so that the estimator's
# script parameters (or Hyperdrive's sampled values) can override the
# defaults for each run.
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, dest='batch_size', default=16, help='mini batch size for training')
parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01, help='learning rate')
parser.add_argument('--dropout', type=float, dest='dropout', default=0.01, help='dropout rate')
parser.add_argument('--epoch', type=int, dest='epoch', default=10, help='number of epoch')

args = parser.parse_args()

batch_size = args.batch_size
learning_rate = args.learning_rate  # previously misspelled as `learing_rate`
dropout = args.dropout
epoch = args.epoch

# Download the dataset attached to this run as the named input 'catdog'
# into the working directory; files that already exist are not overwritten.
run = Run.get_context()
run.input_datasets['catdog'].download(target_path='.', overwrite=False)

# Diagnostic listing of the downloaded data. The script expects
# train/ and test/ folders, each with cat/ and dog/ sub-folders; a missing
# folder fails here rather than later during training.
print(os.listdir())
for data_dir in ("train", "test", "train/cat", "test/cat", "train/dog", "test/dog"):
    print(os.listdir(data_dir))


from pip._internal.operations.freeze import freeze

class RunCallback(tf.keras.callbacks.Callback):
    """Keras callback that forwards per-epoch accuracy to an Azure ML run.

    Args:
        run: Object exposing ``log(name=..., value=...)``; in this script it
            is the result of ``Run.get_context()``.
    """

    # Keras reports accuracy under "acc"/"val_acc" in older releases and
    # "accuracy"/"val_accuracy" in newer ones, so each logged metric lists
    # the candidate keys in order of preference.
    _METRIC_KEYS = (
        ("training_acc", ("acc", "accuracy")),
        ("validation_acc", ("val_acc", "val_accuracy")),
    )

    def __init__(self, run):
        super().__init__()
        self.run = run

    def on_epoch_end(self, batch, logs=None):
        """Log training and validation accuracy at the end of an epoch.

        Args:
            batch: First positional argument supplied by Keras (the epoch
                index per the Callback API); unused here.
            logs: Metric dictionary supplied by Keras; may be ``None``.

        A metric that is absent from ``logs`` is skipped rather than raising
        on ``float(None)``.
        """
        logs = logs or {}
        for metric_name, candidate_keys in self._METRIC_KEYS:
            value = next(
                (logs[key] for key in candidate_keys if logs.get(key) is not None),
                None,
            )
            if value is not None:
                self.run.log(name=metric_name, value=float(value))
    

# CNN for binary classification of 150x150 RGB images: three
# Conv2D -> ReLU -> MaxPooling stages (32, 32 and 64 filters), followed by a
# dense head with dropout and a single sigmoid output.
inputs = Input(shape=(150, 150, 3))
x = inputs
for filters in (32, 32, 64):
    x = Conv2D(filters, (3, 3))(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

x = Flatten()(x)
x = Dense(64, activation="relu")(x)
x = Dropout(dropout)(x)
prediction = Dense(1, activation="sigmoid")(x)

model = Model(inputs=inputs, outputs=prediction)

# Build the optimizer explicitly. Passing the string "rmsprop" always uses
# the optimizer's default learning rate, so the --learning-rate argument
# (and any value sampled by Hyperdrive) previously had no effect.
optimizer = tf.keras.optimizers.RMSprop(lr=args.learning_rate)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])


# Report per-epoch accuracy to the Azure ML run.
callbacks = [RunCallback(run)]

# Training images are rescaled and randomly sheared, zoomed and flipped;
# validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

# Class labels come from the sub-folder names under each directory.
train_generator = train_datagen.flow_from_directory(
    './train',
    target_size=(150, 150),  # resize
    batch_size=batch_size,
    class_mode="binary")

validation_generator = test_datagen.flow_from_directory(
    './test',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode="binary")

VERBOSE = 1

print("## START TRAINING ##")

# The step counts assume roughly 2000 training and 800 validation images per
# epoch -- TODO confirm against the dataset. max(1, ...) keeps at least one
# step when the batch size exceeds those counts.
model.fit_generator(
    train_generator,
    steps_per_epoch=max(1, 2000 // batch_size),
    epochs=epoch,
    validation_data=validation_generator,
    validation_steps=max(1, 800 // batch_size),
    callbacks=callbacks,
    verbose=VERBOSE)

run.log("Batch size", batch_size)
run.log("Num of Epoch", epoch)
run.log("Dropout Rate", dropout)

# Make sure the target folder exists before writing the weights; the model
# registration step of the notebook reads from 'outputs/'.
os.makedirs("./outputs", exist_ok=True)
model.save_weights("./outputs/keras_simple.h5")


Overwriting ./keras_hyperdrive/keras_dogcat.py


## Estimator 設定

In [8]:
from azureml.train.dnn import TensorFlow

# Fixed hyperparameters for the single baseline run.
script_params = {
    "--batch-size": 2,
    "--learning-rate": 0.00001,
    "--dropout": 0.03,
    "--epoch": 20,
}

# Estimator that runs keras_dogcat.py from the project folder on the compute
# target, with the dataset attached as the named input "catdog".
estimator = TensorFlow(
    source_directory=project_folder,
    entry_script="keras_dogcat.py",
    compute_target=compute_target,
    script_params=script_params,
    framework_version="1.13",
    pip_packages=["keras", "Pillow", "azureml-dataprep[pandas,fuse]"],
    inputs=[catdog.as_named_input("catdog")],
)

## モデル学習と結果確認

In [9]:
# Submit the baseline estimator to the experiment.
run = experiment.submit(config=estimator)

In [10]:
from azureml.widgets import RunDetails

# Show the monitoring widget for the baseline run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

## Hyperdrive ハイパーパラメータチューニング

In [11]:
from azureml.train.dnn import TensorFlow

# Estimator used as the template for the Hyperdrive runs. No script_params
# are set here: the hyperparameter values come from the sampling
# configuration defined in the next cell.
tf_hyperdrive_est = TensorFlow(
    source_directory=project_folder,
    entry_script="keras_dogcat.py",
    compute_target=compute_target,
    framework_version="1.13",
    pip_packages=["keras", "Pillow", "azureml-dataprep[pandas,fuse]"],
    inputs=[catdog.as_named_input("catdog")],
)

In [12]:
from azureml.train.hyperdrive import *

# Search space keyed by the command-line argument each value is passed to.
search_space = {
    "--batch-size": choice(25, 50, 75, 100),
    "--learning-rate": loguniform(-6, -1),
    "--dropout": loguniform(-6, -1),
    "--epoch": choice(range(10, 100)),
}

# Draw hyperparameter combinations at random from the search space.
ps = RandomParameterSampling(search_space)

In [13]:
# Early-termination policy: applied every 2 evaluation intervals, with a
# slack factor of 0.1 relative to the best-performing run (see the
# BanditPolicy documentation for the exact cancellation rule).
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

In [14]:
# Hyperdrive configuration: random sampling over the search space with
# early termination, at most 10 runs in total and 10 at a time.
#
# The primary metric is the validation accuracy logged by the training
# script ("validation_acc"). Optimizing training accuracy would favour
# configurations that fit the training data best rather than ones that
# generalize.
htc = HyperDriveConfig(estimator=tf_hyperdrive_est,
                       hyperparameter_sampling=ps,
                       policy=policy,
                       primary_metric_name='validation_acc',
                       primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                       max_total_runs=10,
                       max_concurrent_runs=10)

## Hyperdrive によるモデル学習と結果確認

In [15]:
# Submit the Hyperdrive configuration to the experiment.
htr = experiment.submit(htc)

In [16]:
# Show the monitoring widget for the Hyperdrive run.
hyperdrive_widget = RunDetails(htr)
hyperdrive_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## モデル登録

In [None]:
# Select the Hyperdrive child run that scored best on the primary metric,
# fetch its logged metrics, and print the run.
best_run = htr.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
print(best_run)

In [None]:
# Register the contents of the best run's outputs/ folder as a model, then
# print its name, id and version separated by tabs.
model = best_run.register_model(
    model_name="tf-catdog-hyperdrive",
    model_path="outputs/",
)
print("\t".join(str(field) for field in (model.name, model.id, model.version)))

In [None]:
# Evaluate the baseline run object (the result of submitting `estimator`) as
# the cell's last expression so the notebook renders it.
run