<h2>ハイパーパラメタチューニング</h2>

<h4>エクスポートしたデータを読込みます</h4>

In [None]:
import numpy as np
# Load the exported arrays. np.load() on an .npz file returns an archive object
# that keeps the file open, so use it as a context manager to close it again
# once both arrays have been pulled out.
with np.load('docdata1.npz') as npz:
    print(npz.files)   # names of the arrays stored in the archive
    x = npz['arr_0']   # first stored array (inspected in the next cell)
    y = npz['arr_1']   # second stored array

<h4>読込んだ内容を確認します</h4> 

In [None]:
# Sanity-check the loaded arrays: both shapes first, then the first element of each.
for arr in (x, y):
    print(arr.shape)
for arr in (x, y):
    print(arr[0])

<h3>Azure パッケージをインポートします</h3>

In [None]:
#import azureml.core
from azureml.core import Workspace, Experiment, Dataset, Datastore, ScriptRunConfig
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DockerConfiguration
from azureml.data import OutputFileDatasetConfig
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

# Obtain the Azure ML Workspace handle via Workspace.from_config(); the compute
# target lookup, the default datastore and the experiment below all use it.
workspace = Workspace.from_config()

<h3>コンピュートターゲットを指定します</h3>

In [None]:
aml_compute_target = "demo-cpucluster1"  # <== The name of the cluster being used
try:
    # Look up the existing cluster by name in the workspace.
    aml_compute = ComputeTarget(workspace, aml_compute_target)
except ComputeTargetException:
    print("no compute target with the specified name found")
    # aml_compute is required by the ScriptRunConfig cell further down. Stop here
    # with the real cause instead of continuing and hitting a NameError later.
    raise
else:
    print("found existing compute target.")

<h3>データセットをロードします</h3>

In [None]:
# Upload the local data file to the workspace's default datastore, overwriting
# any previous copy under workshop2/.
def_blob_store = workspace.get_default_datastore()
def_blob_store.upload_files(
    files=['docdata1.npz'],
    target_path='workshop2/',
    overwrite=True,
    show_progress=True,
)

# Wrap the uploaded file as a file dataset, then expose it to the training
# script as a mounted input named 'input'.
uploaded_file = Dataset.File.from_files(def_blob_store.path('workshop2/docdata1.npz'))
input_data = uploaded_file.as_named_input('input').as_mount()

<h3>ハイパーパラメタを指定します</h3>

- チューニング対象のパラメタは除外しておきます

In [None]:
# Fixed settings that are forwarded unchanged to the training script as
# command-line arguments.
# drop_out and hidden_dim are intentionally not defined here: they are the
# parameters being tuned (earlier fixed values were drop_out = 0.3 and
# hidden_dim = 100).
vocab_size = 7295
embedding_dim = 200
layer_dim = 2
batch_size = 128
epochs = 20

<h3>ScriptRunConfig を構成します</h3>

- チューニング対象のパラメタは除外しておきます

In [None]:
# Environment for the training run: pip packages the script needs.
myenv = Environment("myenv")
myenv.python.conda_dependencies = CondaDependencies.create(
    pip_packages=[
        'azureml-defaults',
        'azureml-mlflow',
        'numpy',
        'mlflow',
        'torch==1.8.1',
        'pytorch-lightning==1.3.1',
    ]
)

# Run the script inside a Docker container.
docker_config = DockerConfiguration(use_docker=True)

# Fixed command-line arguments for script10.py. --drop_out and --hidden_dim are
# left out on purpose: they are the parameters being tuned.
script_arguments = [
    '--datadir', input_data,
    '--epochs', epochs,
    '--batch_size', batch_size,
    '--layer_dim', layer_dim,
    '--embedding_dim', embedding_dim,
    '--vocab_size', vocab_size,
]

src = ScriptRunConfig(
    source_directory='script_folder',
    script='script10.py',
    arguments=script_arguments,
    compute_target=aml_compute,
    environment=myenv,
    docker_runtime_config=docker_config,
)

<h3>チューニング対象のパラメタを構成します</h3>

- choice で候補値を列挙すると、RandomParameterSampling がその中からランダムに選んだ組合せで実行します（実行回数の上限は max_total_runs）
<br>[RandomParameterSampling](https://docs.microsoft.com/ja-jp/azure/machine-learning/how-to-tune-hyperparameters#define-search-space)

In [None]:
# Candidate values for the two tuned script arguments; the keys are the
# argument names that were left out of the ScriptRunConfig argument list.
search_space = {
    "--drop_out": choice(0.1, 0.2, 0.3),
    "--hidden_dim": choice(80, 100, 120),
}
param_sampling = RandomParameterSampling(search_space)

# Tuning job: maximise the 'val_acc' metric, at most 10 runs in total and
# at most 4 running at the same time.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_name='val_acc',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=4,
)

<h3>実験の送信</h3>

In [None]:
# Create (or reuse) the experiment by name and submit the HyperDrive
# configuration to it; the returned run object is used by the cells below.
exp = Experiment(workspace=workspace, name='text_run_hyper_4')
hyperdrive_run = exp.submit(config=hyperdrive_config)

In [None]:
%%time
# Wait for the submitted HyperDrive run to complete, with show_output=True
# requesting progress output; the %%time cell magic reports how long this took.
hyperdrive_run.wait_for_completion(show_output=True)

<h4>ステータスを確認します</h4>

In [None]:
# Stop the notebook here unless the tuning run finished successfully; the
# message shows the actual status so a failure is easy to diagnose.
run_status = hyperdrive_run.get_status()
assert run_status == "Completed", "HyperDrive run ended with status: " + str(run_status)

<h4>ベストなハイパーパラメタの組合せを確認します</h4>

In [None]:
# Pick the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No best run is returned when no child run logged the primary metric;
    # fail with a clear message instead of an AttributeError on best_run.id.
    raise RuntimeError("no child run reported the primary metric; cannot select a best run")
print(best_run.id)
# The run's argument list includes the sampled hyperparameter values.
print(best_run.get_details()['runDefinition']['arguments'])

<h4>アウトプットの一覧を表示します</h4>

In [None]:
# List the file names recorded for the best run; as the last expression in the
# cell, the result is displayed by the notebook.
best_run.get_file_names()

<h4>モデルを登録します</h4>

In [None]:
# Register the model file produced by the best run under a fixed model name.
model = best_run.register_model(
    model_name='text-classification-lstm-hyper',
    model_path='outputs/models/text_classifier_lstm.pt',
)