<h3>コンピュートクラスタでトレーニングを実行します</h3>

<h4>エクスポートしたデータを読込みます</h4>

In [None]:
import os

import numpy as np
# Load the exported archive and pull out its two arrays.
# np.load on an .npz file returns a lazily-read archive object that keeps the
# file open; the context manager closes it once the arrays have been read.
with np.load('docdata1.npz') as npz:
    print(npz.files)
    x = npz['arr_0']
    y = npz['arr_1']

<h4>読込んだ内容を確認します</h4> 

In [None]:
# Show both array shapes, then the first entry of each array, one per line.
print(x.shape, y.shape, x[0], y[0], sep='\n')

<h4>Azureml パッケージをインポートします</h4> 

In [None]:
from azureml.core import Workspace, Experiment, Dataset, Datastore, ScriptRunConfig
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.data import OutputFileDatasetConfig
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DockerConfiguration

# Handle to the Azure ML workspace, used by the later cells for the compute
# target lookup, the default datastore and the experiment.
# NOTE(review): from_config() is expected to read a local workspace config
# file (config.json per the SDK docs) — confirm one is present.
workspace = Workspace.from_config()

<h4>コンピュートターゲットを構成します</h4> 

In [None]:
aml_compute_target = "demo-cpucluster1"  # <== set this to the name of an existing compute cluster
try:
    # Look up the compute target by name in the workspace.
    aml_compute = ComputeTarget(workspace, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("no compute target with the specified name found")
    # Stop here: without a compute target `aml_compute` would stay undefined
    # and the ScriptRunConfig cell would fail later with a confusing NameError.
    raise

<h4>データセットをロードします</h4> 

In [None]:
# Upload the local archive to the workspace's default datastore.
def_blob_store = workspace.get_default_datastore()
def_blob_store.upload_files(
    files=['docdata1.npz'],
    target_path='workshop2/',
    overwrite=True,
    show_progress=True,
)

# Build a file dataset from the uploaded path and expose it to the run as a
# mounted input named 'input'.
uploaded_path = def_blob_store.path('workshop2/docdata1.npz')
file_dataset = Dataset.File.from_files(uploaded_path)
input_data = file_dataset.as_named_input('input').as_mount()

<h4>ハイパーパラメタを指定します</h4> 
<p>epochs = 20, batch_size = 128, drop_out = 0.3, hidden_dim = 100, layer_dim = 2, embedding_dim = 200, vocab_size = 7295</p>

In [None]:
# Hyperparameter values. A later cell forwards each one to the training
# script as a command-line argument of the same name (--epochs, --batch_size, ...).
epochs, batch_size = 20, 128
drop_out = 0.3
hidden_dim, layer_dim = 100, 2
embedding_dim, vocab_size = 200, 7295

<h4>ScriptRunConfig を構成します</h4> 

In [None]:
# pip requirements installed into the run's environment.
pip_requirements = [
    'azureml-defaults',
    'azureml-mlflow',
    'numpy',
    'mlflow',
    'torch==1.8.1',
    'pytorch-lightning==1.3.1',
]

myenv = Environment("myenv")
myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=pip_requirements)

# Docker settings for the run (use_docker=True).
docker_config = DockerConfiguration(use_docker=True)

# Command-line arguments for script10.py: the mounted input dataset first,
# then each hyperparameter as a "--flag value" pair, in a fixed order.
script_arguments = ['--datadir', input_data]
for flag, value in (
    ('--epochs', epochs),
    ('--batch_size', batch_size),
    ('--drop_out', drop_out),
    ('--hidden_dim', hidden_dim),
    ('--layer_dim', layer_dim),
    ('--embedding_dim', embedding_dim),
    ('--vocab_size', vocab_size),
):
    script_arguments.extend((flag, value))

# Bundle script, arguments, compute target and environment into one run configuration.
src = ScriptRunConfig(
    source_directory='script_folder',
    script='script10.py',
    arguments=script_arguments,
    compute_target=aml_compute,
    environment=myenv,
    docker_runtime_config=docker_config,
)

<h4>実験を送信します</h4> 

In [None]:
# Create (or reuse) the experiment named 'text_run_01' in the workspace and
# submit the run configuration built in the previous cell.
exp = Experiment(workspace=workspace, name='text_run_01')
run = exp.submit(src)

In [None]:
%%time
# Wait for the submitted run to complete, with show_output=True so its output
# is shown while waiting; %%time reports how long this cell took.
run.wait_for_completion(show_output=True)

<h4>アウトプットファイルの一覧を表示します</h4> 

In [None]:
# Display the names of the files stored for the run (the cell's last
# expression is rendered as its output).
run.get_file_names()

<h4>モデルをダウンロードします</h4> 

In [None]:
# Download every file the run stored under 'outputs/models/' into ./models.
# Relies on `os` being imported in the notebook's import cell.
os.makedirs('./models', exist_ok=True)

for f in run.get_file_names():
    # Only model artifacts are of interest; skip logs and other run files.
    if f.startswith('outputs/models/'):
        # Keep just the file name so everything lands flat inside ./models.
        output_file_path = os.path.join('./models', f.split('/')[-1])
        print('Downloading from {} to {} ...'.format(f, output_file_path))
        run.download_file(name=f, output_file_path=output_file_path)