# 必要なパッケージのインストール

以下のパッケージをインストールしておきましょう。

```bash
pip install -U "azureml-sdk[tensorboard]"
```

In [None]:
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.train.dnn import PyTorch
from azureml.core.runconfig import MpiConfiguration
# Report the installed core SDK version so the environment can be checked.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

# パラメータの指定

以下のように、パラメータを指定します。

In [None]:
# --- Experiment ---------------------------------------------------------
# Name of the experiment that runs are recorded under.
EXPERIMENT_NAME = "MNIST-GAN"

# --- Compute target -----------------------------------------------------
COMPUTE_NAME = "TwoNodeTwoGpus"
VM_SIZE = "STANDARD_NV12"
MIN_NODE, MAX_NODE = 0, 2  # passed as min_nodes / max_nodes when provisioning
NUM_NODE = 2  # node_count handed to the estimator
NUM_GPU = 2   # GPU count factor used to scale the hyperparameters below

# --- Training script ----------------------------------------------------
SCRIPT_DIR = "scripts"
ENTRY_SCRIPT = "simple_gan.py"

# Command-line arguments for the entry script. The base learning rate (2e-4)
# and base batch size (64) are both multiplied by NUM_NODE * NUM_GPU.
SCRIPT_PARAMS = {}
SCRIPT_PARAMS["--lr"] = 2e-4 * NUM_NODE * NUM_GPU
SCRIPT_PARAMS["--batch_size"] = 64 * NUM_NODE * NUM_GPU

# 実験の実行

Workspace内に、ExperimentとCompute Targetを作成し、EstimatorをSubmitしましょう。

## Workspaceの指定

In [None]:
# Obtain the workspace handle that the later cells use (experiment lookup,
# compute target lookup/creation).
# NOTE(review): from_config() is called with no arguments, so it presumably
# relies on a workspace config file being discoverable locally — confirm.
ws = Workspace.from_config()

## Experimentの指定（存在しなければ作成されます）

In [None]:
# Experiment named EXPERIMENT_NAME inside workspace `ws`; the estimator is
# submitted through this object in a later cell.
exp = Experiment(workspace=ws, name=EXPERIMENT_NAME)

## Compute Targetの指定（存在しなければ作成します）

In [None]:
# Reuse the compute target called COMPUTE_NAME when the workspace already has
# one; otherwise provision a new AmlCompute cluster and wait until it is ready.

# Read the name -> target mapping once rather than evaluating
# ws.compute_targets separately for the membership test and the lookup.
existing_targets = ws.compute_targets

if COMPUTE_NAME in existing_targets:
    compute_target = existing_targets[COMPUTE_NAME]
    # isinstance (not an exact type comparison) so AmlCompute subclasses are
    # also recognised.
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + COMPUTE_NAME)
    else:
        # This case used to pass without any message. The target is still
        # used as-is (behaviour unchanged), but the mismatch is now visible.
        print('WARNING: compute target ' + COMPUTE_NAME + ' exists but is of type '
              + type(compute_target).__name__ + ', not AmlCompute. using it as-is.')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=VM_SIZE,
        vm_priority="lowpriority",
        min_nodes=MIN_NODE,
        max_nodes=MAX_NODE
    )

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, COMPUTE_NAME, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

## Estimatorの作成と提出

In [None]:
# MPI settings for distributed training, plus the PyTorch estimator that runs
# ENTRY_SCRIPT from SCRIPT_DIR on `compute_target`.
mpi_config = MpiConfiguration()
# Use NUM_GPU instead of a literal 2: SCRIPT_PARAMS scales the learning rate
# and batch size by NUM_NODE * NUM_GPU, so the per-node process count must
# follow the same constant to stay consistent when the configuration changes.
mpi_config.process_count_per_node = NUM_GPU

est = PyTorch(
    source_directory=SCRIPT_DIR,
    script_params=SCRIPT_PARAMS,
    compute_target=compute_target,
    use_gpu=True,
    entry_script=ENTRY_SCRIPT,
    conda_packages=["pip"],
    # Extra pip dependency requested for the training environment.
    pip_packages=["pytorch-lightning"],
    node_count=NUM_NODE,
    distributed_training=mpi_config,
    framework_version="1.5",
    shm_size="8G",
)

In [None]:
# Submit the estimator through the experiment and keep the returned run
# object; the TensorBoard cell below is given this run.
run = exp.submit(config=est)
# Bare expression so the notebook displays the run.
run

In [None]:
# Display the run object again.
# NOTE(review): presumably kept as a separate cell so it can be re-executed to
# check the run's latest state — confirm intent.
run

# TensorBoardの利用

Azure Machine Learningでは、TensorBoardを使うことができます。

In [None]:
from azureml.tensorboard import Tensorboard

# Attach a TensorBoard instance to the submitted run, using port 8008.
monitored_runs = [run]
tb = Tensorboard(monitored_runs, port=8008)

# If successful, start() returns a string with the URI of the instance; as the
# last expression of the cell it is shown in the output.
tb.start()

In [None]:
# 停止
# Stop the TensorBoard instance held in `tb`.
tb.stop()