In [1]:
import azureml.core
from azureml.core import Workspace
# Connect to the Azure ML workspace described by the saved config file.
# `ws` is the handle the later cells use for datastores, compute targets,
# experiments and environment registration.
# NOTE(review): from_config() is called with no arguments, so it presumably
# searches for a config.json near the notebook — confirm that file is present.
ws = Workspace.from_config()


In [None]:
# Read the diabetes CSV from the workspace's default datastore into pandas
from azureml.core import Dataset, Datastore, Workspace

adlsgen_blob_ds = ws.get_default_datastore()

# A (datastore, relative path) pair identifies the delimited file to parse
diabetes_csv_path = (adlsgen_blob_ds, '/data/diabetes.csv')
diabetes_tabular = Dataset.Tabular.from_delimited_files(path=diabetes_csv_path, header=True)
df_diabetes = diabetes_tabular.to_pandas_dataframe()

# Last expression of the cell, so the notebook renders a preview of the frame
df_diabetes.head()

In [6]:
# Create a folder for the experiment files
import os

folder_name = 'ext_script'
experiment_folder = './' + folder_name

# The %%writefile cell that follows writes into this directory and fails when
# it does not exist, so actually create it (no-op if it is already there).
os.makedirs(experiment_folder, exist_ok=True)

# NOTE(review): the experiment script reads './diabetes.csv', but nothing
# visible in this notebook places that file inside `experiment_folder` —
# confirm the CSV is staged there before submitting the run.

In [None]:
%%writefile $experiment_folder/diabetes_experiment.py
from azureml.core import Run
import pandas as pd
import os

# Get the experiment run context
run = Run.get_context()

# load the diabetes dataset
data = pd.read_csv('./diabetes.csv')

# Count the rows and log the result
row_count = (len(data))
run.log('observations', row_count)
print('Analyzing {} rows of data'.format(row_count))

# Count and log the label counts
diabetic_counts = data['Outcome'].value_counts()
print(diabetic_counts)
for k, v in diabetic_counts.items():
    run.log('Label:' + str(k), v)
# Complete the run
run.complete()


In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


# Look up the compute resource that will execute the external script,
# provisioning a new cluster when none with this name exists yet.
compute_name = 'cpucluster'
if compute_name in ws.compute_targets:
    cpu_cluster = ws.compute_targets[compute_name]
    if isinstance(cpu_cluster, AmlCompute):
        print("found compute target: " + compute_name)
    else:
        # An existing target of another kind used to be accepted silently;
        # surface it so a surprising target type is noticed before submission.
        print("compute target " + compute_name + " exists but is not an AmlCompute cluster: "
              + type(cpu_cluster).__name__)
else:
    # Create the compute cluster under the same name that was looked up above,
    # so that lookup and creation can never drift apart.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, compute_name, compute_config)
    # Block until provisioning finishes, then show the resulting cluster status
    cpu_cluster.wait_for_completion(show_output=True)
    print(cpu_cluster.get_status().serialize())


In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails

# Build the run's Python environment from the conda specification file
env = Environment.from_conda_specification("experiment_env", "conda.yml")

# Describe the run: which script, from which folder, on which compute target,
# and inside which environment
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='diabetes_experiment.py',
    compute_target=cpu_cluster,
    environment=env,
)

# Submit the configured script under the named experiment
experiment = Experiment(workspace=ws, name='mslearn-diabetes_ext_script')
run = experiment.submit(config=script_config)


In [None]:
# Display the run-monitoring widget, then wait on the submitted run;
# the final expression's result is rendered as the cell output.
run_widget = RunDetails(run)
run_widget.show()
run.wait_for_completion()

In [None]:
# Supplementary note
# Build the environment the external script runs in (library installation, etc.)
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Packages the environment should contain, grouped by installer
pip_requirements = ['azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults']
conda_requirements = ['scikit-learn', 'xgboost']

# Create the environment and attach the dependency specification to it
env = Environment('myenv')
cd = CondaDependencies.create(pip_packages=pip_requirements, conda_packages=conda_requirements)
env.python.conda_dependencies = cd

# Register the environment in the workspace so it can be re-used later
env.register(workspace=ws)