In [1]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azureml.core import Workspace, Datastore, Dataset
from azureml.data.datapath import DataPath
from azureml.core import Datastore
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:

# Obtain a handle to the Azure Machine Learning workspace.
credential = DefaultAzureCredential()
ml_client = None
try:
    # First choice: let the client resolve the workspace from a config file.
    ml_client = MLClient.from_config(credential=credential)
except Exception as err:
    # Report why the config-based lookup failed, then fall back to explicit details.
    print(err)
    # Enter details of your AzureML workspace
    subscription_id = "<SUBSCRIPTION_ID>"
    resource_group = "<RESOURCE_GROUP>"
    workspace = "<AZUREML_WORKSPACE_NAME>"
    ml_client = MLClient(
        credential=credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group,
        workspace_name=workspace,
    )

Found the config file in: /config.json


In [3]:
# Get the workspace as an azureml.core Workspace object.
# `ws` is the handle the later cells use to look up the dataset, the default
# datastore, the compute targets, and to build the pipeline and experiment.
ws = Workspace.from_config()

In [4]:
# Get the dataset: look it up by its registered name in the workspace.
# It is later passed to the pipeline step as the named input 'train_ds'.
train_dataset = ws.datasets['odFridgeObjects_20221116_123103']

In [31]:
# Experiment name.
# NOTE(review): no other visible cell reads `exp_name`; the pipeline submission
# cell passes the literal 'automl_exp' instead — confirm which name is intended.
exp_name = "dpv2-image-object-detection-experiment"

In [32]:
# Compute target setup: create the cluster only when the workspace does not
# already list one under this name.

datastore = ws.get_default_datastore()

compute_name = 'cpu-cluster'
if compute_name not in ws.compute_targets:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=0,
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Wait for provisioning, giving up after 20 minutes.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )

    # Show the result
    print(compute_target.get_status().serialize())

# Whether or not the cluster was just created, read the handle back from the workspace.
compute_target = ws.compute_targets[compute_name]

In [47]:
# Environment setup for the pipeline step.

aml_run_config = RunConfiguration()
# Use just-specified compute target ("cpu-cluster")
aml_run_config.target = compute_target

# Specify CondaDependencies obj, add necessary packages.
# `cv2` is only the Python import name: the OpenCV bindings are distributed on
# PyPI as opencv-python / opencv-python-headless, so asking pip for 'cv2' makes
# the environment build fail. The headless build is requested because the step
# runs on a remote cluster where no GUI libraries are needed.
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas', 'scikit-learn', 'pillow'],
    pip_packages=['azureml-sdk[automl]', 'pyarrow', 'opencv-python-headless'])

In [48]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

# Output handle that is passed to the script as the value of --output_path.
prepped_data_path = OutputFileDatasetConfig(name="output_path")

# Data-preparation step: runs dataprep.py with `aml_run_config` on
# `compute_target`, receiving the training dataset as the named input 'train_ds'.
dataprep_step = PythonScriptStep(
    # What to run
    script_name="dataprep.py",
    name="dataprep",
    # Data flowing in and out of the script
    arguments=["--output_path", prepped_data_path],
    inputs=[train_dataset.as_named_input('train_ds')],
    # Where and how to run it
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=True,
)

In [38]:
# Load the dataset into a pandas DataFrame for inspection in the notebook.
# NOTE(review): `hoge` is a placeholder name; the cells below read and mutate it.
hoge = train_dataset.to_pandas_dataframe()

In [44]:
# Select the first column by position. The list indexer `[0]` keeps the result
# a one-column DataFrame rather than a Series.
fuga = hoge.iloc[:, [0]]

In [42]:
# Write `fuga` back into the first column of `hoge`, mutating `hoge` in place.
# NOTE(review): this cell's recorded execution count (42) is lower than that of
# the cell defining `fuga` (44), so the cells were run out of order — confirm the
# result is the same after Restart & Run All.
hoge.iloc[:, 0] = fuga

In [43]:
# Display the DataFrame as the cell's output (bare last expression).
hoge

Unnamed: 0,image_url,label,label_confidence,image_height,image_width
0,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/water_bottle', 'topX': 0.048...","[1.0, 1.0, 1.0, 1.0]",666,499
1,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/water_bottle', 'topX': 0.072...",[1.0],666,499
2,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/milk_bottle', 'topX': 0.0420...","[1.0, 1.0, 1.0, 1.0]",666,499
3,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/carton', 'topX': 0.124590527...","[1.0, 1.0, 1.0]",666,499
4,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/milk_bottle', 'topX': 0.1245...",[1.0],666,499
5,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/can', 'topX': 0.426551916049...",[1.0],666,499
6,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/milk_bottle', 'topX': 0.6500...","[1.0, 1.0]",666,499
7,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/carton', 'topX': 0.732539456...","[1.0, 1.0, 1.0, 1.0]",666,499
8,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/milk_bottle', 'topX': 0.4748...","[1.0, 1.0, 1.0, 1.0]",666,499
9,workspaceblobstore/UI/2022-11-16_121913_UTC/im...,"[{'label': 'drink/carton', 'topX': 0.205113564...","[1.0, 1.0]",666,499


In [49]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

# Build a pipeline in the workspace whose only step is the data-preparation step.
pipeline = Pipeline(workspace=ws, steps=[dataprep_step])

# The run is submitted under the experiment named 'automl_exp'.
experiment = Experiment(ws, 'automl_exp')

# Submit the pipeline, then wait for the submitted run to complete.
run = experiment.submit(pipeline, show_output=True)
run.wait_for_completion()

Created step dataprep [697f9e7c][2d3c2743-db46-4858-b07e-bb46294306fd], (This step will run and generate new outputs)
Submitted PipelineRun 56521f0d-1636-42d5-af69-d58baa1a01f4
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/56521f0d-1636-42d5-af69-d58baa1a01f4?wsid=/subscriptions/6c8b72b4-b67f-494d-8404-25d9f0d8b1c1/resourcegroups/touka/workspaces/touka_rd&tid=8d41d6d7-36ce-4e0c-8c6d-cf2c54c39039
PipelineRunId: 56521f0d-1636-42d5-af69-d58baa1a01f4
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/56521f0d-1636-42d5-af69-d58baa1a01f4?wsid=/subscriptions/6c8b72b4-b67f-494d-8404-25d9f0d8b1c1/resourcegroups/touka/workspaces/touka_rd&tid=8d41d6d7-36ce-4e0c-8c6d-cf2c54c39039
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 1e35364d-872b-4d09-bd8d-67f795b9c49a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1e35364d-872b-4d09-bd8d-67f795b9c49a?wsid=/subscriptions/6c8b72b4-b67f-494d-8404-25d9f0d8b1c1/resourcegroups/touka/