In [1]:
# Connect to Azure Machine Learning Workspace 
import sys
import os
import json
import archai.common.azureml_helper as aml_helper

# Put the local "scripts" folder on the module search path so the modules
# inside it can be imported directly by the cells of this notebook.
scripts_abs_path = os.path.realpath('scripts')
sys.path.append(scripts_abs_path)

from cnn_search_space import CNNSearchSpace

# make sure we have a scripts dir for the code to run our jobs.
import os
# Folder that is handed to the job components as their code directory;
# create it if it is not there yet.
scripts_dir = "./scripts"
os.makedirs(scripts_dir, exist_ok=True)

# Load the workspace / storage settings. The context manager closes the file
# handle as soon as the JSON has been parsed.
config_file = "../.azureml/config.json"
with open(config_file, 'r') as config_stream:
    config = json.load(config_stream)

# Report every missing setting in one go, then stop: continuing with an
# incomplete config only produces a less helpful failure further down.
required_keys = ['subscription_id', 'resource_group', 'workspace_name', 'storage_account_key', 'storage_account_name']
missing_keys = [key for key in required_keys if key not in config]
for required_key in missing_keys:
    print(f"### Error: please add a {required_key} to {config_file}")
if missing_keys:
    raise KeyError(f"{config_file} is missing required keys: {', '.join(missing_keys)}")

storage_account_key = config['storage_account_key']
storage_account_name = config['storage_account_name']

# Only the account name is echoed; the key is a secret and must not be printed.
print(f'Using storage account: {storage_account_name}')

Using storage account: archaimnistmodels


In [2]:
# Build the Azure ML client for the workspace described by the config file.
ml_client = aml_helper.get_aml_client_from_file(
    config_path=config_file,
)

Found the config file in: ..\.azureml\config.json


In [3]:
# CPU cluster: used further down as the compute target of the pipeline.
cpu_compute_name = "nas-cpu-cluster-D14-v2"
aml_helper.create_compute_cluster(
    ml_client,
    cpu_compute_name,
    size="Standard_D14_v2",
    location="westus",
)

# GPU cluster: its name is handed to the search script via --compute.
gpu_compute_name = "nas-gpu-cluster-NC6"
aml_helper.create_compute_cluster(
    ml_client,
    gpu_compute_name,
    size="Standard_NC6",
    location="westus",
)

You already have a cluster named nas-cpu-cluster-D14-v2, we'll reuse it as is.
You already have a cluster named nas-gpu-cluster-NC6, we'll reuse it as is.


In [20]:
# Register the job environment from conda.yaml under an explicit version.
# Its name and version are combined later into the "<name>:<version>"
# reference that the job components use.
archai_job_env = aml_helper.create_environment_from_file(
    ml_client,
    conda_file="conda.yaml",
    version='0.1.4',
)

Environment with name aml-archai is registered to workspace, the environment version is 0.1.4


In [21]:
from azure.ai.ml.entities import AzureBlobDatastore
from azure.ai.ml.entities._credentials import AccountKeyConfiguration

# Blob container that receives the models generated by the NAS search.
model_container_name = "models"

# Account-key credentials for the storage account named in the config file.
model_store_credentials = AccountKeyConfiguration(account_key=storage_account_key)

# Datastore definition that points the workspace at the models container.
blob_store = AzureBlobDatastore(
    name="models",
    description="Datastore pointing to our models blob container.",
    account_name=storage_account_name,
    container_name=model_container_name,
    credentials=model_store_credentials,
)

# Register the datastore with the workspace; the returned entity is the cell output.
ml_client.create_or_update(blob_store)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'models', 'description': 'Datastore pointing to our models blob container.', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/c8b7f913-60fb-4759-a310-fc5630e56f99/resourceGroups/snpe-aml-rg/providers/Microsoft.MachineLearningServices/workspaces/snpe-aml-workspace/datastores/models', 'Resource__source_path': None, 'base_path': 'd:\\git\\microsoft\\archai\\docs\\advanced_guide\\cloud\\notebooks\\distributed_search', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x000001C93183F460>, 'credentials': {'type': 'account_key'}, 'container_name': 'models', 'account_name': 'archaimnistmodels', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [22]:
# Datastore name and blob container name for the dataset are kept as two
# separate settings, even though they currently hold the same value.
data_store_name = "datasets"
data_container_name = "datasets"

# Account-key credentials for the storage account named in the config file.
data_store_credentials = AccountKeyConfiguration(account_key=storage_account_key)

# Datastore definition that points the workspace at the dataset container.
data_store = AzureBlobDatastore(
    name=data_store_name,
    description="Datastore pointing to our dataset container.",
    account_name=storage_account_name,
    container_name=data_container_name,
    credentials=data_store_credentials,
)

# Register the datastore with AML; the returned entity is the cell output.
ml_client.create_or_update(data_store)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'datasets', 'description': 'Datastore pointing to our dataset container.', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/c8b7f913-60fb-4759-a310-fc5630e56f99/resourceGroups/snpe-aml-rg/providers/Microsoft.MachineLearningServices/workspaces/snpe-aml-workspace/datastores/datasets', 'Resource__source_path': None, 'base_path': 'd:\\git\\microsoft\\archai\\docs\\advanced_guide\\cloud\\notebooks\\distributed_search', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x000001C93183F4F0>, 'credentials': {'type': 'account_key'}, 'container_name': 'datasets', 'account_name': 'archaimnistmodels', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [23]:
from store import ArchaiStore

# make sure the datasets container exists
# NOTE(review): the uploaded file is the workspace config, which also holds the
# storage account key - confirm that storing it in these containers is intended.
store = ArchaiStore(storage_account_key, storage_account_name, blob_container_name=data_container_name)
store.upload_blob("MNIST", config_file)

# make sure the models container exists
store = ArchaiStore(storage_account_key, storage_account_name, blob_container_name=model_container_name)
store.upload_blob("config", config_file)

# azureml:// URIs are addressed by *datastore* name, not by blob container name.
# The results path therefore uses the name of the registered models datastore
# instead of model_container_name, which only matched by coincidence.
datastore_path = f'azureml://datastores/{data_store_name}/paths/MNIST/'
results_path = f'azureml://datastores/{blob_store.name}/paths/MNIST/'

In [24]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output


# Input and output ports of the data preparation step.
# NOTE(review): the "name" input is declared but never referenced in the
# command line below - confirm whether prep_data_store.py should receive it.
data_prep_inputs = {"name": Input(type='string')}
data_prep_outputs = {
    "data": Output(type="uri_folder", path=datastore_path, mode="rw_mount"),
}

# Component that runs prep_data_store.py and writes to the mounted dataset folder.
data_prep_component = command(
    name="data_prep2",
    display_name="Data preparation for training",
    description="Downloads the remote dataset to our blob store.",
    inputs=data_prep_inputs,
    outputs=data_prep_outputs,
    # The source folder of the component
    code=scripts_dir,
    command="""python3 prep_data_store.py \
            --path ${{outputs.data}} \
            """,
    environment=f"{archai_job_env.name}:{archai_job_env.version}",
)

In [25]:
# "<name>:<version>" reference of the registered job environment.
environment_name = f'{archai_job_env.name}:{archai_job_env.version}'

# The whole config dict is serialised to JSON and hex-encoded so that it can be
# passed as a single command line token.
# NOTE(review): hex is an encoding, not encryption - the storage account key in
# the config ends up readable in the job's command line.
hex_config = json.dumps(config).encode('utf-8').hex()

# Full command line of the search step; the ${{...}} placeholders are left
# literal here because the first segment is not an f-string.
search_command = """python3 search.py \
            --data_dir ${{inputs.data}} \
            --output_dir ${{outputs.results}} """ + (
    f'--environment {environment_name} '
    f'--compute {gpu_compute_name} '
    f'--config {hex_config}'
)

# Component that runs search.py on the prepared dataset folder and writes its
# results to the mounted results folder.
search_component = command(
    name="search",
    display_name="The Archai NAS search",
    description="Runs the NAS search algorithm.",
    inputs={"data": Input(type="uri_folder")},
    outputs={
        "results": Output(type="uri_folder", path=results_path, mode="rw_mount"),
    },
    code=scripts_dir,
    command=search_command,
    environment=environment_name,
)

In [26]:
from azure.ai.ml import dsl, Input, Output

# Two-step pipeline: prepare the MNIST data, then run the NAS search on it.
# The pipeline uses the CPU cluster as its compute target.
# NOTE(review): the dsl decorator inspects this function body, so the local
# variable names below may end up as node names - confirm before renaming them.
@dsl.pipeline(
    compute=cpu_compute_name,
    description="Data prep pipeline2",
)
def mnist_pipeline():
    # Step 1: call the data preparation component like a plain Python function,
    # passing its declared "name" input.
    data_prep_job = data_prep_component(
        name="MNIST"
    )

    # Step 2: run the search component on the folder produced by step 1.
    # The returned node is not used again inside this function.
    check_job = search_component(
        data=data_prep_job.outputs.data
    )
    
    # Expose the prepared dataset folder as the pipeline's "data" output.
    return { "data": data_prep_job.outputs.data }

In [27]:
# Instantiate the pipeline job from its dsl definition.
pipeline = mnist_pipeline()

# Submit the job to the workspace; the experiment name acts as the project name.
pipeline_job = ml_client.jobs.create_or_update(pipeline, experiment_name="mnist_test_run")


In [28]:
import webbrowser
# Look up the Studio endpoint of the submitted job and open it in the web browser.
studio_url = pipeline_job.services["Studio"].endpoint
webbrowser.open(studio_url)

True

In [25]:
# Show the "<name>:<version>" reference of the registered job environment.
print("{}:{}".format(archai_job_env.name, archai_job_env.version))

aml-archai:0.1.3


In [20]:
# NOTE(review): this redefines mnist_pipeline from the earlier cell; only the
# description differs. Whichever cell ran last wins, so keep a single definition.
# NOTE(review): the dsl decorator inspects this function body, so the local
# variable names below may end up as node names - confirm before renaming them.
@dsl.pipeline(
    compute=cpu_compute_name,
    description="Archai MNIST search",
)
def mnist_pipeline():
    # Step 1: call the data preparation component like a plain Python function,
    # passing its declared "name" input.
    data_prep_job = data_prep_component(
        name="MNIST"
    )

    # Step 2: run the search component on the folder produced by step 1.
    # The returned node is not used again inside this function.
    check_job = search_component(
        data=data_prep_job.outputs.data
    )
    
    # Expose the prepared dataset folder as the pipeline's "data" output.
    return { "data": data_prep_job.outputs.data }


_PYTHON_SDK_TYPE_MAPPING
_SchemaValidatableMixin__base_path_for_validation
_SchemaValidatableMixin__schema_validate
__abstractmethods__
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__sizeof__
__slotnames__
__slots__
__str__
__subclasshook__
__weakref__
_abc_impl
_build_input
_build_inputs
_build_inputs_dict
_build_inputs_dict_without_meta
_build_output
_build_output_for_pipeline
_build_outputs
_build_outputs_dict
_build_outputs_dict_without_meta
_build_pipeline_outputs_dict
_check_private_preview_features
_component_items_from_path
_create_empty_validation_result
_create_schema_for_validation
_create_schema_for_validation_with_base_path
_customized_validate
_dump_for_validation
_find_source_from_other_jobs
_find_source_from_parent_inputs
_find_source_from_parent_outputs
_find_source_input_output_type
_from_rest_inpu