In [1]:
pip show azure-ai-ml

Name: azure-ai-ml
Version: 1.8.0
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, opencensus-ext-azure, pydash, pyjwt, pyyaml, strictyaml, tqdm, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [2]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

try:
    credential=DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
    
except Exception as ex:
    credential=InteractiveBrowserCredential()

In [3]:
ml_client=MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [4]:
# List the Datastores

stores=ml_client.datastores.list()
for ds_name in stores:
    print(ds_name.name)

workspaceworkingdirectory
workspaceartifactstore
workspaceblobstore
workspacefilestore


<h3> Create a Data Store </h3>

In [6]:
key="StsGCqNMUkwgoUeCvd2OPAP0V0RPwJHvjC4ER6c59FPlKgifwFslqd4XZpGRMUxKrq8pYV59Wd3T+AStHHi2Ug=="

In [7]:
from azure.ai.ml.entities import AzureBlobDatastore
from azure.ai.ml.entities import AccountKeyConfiguration

store=AzureBlobDatastore(
    name='blob_training_data',
    description="Blob Storage for Training Data",
    account_name="mlwdp100storaged0308376e",
    container_name="training-data",
    credentials=AccountKeyConfiguration(
        account_key=key
    ),
)

ml_client.create_or_update(entity=store)


AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'blob_training_data', 'description': 'Blob Storage for Training Data', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-lb8165ca7f2e4432ba1/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-lb8165ca7f2e4432ba1/datastores/blob_training_data', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cib8165ca7f2e4432ba1/code/Users/ritishadhikari', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f9bddc8e6b0>, 'credentials': {'type': 'account_key'}, 'container_name': 'training-data', 'account_name': 'mlwdp100storaged0308376e', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [8]:
# List the Datastores Again

stores=ml_client.datastores.list()
for ds_name in stores:
    print(ds_name.name)

blob_training_data
workspaceworkingdirectory
workspaceartifactstore
workspaceblobstore
workspacefilestore


<h2>Create Data Assets - URI File</h2>

In [9]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

my_path="azure-ml-labs/Labs/03/data/diabetes.csv"

my_data=Data(
    path=my_path,
    type=AssetTypes.URI_FILE,
    description="""
        Data Asset pointing to a local file, automatically uploaded to the 
        Default Datastore
    """,
    name="diabetes-local"
)

ml_client.data.create_or_update(data=my_data)

[32mUploading diabetes.csv[32m (< 1 MB): 0.00B [00:00, ?B/s][32mUploading diabetes.csv[32m (< 1 MB): 100%|██████████| 518k/518k [00:00<00:00, 5.12MB/s][32mUploading diabetes.csv[32m (< 1 MB): 100%|██████████| 518k/518k [00:00<00:00, 5.06MB/s]
[39m



Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes-local', 'description': '\n        Data Asset pointing to a local file, automatically uploaded to the \n        Default Datastore\n    ', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-lb8165ca7f2e4432ba1/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-lb8165ca7f2e4432ba1/data/diabetes-local/versions/1', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cib8165ca7f2e4432ba1/code/Users/ritishadhikari', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f9be923aa70>, 'serialize': <msrest.serialization.Serializer object at 0x7f9be9238df0>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/

<h2> Create Data Assets - URI Folder </h2>

In [10]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

datastore_path='azureml://datastores/blob_training_data/paths/data-asset-path/'

my_data=Data(
    name="diabetes-datastore-path",
    path=datastore_path,
    type=AssetTypes.URI_FOLDER,
    description="Data Asset pointing to data-asset-path folder in Datastore"    
)

ml_client.data.create_or_update(data=my_data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_folder', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes-datastore-path', 'description': 'Data Asset pointing to data-asset-path folder in Datastore', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-lb8165ca7f2e4432ba1/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-lb8165ca7f2e4432ba1/data/diabetes-datastore-path/versions/1', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cib8165ca7f2e4432ba1/code/Users/ritishadhikari', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f9bdd803010>, 'serialize': <msrest.serialization.Serializer object at 0x7f9bdd801960>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/18a1f27f-edf5-495e-9acb-753c9333

<h2> Create Data Assets - MLTable </h2>

In [21]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

local_path = 'azure-ml-labs/Labs/03/data/'

my_data=Data(
    name="diabetes-table",
    path=local_path,
    type=AssetTypes.MLTABLE,
    description="ML Table Pointing to diabetes.csv in data folder"    
)

ml_client.data.create_or_update(data=my_data)

Uploading data (0.52 MBs):   0%|          | 0/518211 [00:00<?, ?it/s]Uploading data (0.52 MBs): 100%|██████████| 518211/518211 [00:00<00:00, 4506275.66it/s]Uploading data (0.52 MBs): 100%|██████████| 518211/518211 [00:00<00:00, 4394689.80it/s]




Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': ['./diabetes.csv'], 'type': 'mltable', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes-table', 'description': 'ML Table Pointing to diabetes.csv in data folder', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-lb8165ca7f2e4432ba1/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-lb8165ca7f2e4432ba1/data/diabetes-table/versions/4', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cib8165ca7f2e4432ba1/code/Users/ritishadhikari', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f9bdc7e5090>, 'serialize': <msrest.serialization.Serializer object at 0x7f9bdc7e5bd0>, 'version': '4', 'latest_version': None, 'path': 'azureml://subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegrou

In [20]:
! ls azure-ml-labs/Labs/03/data/

MLTable  diabetes.csv


In [23]:
datasets=ml_client.data.list()
for ds_name in datasets:
    print(ds_name.name)

diabetes-local
diabetes-datastore-path
diabetes-table


<h2> Read Data in a Notebook </h2>

In [25]:
import mltable

registered_data_asset = ml_client.data.get(name='diabetes-table', version=1)
tbl = mltable.load(f"azureml:/{registered_data_asset.id}")
df = tbl.to_pandas_dataframe()
df.head(5)

UserErrorException: Dataflow visit error: ExecutionError(StreamError(NotFound))
	VisitError(ExecutionError(StreamError(NotFound)))
=> Failed with execution error: error in streaming from input data sources
	ExecutionError(StreamError(NotFound)); Not able to find MLTable file

<h2> Use Data in a Job </h2>

In [26]:
import os

script_folder="src"
os.makedirs(script_folder, exist_ok=True)
print(script_folder, "folder created")

src folder created


In [33]:
%%writefile $script_folder/move-data.py
import argparse
import pandas as pd
import numpy as np
from pathlib import Path

def main(args):
    df=get_data(path=args.input_data)
    output_df=df.to_csv((Path(args.output_datastore)/"diabetes.csv"), index=False)

def get_data(path):
    df=pd.read_csv(path)
    
    row_count=(len(df))
    print(f"Analyzing {row_count} rows of data")
    return df

def parse_args():
    parser=argparse.ArgumentParser()

    parser.add_argument("--input_data", dest="input_data", type=str)
    parser.add_argument("--output_datastore", dest="output_datastore",type=str)

    args=parser.parse_args()
    return args

if __name__=="__main__":
    print("\n\n")
    print("*"*60)
    args=parse_args()

    #run main function
    main(args=args)

Overwriting src/move-data.py


<h2> Submit a Job</h2>

In [37]:
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import command

my_job_inputs={
    "local_data": Input(
            type=AssetTypes.URI_FILE, 
            path="azureml:diabetes-local:1"
            )
}

my_job_outputs={
    "datastore_data":Output(
            type=AssetTypes.URI_FOLDER, 
            path="azureml://datastores/blob_training_data/paths/datastore-path"
            )
}

In [38]:
command_shell="python move-data.py --input_data=${{inputs.local_data}} --output_datastore ${{outputs.datastore_data}}"

In [39]:
# Configure Job:
job=command(
    code='/.src',  # name of the folder
    command=command_shell,
    inputs=my_job_inputs,
    outputs=my_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="move-diabetes-data",
    experiment_name="move-diabetes-data"
)

# submit job
returned_job=ml_client.create_or_update(entity=job)
aml_url=returned_job.studio_url
print(f"Monitor your job at:{aml_url}")

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Exception: {
  "result": "Failed",
  "errors": [
    {
      "message": "Can't find directory or file in resolved absolute path: /mnt/batch/tasks/shared/LS_root/mounts/clusters/cib8165ca7f2e4432ba1/code/Users/ritishadhikari/azureml:/.src.; Not a valid URL.; In order to specify a git path, please provide the correct path prefixed with 'git+\n; In order to specify an existing codes, please provide the correct registry path prefixed with 'azureml://':\n; In order to specify an existing codes, please provide the correct registry path prefixed with 'azureml://':\n; Either version or label is not provided for code or the id is not valid.",
      "path": "component.code",
      "value": "azureml:/.src"
    }
  ]
}