In [1]:
from azure.ai.ml import command, Input, MLClient, UserIdentityConfiguration, ManagedIdentityConfiguration
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.dsl import pipeline
from dotenv import load_dotenv
import pandas as pd
import os

# specify the details of your subscription
SUBSCRIPTION_ID = "e5615bfe-b43b-41ce-bccb-b78867c2ce63"
RESOURCE_GROUP = "rg-dp100-demo-001"
WORKSPACE_NAME = "mlw-dp100-demo"
DATASTORE_NAME = "blobdatastore2"

# get a handle to the subscription
load_dotenv("python.env")

ml_client = MLClient(DefaultAzureCredential(), 
                     subscription_id=SUBSCRIPTION_ID, 
                     resource_group_name=RESOURCE_GROUP,
                     workspace_name=WORKSPACE_NAME)

### URI File Data Asset

Supported paths:
- Local: `./<path>`
- Azure Blob Storage:
     `wasbs://<accountname>.blob.core.windows.net/<containername>/<path_to_data>/`
    
- Azure Data Lake Storage (Gen 2):
    `abfss://<file_system>@<account_name>.dfs.core.windows.net/<folder>/<file>`
    
- Datastore:
    `azureml://datastores/<datastore_name>/paths/<folder>/<file>`

In [2]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes


# create a data asset from a local file
data_asset_local = './data/titanic.csv'

my_data = Data(
    path=data_asset_local,
    type=AssetTypes.URI_FILE,
    description="Data asset created from local file",
    name="titanic_data_asset_from_local",
)

ml_client.data.create_or_update(my_data)

[32mUploading titanic.csv[32m (< 1 MB): 100%|##########| 60.3k/60.3k [00:00<00:00, 241kB/s]
[39m



Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'titanic_data_asset_from_local', 'description': 'Data asset created from local file', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourceGroups/rg-dp100-demo-001/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-demo/data/titanic_data_asset_from_local/versions/1', 'Resource__source_path': None, 'base_path': 'd:\\Repositories\\GitHub\\dp-100', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x0000023895D71590>, 'serialize': <msrest.serialization.Serializer object at 0x0000023895D7DBD0>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourcegroups/rg-dp100-demo-001/workspaces/mlw-dp100-demo/datastores/workspaceblobstore/

In [18]:
# create a data asset from datastore:
# with the datastore authorized via SAS token
data_asset_datastore = 'azureml://datastores/blob_titanic_sas/paths/titanic.csv'

my_data = Data(
    path=data_asset_datastore,
    type=AssetTypes.URI_FILE,
    description="Data asset created from datastore",
    name="titanic_data_asset_from_datastore_sas",
)

ml_client.data.create_or_update(my_data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'titanic_data_asset_from_datastore_sas', 'description': 'Data asset created from datastore', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourceGroups/rg-dp100-demo-001/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-demo/data/titanic_data_asset_from_datastore_sas/versions/1', 'Resource__source_path': None, 'base_path': 'd:\\Repositories\\GitHub\\dp-100', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x0000023897453990>, 'serialize': <msrest.serialization.Serializer object at 0x0000023897452110>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourcegroups/rg-dp100-demo-001/workspaces/mlw-dp100-demo/datastores/blob

In [21]:
# with the datastore authorized via managed identity
data_asset_datastore = 'azureml://datastores/blob_titanic_identity/paths/titanic.csv'

my_data = Data(
    path=data_asset_datastore,
    type=AssetTypes.URI_FILE,
    description="Data asset created from datastore",
    name="titanic_data_asset_from_datastore_identity",
)

ml_client.data.create_or_update(my_data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'titanic_data_asset_from_datastore_identity', 'description': 'Data asset created from datastore', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourceGroups/rg-dp100-demo-001/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-demo/data/titanic_data_asset_from_datastore_identity/versions/3', 'Resource__source_path': None, 'base_path': 'd:\\Repositories\\GitHub\\dp-100', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x00000238973265D0>, 'serialize': <msrest.serialization.Serializer object at 0x00000238F4BE1650>, 'version': '3', 'latest_version': None, 'path': 'azureml://subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourcegroups/rg-dp100-demo-001/workspaces/mlw-dp100-demo/datas

In [14]:
data_asset_blob = 'wasbs://stdp100demo.blob.core.windows.net/datacontainer/titanic.csv'

my_data = Data(
    path=data_asset_blob,
    type=AssetTypes.URI_FILE,
    description="Data asset created from blob container",
    name="titanic_data_asset_from_blob",
)

ml_client.data.create_or_update(my_data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'titanic_data_asset_from_blob', 'description': 'Data asset created from blob container', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e5615bfe-b43b-41ce-bccb-b78867c2ce63/resourceGroups/rg-dp100-demo-001/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-demo/data/titanic_data_asset_from_blob/versions/1', 'Resource__source_path': None, 'base_path': 'd:\\Repositories\\GitHub\\dp-100', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x0000023897368590>, 'serialize': <msrest.serialization.Serializer object at 0x0000023895DA7210>, 'version': '1', 'latest_version': None, 'path': 'wasbs://stdp100demo.blob.core.windows.net/datacontainer/titanic.csv', 'datastore': None})