# Register data assets

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

import os
import pandas as pd

import yaml
# Load the notebook configuration. The encoding is set explicitly so the file
# is decoded the same way on every platform, and safe_load is used because the
# config only needs plain YAML types (no Python object construction).
with open("../config/config.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Details of your AML workspace, read from environment variables.
# os.getenv returns None for any variable that is not set.
subscription_id = os.getenv("subscription_id")
resource_group = os.getenv("resource_group")
workspace = os.getenv("workspace")

# Get a handle to the workspace; keyword arguments make the mapping of each
# value to its MLClient parameter explicit.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)
    

# Create data asset

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Take the asset's identity from the config so that the look-up and the
# registration below always refer to the same asset (the look-up previously
# used a hard-coded empty name and version "1").
training_cfg = cfg["data"]["training"]
asset_name = training_cfg["name"]
# YAML may parse an unquoted version such as `1` as an int; normalise it to a
# string so the same value is used for both registration and look-up.
asset_version = str(training_cfg["version"])

try:
    registered_data_asset = ml_client.data.get(name=asset_name, version=asset_version)
    print("Found data asset. Will not create again")
except Exception:
    # NOTE(review): any failure of the look-up (not only "asset not found") is
    # treated as "not registered yet" -- consider narrowing the exception type.
    my_data = Data(
        path=training_cfg["path"],
        type=AssetTypes.URI_FILE,
        description=training_cfg["description"],
        name=asset_name,
        version=asset_version,
    )
    ml_client.data.create_or_update(my_data)
    # Fetch the asset again after registering it.
    registered_data_asset = ml_client.data.get(name=asset_name, version=asset_version)
    print("Created data asset")

In [None]:
# Read the registered asset's file as CSV, then preview the first five rows
# (five is the default of DataFrame.head).
df = pd.read_csv(filepath_or_buffer=registered_data_asset.path)
df.head(n=5)

# Load data

## Datastore URI Folder - Delta lake

In [None]:
import mltable

# Fetch version "1" of the data asset. The name is an empty placeholder --
# fill in the name of the registered asset before running.
data_asset = ml_client.data.get(name="", version="1")

# Build an MLTable from the Delta table at the asset's path and load it into
# a pandas DataFrame; the bare `df` displays it as the cell output.
tbl = mltable.from_delta_lake(data_asset.path)
df = tbl.to_pandas_dataframe()
df

## Datastore URI File - File

In [None]:
# Fetch version "1" of the data asset. The name is an empty placeholder --
# fill in the name of the registered asset before running.
data_asset = ml_client.data.get(name="", version="1")

# Parse the asset's file as tab-separated text (what pd.read_table does by
# default) and display the resulting DataFrame.
df = pd.read_csv(data_asset.path, sep="\t")
df

# MLTable from Delta Lake

In [None]:
import mltable

# Cloud location of the Delta table (the path where its _delta_log is stored).
# All three values are empty placeholders -- fill them in before running.
container = ""
st_account = ""
path_to_delta_table = ""
delta_table = f"abfss://{container}@{st_account}.dfs.core.windows.net/{path_to_delta_table}"

# Create an MLTable over the Delta table and load it into pandas.
# from_delta_lake can also take a timestamp_as_of argument for time travel;
# it is not passed here.
tbl = mltable.from_delta_lake(delta_table_uri=delta_table)
df = tbl.to_pandas_dataframe()
df

In [None]:
# Materialise the MLTable held in `tbl` (created in an earlier cell) as a
# pandas DataFrame again; the bare `df` displays it as the cell output.
df = tbl.to_pandas_dataframe()
df