###  Testing with SDK v2

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input
from azure.identity import DefaultAzureCredential
import sys

# Enter details of your AML workspace
# NOTE(review): the subscription id is hard-coded in the notebook — consider
# loading it from configuration before sharing.
subscription_id = "840b5c5c-3f4a-459a-94fc-6bad2a969f9d"
resource_group = "ml"
workspace = "ws02ent"

# Get a handle to the workspace, authenticating with the default Azure
# credential chain.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Put the parent directory on the import path so that `src.ray_on_aml`
# can be imported by the cells below.
sys.path.append("../")

In [None]:
from src.ray_on_aml.core import Ray_On_AML

# Ray_On_AML handle bound to the workspace client and the "ds11" compute cluster.
ray_on_aml = Ray_On_AML(ml_client=ml_client, compute_cluster="ds11")

# Note: if you customize the pip installation of the cluster, you must also
# list the ray package (e.g. ray[data]) at the version that matches the ray
# package(s) on your compute instance. If pip_packages is not specified,
# ray[default] is inserted automatically.

# For use in client mode, uncomment these lines
# ray = ray_on_aml.getRay(num_node=2,pip_packages=["ray[data]","fastparquet", "azureml-mlflow", "pyarrow==6.0.1", "dask==2022.2.0", "adlfs", "fsspec"])
# client = ray.init(f"ray://{ray_on_aml.headnode_private_ip}:10001")

# Use the compute instance (CI) as the head node.
ray = ray_on_aml.getRay(
    ci_is_head=True,
    num_node=2,
    pip_packages=[
        "ray[data]",
        "fastparquet",
        "azureml-mlflow",
        "pyarrow==6.0.1",
        "dask==2022.2.0",
        "adlfs",
        "fsspec",
    ],
)

In [2]:
#Example of using input and output for interactive job
from azure.ai.ml import command, Input, Output
from src.ray_on_aml.core import Ray_On_AML
import logging
# Ray_On_AML handle for the "ds11" cluster with INFO-level verbosity.
ray_on_aml = Ray_On_AML(
    ml_client=ml_client,
    compute_cluster="ds11",
    verbosity=logging.INFO,
)

# One folder input, exposed to the job under the name "ISDWeather".
inputs = dict(
    ISDWeather=Input(
        type="uri_folder",
        path="azureml://datastores/adlsstore0001/paths/ISDWeather/year=2008",
    ),
)

# Two folder outputs; both point at the same datastore path.
outputs = {
    output_name: Output(
        type="uri_folder",
        path="azureml://datastores/adlsstore0001/paths/dev",
    )
    for output_name in ("output1", "output2")
}

# Start a 2-node cluster with the inputs/outputs attached.
ray = ray_on_aml.getRay(
    inputs=inputs,
    outputs=outputs,
    num_node=2,
    pip_packages=[
        "ray[data]",
        "fastparquet",
        "azureml-mlflow",
        "pyarrow==6.0.1",
        "dask==2022.2.0",
        "adlfs",
        "fsspec",
    ],
)

[32mUploading .tmp (0.0 MBs): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4211/4211 [00:00<00:00, 108466.94i

Waiting cluster to start and return head node's ip
.....
 cluster is ready, head node ip  10.0.0.8


In [3]:
# Connect to the cluster through the ray:// client endpoint on port 10001,
# using the head node's private IP exposed by ray_on_aml.
client = ray.init(f"ray://{ray_on_aml.headnode_private_ip}:10001")

# Display the resources Ray reports for the connected cluster.
ray.cluster_resources()

{'object_store_memory': 4305602150.0,
 'CPU': 2.0,
 'node:10.0.0.8': 1.0,
 'memory': 8611204302.0}

In [14]:
# Show the mapping from input/output names to the paths where they are mounted
# (used below to locate the "ISDWeather" input).
ray_on_aml.mount_points

[2m[33m(raylet, ip=10.0.0.9)[0m   aiogrpc.init_grpc_aio()
[2m[33m(raylet, ip=10.0.0.9)[0m   loop = asyncio.get_event_loop()


{'master_ip': 'None',
 'ISDWeather': '/mnt/azureml/cr/j/43c0b9ebce9445aaa0254793e4dc8e37/cap/data-capability/wd/INPUT_ISDWeather',
 'output1': '/mnt/azureml/cr/j/43c0b9ebce9445aaa0254793e4dc8e37/cap/data-capability/wd/output1',
 'output2': '/mnt/azureml/cr/j/43c0b9ebce9445aaa0254793e4dc8e37/cap/data-capability/wd/output2'}

In [4]:
# Disabled alternative: access the public open-data storage account directly
# (adlfs filesystem / Dask-on-Ray) instead of the mounted job input.
# from adlfs import AzureBlobFileSystem
# import dask.dataframe as dd
# from ray.util.dask import ray_dask_get, enable_dask_on_ray, disable_dask_on_ray
# enable_dask_on_ray()
# abfs = AzureBlobFileSystem(account_name="azureopendatastorage",  container_name="isdweatherdatacontainer")

# storage_options = {'account_name': 'azureopendatastorage'}
# ddf = dd.read_parquet('az://nyctlc/green/puYear=2019/puMonth=*/*.parquet', storage_options=storage_options)

# Read the parquet files from the path where the "ISDWeather" input is mounted.
data = ray.data.read_parquet(ray_on_aml.mount_points['ISDWeather'])

# Disabled alternative: read through the adlfs filesystem defined above.
# data = ray.data.read_parquet("az://isdweatherdatacontainer/ISDWeather/year=2009", filesystem=abfs)
# Print the total number of rows in the dataset.
print(data.count())

[2m[36m(_get_read_tasks pid=274)[0m [dataset]: Run `pip install tqdm` to enable progress reporting.


[2m[33m(raylet, ip=10.0.0.9)[0m   aiogrpc.init_grpc_aio()
[2m[33m(raylet, ip=10.0.0.9)[0m   loop = asyncio.get_event_loop()


97581959


In [5]:
# Close the Ray client connection returned by ray.init(...) earlier.
client.disconnect()

###  Testing with SDK v1

In [10]:
# Shut down the cluster held by the current `ray_on_aml` object (the one
# created in the cells above, before the SDK v1 object is constructed).
# NOTE(review): end_all_runs=True presumably also ends any remaining runs —
# confirm against Ray_On_AML.shutdown.
ray_on_aml.shutdown(end_all_runs=True)

In [None]:
from azureml.core import Workspace
import sys
sys.path.append("../") # go to parent dir
import importlib


In [None]:
from src.ray_on_aml.core import Ray_On_AML
import ray
# Load the SDK v1 workspace handle from the local workspace configuration.
ws = Workspace.from_config()

# Ray_On_AML handle for the "ds11" cluster, capped at 2 nodes.
ray_on_aml = Ray_On_AML(ws=ws, compute_cluster="ds11", maxnode=2)

# Start Ray with the compute instance acting as the head node.
ray_on_aml.getRay(
    num_node=2,
    pip_packages=["torch==1.13.0", "azureml-mlflow"],
    ci_is_head=True,
)

# Client-mode variant (disabled):
# head_ip = ray_on_aml.getRay(num_node=2,pip_packages=["torch==1.13.0", "azureml-mlflow"], ci_is_head=True)

# client = ray.init(f"ray://{head_ip}:10001")


In [None]:
# Shut down the Ray runtime/connection held by this kernel.
ray.shutdown()

In [None]:
# Display the resources Ray reports for the cluster.
# NOTE(review): in file order this cell comes right after ray.shutdown();
# presumably it is meant to be run while Ray is still connected — confirm.
ray.cluster_resources()

In [None]:
# Shut down the cluster managed by the current `ray_on_aml` object.
# NOTE(review): the training cells below still use Ray — confirm whether this
# cell is intended to run before them.
ray_on_aml.shutdown()

In [None]:
import ray

# Load data.
dataset = ray.data.read_csv("https://azuremlexamples.blob.core.windows.net/datasets/iris.csv").repartition(4)

# Hold out 30% of the rows for validation.
train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)

# Create a test dataset by dropping the target column.
test_dataset = valid_dataset.drop_columns(cols=["target"])

import numpy as np

from ray.data.preprocessors import Concatenator, Chain, StandardScaler

# Preprocessor handed to the trainer further down. It concatenates every
# column except "target" into a single float32 column; Concatenator's default
# output column name is "concat_out", which is the column the training loop reads.
# Previously `preprocessor` existed only as commented-out code, so the trainer
# cell failed with a NameError.
# NOTE(review): assumes the CSV has numeric feature columns plus a "target"
# label column — confirm against the dataset. To also scale features, wrap this
# in Chain(StandardScaler(columns=[...]), Concatenator(...)) using column names
# that actually exist in this dataset.
preprocessor = Concatenator(exclude=["target"], dtype=np.float32)


In [None]:
pip install pyarrow==6.0.1

In [None]:
import torch
import torch.nn as nn
from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

from ray import train
from ray.air import session
from ray.air.config import ScalingConfig
from ray.train.torch import TorchCheckpoint, TorchTrainer


def create_model(input_features):
    """Build the small feed-forward network used by the training loop.

    Args:
        input_features: Number of input features accepted by the first
            linear layer.

    Returns:
        An ``nn.Sequential`` with two 16-unit hidden layers (each followed by
        ReLU) and a single-unit output layer passed through a sigmoid.
    """
    hidden_units = 16
    layers = [
        nn.Linear(in_features=input_features, out_features=hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 1),
        nn.Sigmoid(),
    ]
    return nn.Sequential(*layers)


def train_loop_per_worker(config):
    """Training function passed to the TorchTrainer as its per-worker loop.

    Args:
        config: Dict providing "batch_size", "lr", "num_epochs" and
            "num_features".

    For every epoch the function iterates over the "train" dataset shard in
    torch batches, performs one SGD step per batch on a binary cross-entropy
    loss, and then reports the loss of the epoch's final batch together with
    a checkpoint built from the model.
    """
    batch_size, lr = config["batch_size"], config["lr"]
    epochs, num_features = config["num_epochs"], config["num_features"]

    # Dataset shard registered under the "train" key for this worker.
    shard = train.get_dataset_shard("train")

    # Build the network, then hand it to Ray Train's prepare_model.
    model = train.torch.prepare_model(create_model(num_features))

    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    for _ in range(epochs):
        batches = shard.iter_torch_batches(batch_size=batch_size, dtypes=torch.float32)
        for batch in batches:
            # "concat_out" is the output column of the Concatenator.
            features = batch["concat_out"]
            targets = batch["target"]
            optimizer.zero_grad()
            # unsqueeze(1) adds a trailing dimension to the labels before
            # they are compared with the model output.
            train_loss = criterion(model(features), targets.unsqueeze(1))
            train_loss.backward()
            optimizer.step()
        # Only the final batch's loss is reported for the epoch.
        session.report(
            {"loss": train_loss.item()},
            checkpoint=TorchCheckpoint.from_model(model),
        )


# Every column in the training schema except one (the label) is a feature.
num_features = len(train_dataset.schema().names) - 1

# `train_dataset` and `preprocessor` must already be defined by earlier cells.
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config=dict(
        batch_size=128,
        num_epochs=20,
        num_features=num_features,
        lr=0.001,
    ),
    datasets={"train": train_dataset},
    preprocessor=preprocessor,
    scaling_config=ScalingConfig(
        # Two data-parallel workers on CPU, plus one CPU for the trainer itself.
        num_workers=2,
        use_gpu=False,
        trainer_resources={"CPU": 1},
    ),
)

# Execute training and show the metrics from the last report.
result = trainer.fit()
print(f"Last result: {result.metrics}")
# Last result: {'loss': 0.6559339960416158, ...}

In [None]:
pip install --upgrade ray

In [None]:
# Shut down the cluster managed by `ray_on_aml` once training is finished.
ray_on_aml.shutdown()

In [None]:
import ray
# Connect through the ray:// client endpoint using the head-node IP exposed by
# ray_on_aml; `head_ip` is never assigned by any live code in this notebook,
# so the previous f-string raised a NameError.
# NOTE(review): the preceding cell calls ray_on_aml.shutdown(); the cluster
# presumably has to be started again via getRay(...) in client mode before
# this connection can succeed — confirm.
client = ray.init(f"ray://{ray_on_aml.headnode_private_ip}:10001")


In [None]:
# Display the object returned by ray.init(...) above.
client

In [None]:
# Shut down the Ray runtime/connection held by this kernel.
ray.shutdown()

In [None]:
# Close the Ray client connection returned by ray.init(...).
# NOTE(review): in file order this follows ray.shutdown(); presumably only one
# of the two is needed — confirm.
client.disconnect()