In [2]:
from pathlib import Path

import azureml
from IPython.display import display, Markdown
from azureml.core import Run, Model
from azureml.core import Datastore, Experiment, ScriptRunConfig, Workspace, RunConfiguration
from azureml.core.dataset import Dataset
from azureml.data import OutputFileDatasetConfig
from azureml.core.environment import Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.exceptions import UserErrorException

from model_drift import settings

# Show which Azure ML SDK release is installed in this kernel
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Failure while loading azureml_run_type_providers. Failed to load entrypoint automl = azureml.train.automl.run:AutoMLRun._from_run_dto with exception (cloudpickle 2.0.0 (d:\code\mlopsday2\medimaging-modeldriftmonitoring\.venv\lib\site-packages), Requirement.parse('cloudpickle<2.0.0,>=1.1.0'), {'azureml-dataprep'}).


Azure ML SDK Version:  1.34.0


In [3]:
# Connect to workspace
# Identifiers of the Azure ML workspace every later cell operates on.
subscription_id = '9ca8df1a-bf40-49c6-a13f-66b72a85f43c'
resource_group = 'MLOps-Prototype'
workspace_name = 'MLOps_shared'

try:
    ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
    ws.write_config()
    print('Library configuration succeeded')
except Exception:
    # Do not carry on after a failed connection: on a fresh kernel `ws` would
    # be undefined (NameError on the line below), and on a warm kernel a stale
    # `ws` from an earlier run would be reused silently. Re-raising keeps the
    # real cause (auth, permissions, wrong name, ...) visible in the traceback.
    print('Workspace not found')
    raise

print("Workspace:", ws.name)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Library configuration succeeded
Workspace: MLOps_shared


In [4]:
## TODO: Incorporate model and model registration into notebook

# experiment_name = 'chexpert-vae-tune'
# exp = Experiment(workspace=ws, name=experiment_name)
# for r in exp.get_runs():
#     if r.display_name  == "plum_loquat_9kq2qqtn":
#         break
# for run in r.get_children():
#     if run.display_name == "wheat_book_mmkzkspg":
#         break

# from azureml.core.resource_configuration import ResourceConfiguration

# model = run.register_model(model_name='chexpert-vae-wheat_book_mmkzkspg-049',
#                            model_path='outputs/checkpoints/last.ckpt/049.ckpt',
#                            model_framework='PyTorch',
#                            model_framework_version='1.8',
#                            description="TBD",
#                            resource_configuration=ResourceConfiguration(cpu=8, memory_in_gb=2, gpu=True))

In [5]:
import re

# ---------------------------------------------------------------------------
# Run settings
# ---------------------------------------------------------------------------
# When dbg is True the outputs go to a separate "<name>_dbg" dataset under a
# "dbg" sub-folder, and '--limit_predict_batches' is appended to the script
# arguments (see the two `if dbg:` sections below).
dbg = True
log_refresh_rate = 1

# Name experiment
# NOTE(review): experiment_name carries a hard-coded "-dbg" suffix that does
# not follow the `dbg` flag -- confirm it should be renamed when dbg is False.
input_dataset_name = "padchest"
experiment_name = 'chexpert-vae-OnPadChest-dbg'
model_name = "chexpert-vae-wheat_book_mmkzkspg-049"
datastore_name = "vaeresults_padchest"
env_name = "vae"
# Only the effective compute target is kept; the original cell also assigned
# "nc6-uswest2" first and immediately overwrote it.
compute_target = "NC24rs-v3-usw2-d"

# ---------------------------------------------------------------------------
# Experiment and environment
# ---------------------------------------------------------------------------
exp = Experiment(workspace=ws, name=experiment_name)
environment_file = settings.CONDA_ENVIRONMENT_FILE
project_dir = Path("./experiment")
pytorch_env = Environment.from_conda_specification(env_name, file_path=str(environment_file))

pytorch_env.register(workspace=ws)
# The handle returned by build() is kept in `build` but not used further in
# this cell.
build = pytorch_env.build(workspace=ws)
# Set on the local environment object after register/build; it reaches the run
# through `run_config.environment` below.
pytorch_env.environment_variables["RSLEX_DIRECT_VOLUME_MOUNT"] = "True"

# ---------------------------------------------------------------------------
# Input dataset and output location
# ---------------------------------------------------------------------------
pc_dataset = Dataset.get_by_name(ws, name=input_dataset_name)
datastore = ws.datastores[datastore_name]

# Derive the output dataset name from the model name: '-' becomes '_', then
# every remaining run of non-word characters is removed.
output_dataset_name = re.sub(r'\W+', "", model_name.replace('-', "_"))
output_dir = output_dataset_name

if dbg:
    output_dataset_name = f"{output_dataset_name}_dbg"
    output_dir = f"{output_dir}/dbg"

# Collapse doubled slashes and force exactly one trailing slash on the
# destination path inside the datastore.
output_dir = output_dir.replace("//", "/")
output_dataset = OutputFileDatasetConfig(name=output_dataset_name, destination=(datastore, output_dir.strip("/") + "/"))

print("Output Dataset Name:", output_dataset_name)
print("Experiment:", exp.name)
print("Environment:", pytorch_env.name)

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
run_config = RunConfiguration()
run_config.environment = pytorch_env
run_config.docker = DockerConfiguration(use_docker=True, shm_size="100G")

# NOTE(review): the key is the literal string "output_dataset_name", not the
# value of the variable with that name -- confirm this is intended.
run_config.output_data = {"output_dataset_name": output_dataset}

# Command-line arguments handed to score.py.
args = [
    "--run_azure", 1,
    "--model", model_name,
    '--data_folder', pc_dataset.as_named_input('padchestv1').as_mount(),
    '--progress_bar_refresh_rate', log_refresh_rate,
    "--log_every_n_steps", log_refresh_rate,
    "--flush_logs_every_n_steps", log_refresh_rate,
    "--batch_size", 128,
    "--accelerator", "ddp",
    "--output_dir", output_dataset.as_mount(),
    "--write_recon", 1,
    "--write_grid", 1,
    "--latent_output_dir", "./outputs/",
    "--append_run_name", 1,
    ]

if dbg:
    args += [
        '--limit_predict_batches', 10,
    ]

# ---------------------------------------------------------------------------
# Submit
# ---------------------------------------------------------------------------
config = ScriptRunConfig(
    source_directory=str(project_dir),
    script="score.py",
    arguments=args,
)
run_config.target = compute_target
config.run_config = run_config

run = exp.submit(config)
# Render clickable portal links for the experiment and the submitted run.
display(Markdown(f"""
- Experiement: [{run.experiment.name}]({run.experiment.get_portal_url()})
- Run: [{run.display_name}]({run.get_portal_url()})
- Target: {config.run_config.target}
"""))

Output Dataset Name: chexpert_vae_wheat_book_mmkzkspg_049_dbg
Experiment: chexpert-vae-OnPadChest-dbg
Environment: vae



- Experiement: [chexpert-vae-OnPadChest-dbg](https://ml.azure.com/experiments/chexpert-vae-OnPadChest-dbg?wsid=/subscriptions/9ca8df1a-bf40-49c6-a13f-66b72a85f43c/resourcegroups/MLOps-Prototype/workspaces/MLOps_shared&tid=72f988bf-86f1-41af-91ab-2d7cd011db47)
- Run: [shy_dinner_p1mj4c8c](https://ml.azure.com/runs/chexpert-vae-OnPadChest-dbg_1633632632_5c17a264?wsid=/subscriptions/9ca8df1a-bf40-49c6-a13f-66b72a85f43c/resourcegroups/MLOps-Prototype/workspaces/MLOps_shared&tid=72f988bf-86f1-41af-91ab-2d7cd011db47)
- Target: NC24rs-v3-usw2-d


In [91]:
# TODO: Move to another notebook or delete?

# experiment_name = "dataset_debug"
# chex_dataset = Dataset.get_by_name(ws, name='chexpert')
# pc_dataset = Dataset.get_by_name(ws, name=input_dataset_name)


# exp = Experiment(workspace=ws, name=experiment_name)
# environment_file = settings.CONDA_ENVIRONMENT_FILE
# project_dir = Path("./experiment")
# pytorch_env = Environment.from_conda_specification(env_name, file_path =str(environment_file))

# pytorch_env.register(workspace=ws)
# build = pytorch_env.build(workspace=ws)

# pytorch_env.environment_variables["RSLEX_DIRECT_VOLUME_MOUNT"] = "True"

# print("Experiment:", exp.name)
# print("Environment:", pytorch_env.name)

# run_config = RunConfiguration()
# run_config.environment = pytorch_env
# run_config.docker = DockerConfiguration(use_docker=True, shm_size="100G")

# args = [
#     '--data1', pc_dataset.as_named_input('padchestv1').as_mount(),
#     '--data2', chex_dataset.as_named_input('chexpertv1').as_mount(),
#     ]


# config = ScriptRunConfig(
#     source_directory = str(project_dir), 
#     script = "dataset_test.py",
#     arguments=args,
# )

# config.run_config = run_config

# config.run_config.target = compute_target

# # config.run_config.target = "nc6-uswest2"

# run = exp.submit(config)
# display(Markdown(f"""
# - Experiement: [{run.experiment.name}]({run.experiment.get_portal_url()})
# - Run: [{run.display_name}]({run.get_portal_url()})
# - Target: {config.run_config.target}
# """))

Experiment: dataset_debug
Environment: vae



- Experiement: [dataset_debug](https://ml.azure.com/experiments/dataset_debug?wsid=/subscriptions/9ca8df1a-bf40-49c6-a13f-66b72a85f43c/resourcegroups/MLOps-Prototype/workspaces/MLOps_shared&tid=72f988bf-86f1-41af-91ab-2d7cd011db47)
- Run: [dreamy_school_t7mkvb9q](https://ml.azure.com/runs/dataset_debug_1633542663_b1d850e4?wsid=/subscriptions/9ca8df1a-bf40-49c6-a13f-66b72a85f43c/resourcegroups/MLOps-Prototype/workspaces/MLOps_shared&tid=72f988bf-86f1-41af-91ab-2d7cd011db47)
- Target: nc6-uswest2
