In [13]:
import azureml
from IPython.display import display, Markdown
from azureml.core import Experiment, ScriptRunConfig, Workspace, RunConfiguration
from azureml.core.dataset import Dataset
from azureml.core.environment import Environment
from azureml.core.runconfig import DockerConfiguration

from model_drift import settings, helpers
# Report the installed azureml-core version so the run can be tied to an SDK release.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)


Azure ML SDK Version:  1.38.0


In [14]:
# Load the Azure ML workspace described by the project's config file.
workspace_config_path = settings.AZUREML_CONFIG
ws = Workspace.from_config(path=workspace_config_path)

In [28]:
# Names of the Azure ML assets used by this notebook.
input_dataset_name = "results"
experiment_name = 'generate-drift-metrics'
env_name = "monitoring"
exp = Experiment(workspace=ws, name=experiment_name)

environment_file = settings.CONDA_ENVIRONMENT_FILE
project_dir = settings.SRC_DIR

# Environment: created from the project's conda specification.
pytorch_env = Environment.from_conda_specification(env_name, file_path=str(environment_file))
# Set the variable BEFORE register()/build() so the registered definition is the
# same one that is later attached to the run configuration and submitted.
pytorch_env.environment_variables["RSLEX_DIRECT_VOLUME_MOUNT"] = "True"
pytorch_env.register(workspace=ws)
# NOTE(review): the build handle is kept in `build` but never waited on here --
# confirm that not blocking on the image build is intended.
build = pytorch_env.build(workspace=ws)


# Run Configuration
run_config = RunConfiguration()
run_config.environment_variables["RSLEX_DIRECT_VOLUME_MOUNT"] = "True"

run_config.environment = pytorch_env
# NOTE(review): shm_size="100G" is a hard-coded value -- confirm it suits the target compute.
run_config.docker = DockerConfiguration(use_docker=True, shm_size="100G")

# Note: COMMON RUNTIME has a bug where print statements sometimes disappear.
# Set this flag as a workaround to use the legacy runtime.
run_config.environment_variables["AZUREML_COMPUTE_USE_COMMON_RUNTIME"] = "false"

# Input Dataset: looked up by name in the workspace.
dataset = Dataset.get_by_name(ws, name=input_dataset_name)

# Show a clickable link to the experiment in the studio portal.
display(Markdown(f"""
- Experiment: [{exp.name}]({exp.get_portal_url()})
"""))


- Experiment: [generate-drift-metrics](https://ml.azure.com/experiments/id/94c08fc1-81cd-4793-82a9-77918596b36e?wsid=/subscriptions/9ca8df1a-bf40-49c6-a13f-66b72a85f43c/resourcegroups/MLOps-Prototype/workspaces/MLOps_shared&tid=72f988bf-86f1-41af-91ab-2d7cd011db47)


In [29]:
compute_target = "cpu-cluster"

# Arguments shared by every submission of the drift script. They are written as
# small groups purely for readability; the merged dict keeps the exact same keys,
# values and insertion order as a single literal would.
# NOTE(review): the meaning of each option is defined by the target script, not here.
runtime_args = {
    "run_azure": 1,
    "input_dir": dataset.as_named_input("input").as_mount(),
    "output_dir": "./outputs/",
    "generate_name": 0,
    "num_workers": 10,
}
filter_args = {
    "vae_filter": "all-data",
    "classifier_filter": "frontal_only",
    "classifier_dataset": "padchest-finetuned-chx-frontalonly",
}
window_args = {
    "window": "30D",
    "stride": "D",
    "min_periods": 150,
    "ref_frontal_only": 1,
    "include_metadata": 1,
    "replacement": 1,
    "sample_size": 2500,
    "n_samples": 20,
}
date_args = {
    "start_date": "2012-11-01",
    "end_date": "2015-02-01",
}
common_args = {**runtime_args, **filter_args, **window_args, **date_args}

# Run trial with no modifications (only the common arguments are passed).
run_config.target = compute_target
config = ScriptRunConfig(
    source_directory=str(project_dir),
    script="scripts/drift/generate-drift-csv.py",
    arguments=helpers.argsdict2list(common_args),
)
config.run_config = run_config
run = exp.submit(config)



In [30]:
# Run Experimental Trials with Data injection
# Each trial is a dict of extra script arguments merged over `common_args`
# (trial keys win on conflict) and submitted as its own run.

## Performance degradation experiments
# Arguments shared by the three `bad_q` trials; only `bad_q` differs between them.
qbase = {"bad_start_date": "2014-06-01", "bad_sample_start_date": "2014-06-01", "bad_sample_end_date":  "2014-12-31", "indist_remove_date": "2014-06-01"}
q100 = {"bad_q": 1.0, **qbase}
q25 = {"bad_q": 0.25, **qbase}
q5 = {"bad_q": 0.05, **qbase}

peds = {} #TODO

## Lateral Experiment
lateral = {"lateral_add_date":"2014-06-01" ,"indist_remove_date": "2014-09-01"}

trials = [
    q100, q25, q5,
    lateral,
    # peds,
    ]

# The target is the same for every trial, so set it once before the loop.
run_config.target = compute_target

# Keep a handle to every submitted run; rebinding `run` alone would leave only
# the last submission reachable from the notebook.
trial_runs = []
for trial in trials:
    args = {**common_args, **trial}
    config = ScriptRunConfig(
        source_directory=str(project_dir),
        script="scripts/drift/generate-drift-csv.py",
        arguments=helpers.argsdict2list(args),
    )
    config.run_config = run_config
    # `run` still ends up bound to the last submitted run, as before.
    run = exp.submit(config)
    trial_runs.append(run)