## Example notebook: submit child runs and retrieve the best model from a tree of runs

### Azure Databricks cluster setup instructions

In cluster libraries section:

1. Install idna==2.5 from PyPI

In [None]:
import os
import random
import sys

from azureml.core import Experiment, Run, ScriptRunConfig, Workspace

In [None]:
# Connect to the Azure ML workspace; replace the placeholder identifiers with your own.
ws = Workspace(
    subscription_id="my-sub-id",
    resource_group="my-rg",
    workspace_name="my-ws",
)

In [None]:
# Experiment under which the parent run (and, through it, the child runs) is recorded.
exp = Experiment(workspace=ws, name="test-exp")

In [None]:
# Create the script directory idempotently so that "Restart & Run All" does not
# fail when the directory is left over from a previous run.
os.makedirs("test", exist_ok=True)

Script for child run

In [None]:
%%writefile test/train_child.py

from azureml.core import Run
import random

run = Run.get_context()

# Check the child run's parent id
print("Hello from child with parent {}".format(run.parent.id))

run.log("accuracy",random.uniform(0.5,1.0))
with open("model.txt","w") as f:
    f.write("dummy model")
run.upload_file("model.txt","model.txt")

Script for the parent run, which submits child runs to a Machine Learning Compute (MLC) cluster

In [None]:
%%writefile test/train.py

from azureml.core import Run
from azureml.core import ScriptRunConfig
import random

child_runconfig = ScriptRunConfig("/databricks/driver/test","train_child.py")

child_runconfig.run_config.history.snapshot_project = False
child_runconfig.run_config.target = "cpu-cluster"

run = Run.get_context()

## Log some value
print("Hello from parent {}".format(run.id))
run.log("accuracy",random.uniform(0.5,1.0))

## Save model. Note that model can be any file-serializable content
with open("model.txt","w") as f:
    f.write("dummy model")
run.upload_file("model.txt","model.txt")

## Submit 2 child runs
child1 = run.submit_child(child_runconfig)
child2 = run.submit_child(child_runconfig)

## Wait for children to complete
child1.wait_for_completion(show_output=True)
child2.wait_for_completion(show_output=True)

Submit a parent run that executes locally on the Databricks cluster, then have the parent submit child runs to Machine Learning Compute

In [None]:
# Configure the parent run: train.py is executed locally, using the same Python
# interpreter that is running this notebook.
runconfig = ScriptRunConfig("/databricks/driver/test", "train.py")
runconfig.run_config.target = "local"
runconfig.run_config.history.snapshot_project = False

# The environment is user-managed, so Azure ML uses the interpreter below as-is
# and no conda dependency specification is assigned.
runconfig.run_config.environment.python.user_managed_dependencies = True
runconfig.run_config.environment.python.interpreter_path = sys.executable
runconfig.run_config.environment.docker.enabled = False

# Submit the parent and block until it (and the children it waits on) finishes.
parent_run = exp.submit(config=runconfig)
parent_run.wait_for_completion(show_output=True)

Example of querying the tree of runs under a common parent to find the run with the best metric value

In [None]:
def get_best_run(parent_run, metric_name, metric_direction = "descend"):
    """Find the run with the best metric value among a parent run and its descendants.

    Args:
        parent_run: Root of the run tree. The parent itself is a candidate too.
        metric_name: Name of the logged metric to compare.
        metric_direction: "descend" selects the largest value, "ascend" the
            smallest.

    Returns:
        A ``(run, metric_value)`` tuple for the selected run.

    Raises:
        ValueError: If ``metric_direction`` is not "ascend" or "descend", or if
            no run in the tree has logged ``metric_name``.
    """
    if metric_direction not in ("ascend", "descend"):
        raise ValueError("metric_direction must be 'ascend' or 'descend'")

    candidates = list(parent_run.get_children(recursive=True))
    candidates.append(parent_run)  # the parent competes with its children

    # get_metrics returns a dict keyed by metric name; pull out the value so the
    # comparison is between metric values rather than dicts, and skip runs that
    # never logged the metric.
    scored = []
    for run in candidates:
        metrics = run.get_metrics(name=metric_name)
        if metric_name in metrics:
            scored.append((run, metrics[metric_name]))

    if not scored:
        raise ValueError("no run has logged metric '{}'".format(metric_name))

    pick = min if metric_direction == "ascend" else max
    return pick(scored, key=lambda pair: pair[1])

In [None]:
# Higher accuracy is better, so select in "descend" direction.
best_run, best_metric = get_best_run(
    parent_run,
    metric_name="accuracy",
    metric_direction="descend",
)
print("Run Id:", best_run.id)
print("Accuracy:", best_metric)

Register the model from the best run

In [None]:
# Register the model file that the best run uploaded as "model.txt".
best_model = best_run.register_model(
    model_name="dummy-best-model",
    model_path="model.txt",
)
print("Double-check run id:", best_model.run_id)
best_model