In [24]:
from azure.ai.ml import MLClient
from azure.ai.ml import command, Input
from azure.ai.ml.entities import AmlCompute
from azure.ai.ml.entities import Environment
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.ai.ml.sweep import RandomSamplingAlgorithm
from azure.identity import DefaultAzureCredential

# Build the workspace client used by every cell below.
# `from_config` locates a workspace config file on its own (the captured
# output of this cell reports the file it found), so no subscription or
# workspace identifiers are passed here.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)


Found the config file in: /config.json


In [4]:
# Name of the compute cluster that the jobs further down are submitted to.
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the cluster if the workspace already has one under this name.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
except Exception:
    # NOTE(review): every failure of the lookup ends up here, not only a
    # missing cluster -- consider narrowing the exception type.
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        tier="Dedicated",
        size="STANDARD_DS3_V2",
        max_instances=4,
        min_instances=0,
        idle_time_before_scale_down=180,
    )

    # Submit the definition and block until the service returns the result.
    _cluster_poller = ml_client.compute.begin_create_or_update(cpu_cluster)
    cpu_cluster = _cluster_poller.result()

In [25]:
env_name = 'sklearn-env'

# Local environment definition: a base image plus the conda spec shipped
# next to this notebook.
# NOTE(review): the image is referenced by the `latest` tag, so the base
# layer is not pinned -- confirm whether that is intended.
job_env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file='./env/conda.yaml',
    name=env_name,
    description="sklearn 0.24.2",
)

# Register the definition in the workspace; later cells read `name` and
# `version` from the object returned here.
job_env = ml_client.environments.create_or_update(job_env)

In [24]:
# Command job that runs the decision-tree training script from ./src.
# The two numeric inputs are defaults; the sweep cell overrides them with a
# search space.
job = command(
    experiment_name='co2-emissions-decission-tree-regression-model',
    display_name="co2-emissions-decission-tree-regression-model",
    code="./src",
    command=(
        "python DecissionTree.py"
        " --dataset_path ${{inputs.dataset_path}}"
        " --min_samples_split ${{inputs.min_samples_split}}"
        " --max_depth ${{inputs.max_depth}}"
    ),
    # Environment is referenced as "<name>:<version>" of the registered env.
    environment=f"{job_env.name}:{job_env.version}",
    compute=cpu_compute_target,
    inputs={
        # NOTE(review): the dataset URI is hard-coded to one workspace and
        # one upload timestamp -- verify it before running elsewhere.
        "dataset_path": Input(
            path="azureml://subscriptions/ba1f7bf8-2be6-4bed-b818-c745bda74905/resourcegroups/primer_proyecto/workspaces/primer_proyecto/datastores/workspaceblobstore/paths/UI/2023-10-23_204742_UTC/processed_data.csv",
            type="uri_file",
        ),
        "max_depth": 5,
        "min_samples_split": 3,
    },
)


In [25]:
# Re-invoke the command job with a discrete search space in place of the
# fixed `min_samples_split` / `max_depth` inputs.
job_for_sweep = job(
    min_samples_split=Choice(values=[3,5,7,9]),
    max_depth=Choice(values=[3,5,7,9,11,13]),
)

# Random search over the space above: 12 trials in total, at most 4 running
# at once. The sampler is seeded so that re-running this cell draws the same
# hyperparameter combinations (the plain "random" string leaves the seed
# unset).
# `primary_metric` presumably has to match the metric name logged by
# DecissionTree.py -- TODO confirm against the script.
sweep_job = job_for_sweep.sweep(
    compute=cpu_compute_target,
    sampling_algorithm=RandomSamplingAlgorithm(rule="random", seed=42),
    primary_metric="R2 Score",
    goal="Maximize",
    max_total_trials=12,
    max_concurrent_trials=4,
)

# Submit the sweep and stream its log into the cell output.
returned_sweep_job = ml_client.create_or_update(sweep_job)
ml_client.jobs.stream(returned_sweep_job.name)


RunId: honest_farm_jnbrl2yrgj
Web View: https://ml.azure.com/runs/honest_farm_jnbrl2yrgj?wsid=/subscriptions/ba1f7bf8-2be6-4bed-b818-c745bda74905/resourcegroups/primer_proyecto/workspaces/primer_proyecto

Streaming azureml-logs/hyperdrive.txt

[2023-10-29T04:36:57.114950][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space
[2023-10-29T04:36:57.5384315Z][SCHEDULER][INFO]Scheduling job, id='honest_farm_jnbrl2yrgj_0' 
[2023-10-29T04:36:57.6846120Z][SCHEDULER][INFO]Scheduling job, id='honest_farm_jnbrl2yrgj_1' 
[2023-10-29T04:36:57.7947661Z][SCHEDULER][INFO]Scheduling job, id='honest_farm_jnbrl2yrgj_2' 
[2023-10-29T04:36:57.9289769Z][SCHEDULER][INFO]Successfully scheduled a job. Id='honest_farm_jnbrl2yrgj_0' 
[2023-10-29T04:36:57.9349121Z][SCHEDULER][INFO]Scheduling job, id='honest_farm_jnbrl2yrgj_3' 
[2023-10-29T04:36:57.890319][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.
[2023-10-29T04:36:58.0134708Z][SCHEDULER][

In [34]:
# Command job that runs the random-forest training script from ./src.
# The two numeric inputs are defaults; the sweep cell overrides them with a
# search space.
job = command(
    experiment_name='co2-emissions-random-forest-regression-model',
    display_name="co2-emissions-random-forest-regression-model",
    code="./src",
    command=(
        "python RandomForest.py"
        " --dataset_path ${{inputs.dataset_path}}"
        " --n_estimators ${{inputs.n_estimators}}"
        " --max_depth ${{inputs.max_depth}}"
    ),
    # Environment is referenced as "<name>:<version>" of the registered env.
    environment=f"{job_env.name}:{job_env.version}",
    compute=cpu_compute_target,
    inputs={
        # NOTE(review): the dataset URI is hard-coded to one workspace and
        # one upload timestamp -- verify it before running elsewhere.
        "dataset_path": Input(
            path="azureml://subscriptions/ba1f7bf8-2be6-4bed-b818-c745bda74905/resourcegroups/primer_proyecto/workspaces/primer_proyecto/datastores/workspaceblobstore/paths/UI/2023-10-23_204742_UTC/processed_data.csv",
            type="uri_file",
        ),
        "max_depth": 5,
        "n_estimators": 100,
    },
)

In [35]:
# Re-invoke the command job with a discrete search space in place of the
# fixed `n_estimators` / `max_depth` inputs.
job_for_sweep = job(
    n_estimators=Choice(values=[10, 50, 100, 150, 200]),
    max_depth=Choice(values=[5, 10, 15, 20, 25, 30]),
)

# Random search over the space above: 12 trials in total, at most 4 running
# at once. The sampler is seeded so that re-running this cell draws the same
# hyperparameter combinations (the plain "random" string leaves the seed
# unset).
# `primary_metric` presumably has to match the metric name logged by
# RandomForest.py -- TODO confirm against the script.
sweep_job = job_for_sweep.sweep(
    compute=cpu_compute_target,
    sampling_algorithm=RandomSamplingAlgorithm(rule="random", seed=42),
    primary_metric="R2 Score",
    goal="Maximize",
    max_total_trials=12,
    max_concurrent_trials=4,
)

# Submit the sweep and stream its log into the cell output.
returned_sweep_job = ml_client.create_or_update(sweep_job)
ml_client.jobs.stream(returned_sweep_job.name)

Uploading src (0.0 MBs):   0%|          | 0/4964 [00:00<?, ?it/s]Uploading src (0.0 MBs): 100%|██████████| 4964/4964 [00:00<00:00, 65904.21it/s]




RunId: coral_sock_wg57ztmv6x
Web View: https://ml.azure.com/runs/coral_sock_wg57ztmv6x?wsid=/subscriptions/ba1f7bf8-2be6-4bed-b818-c745bda74905/resourcegroups/primer_proyecto/workspaces/primer_proyecto
