In [4]:
pip install azureml-train-core azureml-train-restclients-hyperdrive

Note: you may need to restart the kernel to use updated packages.


In [5]:
!pip install protobuf==3.20.*



In [6]:
from azureml.core import Workspace, Datastore, Experiment, Dataset, ComputeTarget,Environment
ws = Workspace.from_config()

# Get dataset
input_ds = Dataset.get_by_name(ws, 'weather_type')

In [7]:
env = Environment.from_conda_specification(
    name='aml-env',
    file_path='environment.yml'  # Path to your YML file
)
env.register(workspace=ws)


{
    "assetId": "azureml://locations/centralus/workspaces/e5889720-c828-45a8-95af-4fa79bac3b60/environments/aml-env/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "aml-env",
    "python": {
 

In [8]:
# Specify your existing cluster name
cluster_name = "compute"  # Change this to your actual cluster name

# Get the existing compute target
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print(f"Found existing cluster: {cluster_name}")
except Exception as e:
    print(f"Cluster {cluster_name} not found. Error: {e}")

Found existing cluster: compute


In [9]:
# Create script config
from azureml.core import ScriptRunConfig

script_config = ScriptRunConfig(
    source_directory='.',
    script='script.py',
    arguments=['--input-data', input_ds.as_named_input('raw_data')],
    environment=env,
    compute_target=cluster
)

In [24]:
# Hyperparameter tuning config
from azureml.train.hyperdrive import RandomParameterSampling, PrimaryMetricGoal, HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

param_sampling = RandomParameterSampling(
    {
        '--learning_rate': loguniform(0.0001, 0.01),
        '--l2_regularizer': loguniform(0.0001, 0.01),
        '--dropout_rate': uniform(0.2, 0.5)
    }
)

hyperdrive_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=param_sampling,
    policy=None,
    primary_metric_name='test_accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=2
)

In [25]:
# Submit experiment
from azureml.core.experiment import Experiment
new_exp = Experiment(workspace=ws, name='hyperdrive-experiment')
new_run = new_exp.submit(config=hyperdrive_config)
new_run.wait_for_completion(show_output=True)

RunId: HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3
Web View: https://ml.azure.com/runs/HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3?wsid=/subscriptions/4631f82e-b10a-4df4-8eac-f72ff5878b5d/resourcegroups/myml1234/workspaces/kmaml&tid=51cca0e1-2d5a-4d1b-aa9a-ddaaa78aeec2

Streaming azureml-logs/hyperdrive.txt

[2025-04-12T16:21:19.3800563Z][GENERATOR][DEBUG]Sampled 2 jobs from search space 
[2025-04-12T16:21:19.7938128Z][SCHEDULER][INFO]Scheduling job, id='HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_0' 
[2025-04-12T16:21:19.8754565Z][SCHEDULER][INFO]Scheduling job, id='HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_1' 
[2025-04-12T16:21:20.2279622Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_0' 
[2025-04-12T16:21:20.2744576Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_1' 
[2025-04-12T16:22:51.5650559Z][GENERATOR][DEBUG]Sampled 2 jobs from search space 
[2025-04-12T16:22:51.7645936Z][SCHEDULER][INFO]Scheduling job,

{'runId': 'HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3',
 'target': 'compute',
 'status': 'Completed',
 'startTimeUtc': '2025-04-12T16:21:17.959451Z',
 'endTimeUtc': '2025-04-12T16:29:24.508649Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"test_accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '905318ff-8d8a-40aa-9568-d5a1951b0463',
  'user_agent': 'python/3.10.11 (Linux-5.15.0-1073-azure-x86_64-with-glibc2.31) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.57.0',
  'best_child_run_id': 'HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_5',
  'score': '0.9022727012634276',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_5'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml-sdk-

In [26]:
best_run = new_run.get_best_run_by_primary_metric()

if best_run:
    print('Best Run ID:', best_run.id)
    print('Best Run Metrics:', best_run.get_metrics())
else:
    print('No best run found. Please check the status of your runs and metrics.')


Best Run ID: HD_4dc2d7c9-c449-4e43-aa8b-fd5610730ad3_5
Best Run Metrics: {'test_loss': 0.28322502970695496, 'test_accuracy': 0.9022727012634277}


In [27]:
# Register the ENTIRE 'outputs' folder
model = best_run.register_model(
    model_name='weather_type_model',
    model_path='outputs/',  # notice the slash -> registering the whole folder
    description='Model and preprocessors registered together from best hyperdrive run'
)

print("Model registered successfully:", model.name, "Version:", model.version)

Model registered successfully: weather_type_model Version: 1
