In [1]:
pip install azureml-train-core azureml-train-restclients-hyperdrive

Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install protobuf==3.20.*



In [9]:
from azureml.core import Workspace, Datastore, Experiment, Dataset, ComputeTarget,Environment
ws = Workspace.from_config()

# Get dataset
input_ds = Dataset.get_by_name(ws, 'weather_type')

In [10]:
env = Environment.from_conda_specification(
    name='aml-env',
    file_path='environment.yml'  # Path to your YML file
)
env.register(workspace=ws)


{
    "assetId": "azureml://locations/centralus/workspaces/ce6c5e26-39fe-4c18-b4ee-09bf71f37709/environments/aml-env/versions/3",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "aml-env",
    "python": {
 

In [11]:
# Specify your existing cluster name
cluster_name = "mycluster"  # Change this to your actual cluster name

# Get the existing compute target
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print(f"Found existing cluster: {cluster_name}")
except Exception as e:
    print(f"Cluster {cluster_name} not found. Error: {e}")

Found existing cluster: mycluster


In [12]:
# Create script config
from azureml.core import ScriptRunConfig

script_config = ScriptRunConfig(
    source_directory='.',
    script='script.py',
    arguments=['--input-data', input_ds.as_named_input('raw_data')],
    environment=env,
    compute_target=cluster
)

In [13]:
# Hyperparameter tuning config
from azureml.train.hyperdrive import RandomParameterSampling, PrimaryMetricGoal, HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

param_sampling = RandomParameterSampling(
    {
        '--learning_rate': loguniform(0.0001, 0.01),
        '--l2_regularizer': loguniform(0.0001, 0.01),
        '--dropout_rate': uniform(0.2, 0.5)
    }
)

hyperdrive_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=param_sampling,
    policy=None,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=2
)

In [16]:
# Submit experiment
from azureml.core.experiment import Experiment
new_exp = Experiment(workspace=ws, name='hyperdrive-experiment')
new_run = new_exp.submit(config=hyperdrive_config)
new_run.wait_for_completion(show_output=True)

RunId: HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7
Web View: https://ml.azure.com/runs/HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7?wsid=/subscriptions/4631f82e-b10a-4df4-8eac-f72ff5878b5d/resourcegroups/azuremlkm/workspaces/myazuremlws&tid=51cca0e1-2d5a-4d1b-aa9a-ddaaa78aeec2

Streaming azureml-logs/hyperdrive.txt

[2025-04-11T17:05:07.8695103Z][GENERATOR][DEBUG]Sampled 2 jobs from search space 
[2025-04-11T17:05:08.0878243Z][SCHEDULER][INFO]Scheduling job, id='HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7_0' 
[2025-04-11T17:05:08.1776517Z][SCHEDULER][INFO]Scheduling job, id='HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7_1' 
[2025-04-11T17:05:08.4733131Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7_0' 
[2025-04-11T17:05:08.5758889Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7_1' 
[2025-04-11T17:06:39.9882376Z][GENERATOR][DEBUG]Sampled 2 jobs from search space 
[2025-04-11T17:06:40.1667537Z][SCHEDULER][INFO]Scheduli

{'runId': 'HD_a4aa4882-c0d9-49a5-8baf-e2120f7816a7',
 'target': 'mycluster',
 'status': 'Completed',
 'startTimeUtc': '2025-04-11T17:05:06.446866Z',
 'endTimeUtc': '2025-04-11T17:12:39.312488Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '31ad2c5d-1cc4-4088-a3b6-7e599b86869d',
  'user_agent': 'python/3.10.11 (Linux-5.15.0-1073-azure-x86_64-with-glibc2.31) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.57.0'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml-sdk-train',
   'amlClientModule': '[Scrubbed]',
   'amlClientFunction': '[Scrubbed]',
   'tenantId': '51cca0e1-2d5a-4d1b-aa9a-ddaaa78aeec2',
   'amlClientRequestId': '72a5dcb2-167a-4240-941c-3212d5eb73b5',
   'amlClientSess

In [23]:
import os
import joblib
os.makedirs('models',exist_ok=True)
joblib.dump(value=['model'],filename='classification_model.pkl')

['classification_model.pkl']