In [1]:
from azureml.core import Workspace, Experiment, Environment
from azureml.core import ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
import os
import shutil

# Load the workspace via Workspace.from_config()
ws = Workspace.from_config()

# Name of the compute cluster used for training
compute_name = "trainCluster"

# Look the compute target up by name; provision it only when the lookup fails
try:
    trainCluster = ComputeTarget(workspace=ws, name=compute_name)
except ComputeTargetException:
    # Lookup raised, so create the cluster and wait for provisioning to finish
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_V2",
        max_nodes=4,
    )
    trainCluster = ComputeTarget.create(
        workspace=ws,
        name=compute_name,
        provisioning_configuration=compute_config,
    )
    trainCluster.wait_for_completion(show_output=True)
else:
    print(f"{compute_name} exists already")

# Define the training environment and the conda packages it needs
env = Environment(name="my-sklearn-env")
conda_dep = CondaDependencies()
for package_name in ("scikit-learn", "pandas"):
    conda_dep.add_conda_package(package_name)
env.python.conda_dependencies = conda_dep

# Register the environment with the workspace
env.register(workspace=ws)

# Discrete search space; keys are written in command-line flag form
search_space = {
    "--C": choice(1, 2, 3, 4, 5),
    "--max_iter": choice(80, 100, 120, 150, 170, 200),
}

# Random sampling over the search space above
ps = RandomParameterSampling(search_space)

# Early-termination policy for the sweep
policy = BanditPolicy(
    slack_factor=0.2,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Stage the training script in a dedicated source directory.
# os.makedirs(..., exist_ok=True) creates the directory when it is missing and does
# nothing when it already exists. A plain name check against os.listdir() would also
# accept a regular *file* called "training", after which shutil.copy would silently
# overwrite that file; makedirs raises FileExistsError in that case instead.
os.makedirs("./training", exist_ok=True)
shutil.copy('train.py', './training')

# Run configuration: the script, its source folder, the environment and the compute target
src = ScriptRunConfig(
    source_directory='./training',
    script='train.py',
    environment=env,
    compute_target=trainCluster,
)

# Sweep configuration: combines the run config with the sampler and the
# early-termination policy, maximizing the "Accuracy" metric over 10 runs in total
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
)

# Launch the HyperDrive sweep under a named experiment
experiment_name = 'your-experiment-name'
experiment = Experiment(ws, experiment_name)
hyperdrive_run = experiment.submit(hyperdrive_config)

# Block until the sweep finishes, streaming its output into the cell
hyperdrive_run.wait_for_completion(show_output=True)

# get_best_run_by_primary_metric() returns None when no child run reported the
# primary metric (e.g. every child failed before logging it). Fail here with a
# clear message rather than with an AttributeError on the next line.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    raise RuntimeError(
        "HyperDrive finished without any child run reporting the primary metric "
        "'Accuracy'; inspect the child run logs before continuing."
    )
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
print('\nAccuracy:', best_run_metrics['Accuracy'])


trainCluster exists already
RunId: HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a
Web View: https://ml.azure.com/runs/HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a?wsid=/subscriptions/d2d90bd8-e567-4097-88c9-9532cc375686/resourcegroups/cloud_shell/workspaces/udacity&tid=f3822f31-4d32-4719-a061-c45fac0a64ab

Streaming azureml-logs/hyperdrive.txt

[2024-07-01T00:44:33.093856][GENERATOR][INFO]Trying to sample '10' jobs from the hyperparameter space
[2024-07-01T00:44:33.6854184Z][SCHEDULER][INFO]Scheduling job, id='HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a_0' 
[2024-07-01T00:44:33.8128413Z][SCHEDULER][INFO]Scheduling job, id='HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a_1' 
[2024-07-01T00:44:33.9369349Z][SCHEDULER][INFO]Scheduling job, id='HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a_2' 
[2024-07-01T00:44:34.0428287Z][SCHEDULER][INFO]Scheduling job, id='HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a_3' 
[2024-07-01T00:44:34.1721100Z][SCHEDULER][INFO]Scheduling job, id='HD_71c76867-b3e6-48b0-9bd3-d03366a0c40a_4' 
[2024-07

In [2]:
# Register the best run's outputs/ folder as a model and attach the run's
# logged metrics to it as properties
model_properties = {
    'Accuracy': best_run_metrics['Accuracy'],
    'Regularization Strength': best_run_metrics['Regularization Strength:'],
    'Max Iterations': best_run_metrics['Max iterations:'],
}
model = best_run.register_model(
    model_name='HyperDrive_HighAccuracy',
    model_path='outputs/',
    properties=model_properties,
)

In [3]:
# Display the registered model (name, version, properties) as the cell output
model

Model(workspace=Workspace.create(name='udacity', subscription_id='d2d90bd8-e567-4097-88c9-9532cc375686', resource_group='cloud_shell'), name=HyperDrive_HighAccuracy, id=HyperDrive_HighAccuracy:2, version=2, tags={}, properties={'Accuracy': '0.9124936772888215', 'Regularization Strength': '5.0', 'Max Iterations': '200'})