In [1]:
from azureml.core import Environment
env = Environment.from_conda_specification(name='training_environment', file_path='./conda_dependencies.yml')


In [6]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-261397
Azure region: southcentralus
Subscription id: d4ad7261-832d-46b2-b093-22156001df5b
Resource group: aml-quickstarts-261397


In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_cluster_name = "opt-azure-ml-mani"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    compute_cluster = ComputeTarget.create(ws, compute_cluster_name, compute_config)

compute_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform,choice
import os
from azureml.core import Environment
from azureml.core import ScriptRunConfig

# Specify parameter sampler
ps = RandomParameterSampling( {  
    "--C": choice(0.001, 0.01, 0.1, 1, 10, 20, 50, 100, 200, 500, 1000),
    "--max_iter": choice(100, 200, 300)
    }
)
# Specify a Policy
policy = BanditPolicy(evaluation_interval=3, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
compute_target = ws.compute_targets[compute_cluster_name]
est = SKLearn(source_directory = "./", 
                compute_target=compute_cluster_name, 
                vm_size="STANDARD_DS3_V2", 
                entry_script="train.py",
                environment_definition=env)


# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling=ps,
                                     primary_metric_name="Accuracy", 
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=policy,
                                     estimator=est,
                                     max_total_runs=10, 
                                    max_concurrent_runs=5)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
If environment_definition or conda_dependencies_file_path is specified, Azure ML will not install any framework related packages on behalf of the user.


In [5]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

from azureml.widgets import RunDetails
experiment = Experiment(ws, "udacity-project")
hyperdrive_run = experiment.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()

2024-07-01 08:45:30.131181: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /anaconda/envs/azureml_py38/lib/python3.9/site-packages/cv2/../../lib64:
2024-07-01 08:45:30.131210: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Failed to load image Python extension: libc10_cuda.so: cannot open shared object file: No such file or directory


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [6]:
hyperdrive_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540,hyperdrive,Running,Link to Azure Machine Learning studio,Link to Documentation


In [7]:
hyperdrive_run.wait_for_completion(show_output=True)


RunId: HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540
Web View: https://ml.azure.com/runs/HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540?wsid=/subscriptions/d4ad7261-832d-46b2-b093-22156001df5b/resourcegroups/aml-quickstarts-261397/workspaces/quick-starts-ws-261397&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

[2024-07-01T08:45:29.075330][GENERATOR][INFO]Trying to sample '5' jobs from the hyperparameter space
[2024-07-01T08:45:29.6083550Z][SCHEDULER][INFO]Scheduling job, id='HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_0' 
[2024-07-01T08:45:29.7190243Z][SCHEDULER][INFO]Scheduling job, id='HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_1' 
[2024-07-01T08:45:29.8538227Z][SCHEDULER][INFO]Scheduling job, id='HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_2' 
[2024-07-01T08:45:29.9209939Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_0' 
[2024-07-01T08:45:29.9991907Z][SCHEDULER][INFO]Scheduling job, id='HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_3

{'runId': 'HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540',
 'target': 'opt-azure-ml-mani',
 'status': 'Completed',
 'startTimeUtc': '2024-07-01T08:45:28.302946Z',
 'endTimeUtc': '2024-07-01T08:50:01.873345Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"Accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '4d11e0ca-82f3-47b0-b34f-6e41987ef5f6',
  'user_agent': 'python/3.9.19 (Linux-5.15.0-1064-azure-x86_64-with-glibc2.31) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.56.0',
  'space_size': '33',
  'best_child_run_id': 'HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_9',
  'score': '0.9095599393019728',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_9'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'aml

In [8]:
import joblib
# Get your best run and save the model from that run.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n C:',parameter_values[1])
print('\n max-iter:',parameter_values[3])



Best Run Id:  HD_c689ba12-c35a-41ac-95a5-3f5b4da4a540_9

 Accuracy: 0.9095599393019727

 C: 1

 max-iter: 300


In [9]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

path= "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(path=path)

In [10]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


In [11]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.


automl_config = AutoMLConfig(
    compute_target = compute_cluster_name,
    experiment_timeout_minutes=30,
    task="classification",
    primary_metric="accuracy",
    training_data=ds,
    label_column_name='y',
    n_cross_validations=2)

In [12]:
# Submit your automl run

remote_run = exp.submit(automl_config, show_output=False)
remote_run.wait_for_completion()

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_712f04cc-ffbf-402f-b6c8-ef37296d8b96,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


{'runId': 'AutoML_712f04cc-ffbf-402f-b6c8-ef37296d8b96',
 'target': 'opt-azure-ml-mani',
 'status': 'Completed',
 'startTimeUtc': '2024-07-01T08:50:29.387998Z',
 'endTimeUtc': '2024-07-01T09:45:56.727073Z',
 'services': {},
   'message': 'Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 30 minute(s)'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'opt-azure-ml-mani',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"afb255d8-3ae3-4334-bfbc-271d1a845690\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-accel-models": "1.56.0", "azureml-automl-core": "1.56.0", "azureml-automl-dnn-nlp": "1.56.0", "az

In [14]:
# Retrieve and save your best automl model.

best_run, fitted_model = remote_run.get_output()

print(best_run)
print(fitted_model)

print("Best run metrics: ", best_run.get_metrics())
print("Best run details: ", best_run.get_details())

Run(Experiment: udacity-project,
Id: AutoML_712f04cc-ffbf-402f-b6c8-ef37296d8b96_32,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, is_cross_validation=True, working_dir='/mnt/batch/tasks/shared/LS_root/mounts/clusters/opt-ml-ai/code/Users/odl_user_261397')),
                ('prefittedsoftvotingclassifier',
                 PreFittedSoftVotingClassifier(classification_labels=array([0, 1]), estimators=[('24', Pipeline(st....001, fit_intercept=True, l1_ratio=0.42857142857142855, learning_rate='constant', loss='modified_huber', max_iter=1000, penalty='none', power_t=0.7777777777777777, tol=0.0001))]))], flatten_transform=False, weights=[0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285]))])
Y_transformer(['LabelEncoder', LabelEncoder()])
Best run metrics:  {'AUC_weighted': 0.946816

In [15]:
print("Best run metrics: ", best_run.get_metrics())


Best run metrics:  {'AUC_weighted': 0.9468161327679272, 'f1_score_weighted': 0.9146735489729876, 'AUC_micro': 0.9805185232602854, 'average_precision_score_macro': 0.8244391129888055, 'precision_score_micro': 0.9165705614567526, 'recall_score_micro': 0.9165705614567526, 'accuracy': 0.9165705614567526, 'recall_score_weighted': 0.9165705614567526, 'average_precision_score_micro': 0.9812771339250685, 'norm_macro_recall': 0.5377777720416439, 'matthews_correlation': 0.5629013477840284, 'f1_score_macro': 0.7809036927612687, 'average_precision_score_weighted': 0.9552379540468185, 'f1_score_micro': 0.9165705614567526, 'weighted_accuracy': 0.9532578497220004, 'recall_score_macro': 0.768888886020822, 'log_loss': 0.19535313425287987, 'AUC_macro': 0.9468161327679272, 'balanced_accuracy': 0.768888886020822, 'precision_score_macro': 0.7946045683744782, 'precision_score_weighted': 0.9132109051083912, 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_712f04cc-ffbf-402f-b6c8-ef37296d8b96_32/

In [9]:
from azureml.core import Workspace, ComputeTarget
from azureml.core.compute import AmlCompute

# Check if the compute target exists
if compute_cluster_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_cluster_name]
else:
    compute_target = None
    print(f'Compute target {compute_cluster_name} not found.')

# Delete the compute target if it exists
if compute_target:
    compute_target.delete()
    print(f'Compute target {compute_cluster_name} deleted successfully.')


Compute target opt-azure-ml-mani deleted successfully.


In [8]:
compute_cluster_name

'opt-azure-ml-mani'