In [1]:
from azureml.core import Workspace, Experiment

# reference to https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-147296
Azure region: southcentralus
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-147296


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

#Reference to lessen 5 source code and Azure Microsoft website https://docs.microsoft.com/en-us/azure/machine-learning/how-to-create-attach-compute-cluster?tabs=python
cpu_cluster_name = "cpucluster"

# Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Creating.........
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, normal
import os

#refernce Lesssen 6 exercise 3 and Azure Microsoft document
# Specify parameter sampler
ps = RandomParameterSampling({
    '--C': uniform(0, 10),
    '--max_iter':choice([50,100,200,300])
}
)

# Specify a Policy
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = "./",
              entry_script = "train.py", 
              compute_target=cpu_cluster)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator = est,
                                     hyperparameter_sampling = ps,
                                     policy = policy,
                                     primary_metric_name = 'Accuracy',
                                     primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs = 20,
                                     max_concurrent_runs = 4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [4]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hdr = exp.submit(hyperdrive_config)
RunDetails(hdr).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [5]:
import joblib
# Get your best run and save the model from that run.
best_run = hdr.get_best_run_by_primary_metric()
best_run.get_metrics()
best_run.get_details()
hdr.get_children_sorted_by_primary_metric(top=0, reverse=False, discard_no_metric=False)


[{'run_id': 'HD_0a00e340-567c-4df8-9a5d-6ec3d8e0678b_4',
  'hyperparameters': '{"--C": 5.523085422078032, "--max_iter": 200}',
  'best_primary_metric': 0.9161204175770818,
  'status': 'Completed'},
 {'run_id': 'HD_0a00e340-567c-4df8-9a5d-6ec3d8e0678b_6',
  'hyperparameters': '{"--C": 8.551895953712362, "--max_iter": 300}',
  'best_primary_metric': 0.915877640203933,
  'status': 'Completed'},
 {'run_id': 'HD_0a00e340-567c-4df8-9a5d-6ec3d8e0678b_2',
  'hyperparameters': '{"--C": 9.446321136494959, "--max_iter": 50}',
  'best_primary_metric': 0.9123573682932751,
  'status': 'Completed'},
 {'run_id': 'HD_0a00e340-567c-4df8-9a5d-6ec3d8e0678b_8',
  'hyperparameters': '{"--C": 9.869975207497946, "--max_iter": 100}',
  'best_primary_metric': 0.9118718135469774,
  'status': 'Completed'},
 {'run_id': 'HD_0a00e340-567c-4df8-9a5d-6ec3d8e0678b_10',
  'hyperparameters': '{"--C": 8.082500517305046, "--max_iter": 300}',
  'best_primary_metric': 0.9116290361738286,
  'status': 'Completed'},
 {'run_id':

In [6]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(path='https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv')


In [7]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

In [18]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=2)

In [19]:
# Submit your automl run
# reference to the Lessen 6 exercise
remote_run = exp.submit(automl_config, show_output=True)

No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_fa883fd2-7f31-4f1a-83ec-2aa44b1400c8,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias toward



Current status: EngineeredFeatureExplanations. Computation of engineered features completed
Current status: RawFeaturesExplanations. Computation of raw features started
Current status: RawFeaturesExplanations. Computation of raw features completed
Current status: BestRunExplainModel. Best run model explanations completed
****************************************************************************************************


In [20]:
# Retrieve and save your best automl model.
best_run1 = remote_run.get_best_child()
best_run1.get_details()

{'runId': 'AutoML_fa883fd2-7f31-4f1a-83ec-2aa44b1400c8_46',
 'status': 'Completed',
 'startTimeUtc': '2021-06-16T17:01:34.588278Z',
 'endTimeUtc': '2021-06-16T17:02:36.005412Z',
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': '__AutoML_Ensemble__',
  'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'accuracy\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'udacity-project\',\'compute_target\':\'local\',\'subscription_id\':\'cdbe0b43-92a0-4715-838a-f2648cc7ad21\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_fa883fd2-7f31-4f1a-83ec-2aa44b1400c8_46","experiment_name":null,"workspace_name":"quick-starts-ws-147296","subscription_id":"cdbe0b43-92a0-4715-838a-f2648cc7ad21","resource_group_name":"aml-qui

In [21]:
cpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

