In [2]:
from azureml.core import Workspace, Experiment, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig

from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform

from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset, Datastore
from train import clean_data

from azureml.train.automl import AutoMLConfig

import os
import joblib

In [3]:
# Connect to the workspace described by the local Azure ML config and open
# the experiment that groups this project's runs.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# Show the workspace details, one per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start an interactive logging run in the experiment.
run = exp.start_logging()

Workspace name: quick-starts-ws-148124
Azure region: southcentralus
Subscription id: f5091c60-1c3c-430f-8d81-d802f6bf2414
Resource group: aml-quickstarts-148124


In [4]:
# Create (or reuse) the compute cluster used by the HyperDrive and AutoML runs.
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
compute_name = "cluster1"
vm_size = "Standard_D2_V2"
min_nodes, max_nodes = 0, 4

# Look the workspace's compute targets up once instead of once per use.
existing_targets = ws.compute_targets
if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print("found compute target: " + compute_name)
    else:
        # Previously this case was silent; surface it so a name clash with a
        # different kind of compute is visible before jobs are submitted to it.
        print("warning: compute target " + compute_name
              + " exists but is not an AmlCompute cluster ("
              + type(compute_target).__name__ + ")")
else:
    print("creating new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=min_nodes,
        max_nodes=max_nodes,
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

# Wait until the (new or existing) target reports a finished provisioning state.
compute_target.wait_for_completion(show_output=True)

found compute target: cluster1
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [5]:
# Specify parameter sampler: every child run draws `C` from uniform(0.01, 100).
ps = RandomParameterSampling( {
    'C': uniform(0.01, 100)
})
# Specify a Policy (early termination of poorly performing runs).
policy = BanditPolicy(slack_factor = 0.1, delay_evaluation=5)

# Make sure the local folder that later cells save models into exists.
# exist_ok=True makes this cell safe to re-run.
os.makedirs("training", exist_ok=True)

# Create a ScriptRunConfig that runs train.py on the compute cluster.
# NOTE(review): the variable is called sklearn_env, but the curated environment
# requested here is a LightGBM image; presumably it also provides scikit-learn
# for train.py -- confirm.
sklearn_env = Environment.get(ws, 'AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu')
# NOTE(review): '--C' is also the hyperparameter sampled above; presumably the
# sampled value is appended by HyperDrive and overrides this fixed 1.0 -- confirm.
est = ScriptRunConfig(source_directory = '.',
                      compute_target = compute_target,
                      script = 'train.py',
                      arguments= ['--C', 1.0, '--max_iter', 100],
                      environment = sklearn_env)

# Create a HyperDriveConfig using the run config, hyperparameter sampler, and policy.
# The sweep maximizes the "Accuracy" metric over at most 20 runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    run_config=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4
)

In [6]:
# Launch the HyperDrive sweep as a run of the experiment.
hyperdrive_run = exp.submit(config=hyperdrive_config)

In [7]:
# Display the run-details widget for the HyperDrive run.
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [8]:
# Get your best run and save the model from that run.
# Nothing before this cell waits for the submitted sweep, so block here until
# it has finished; otherwise a top-to-bottom run would query an unfinished sweep.
hyperdrive_run.wait_for_completion()

best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail here with a clear message instead of an AttributeError in a later cell.
    raise RuntimeError("HyperDrive run produced no child run with the primary metric")
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,HD_1aecda9b-4855-47b4-80d4-759b4ee2d23b_0,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
# Copy the best run's outputs/model.pkl into the local training/ folder.
best_run.download_file(
    name='outputs/model.pkl',
    output_file_path='training/logistic-model.pkl',
)

## Auto-ML

In [19]:
# Create TabularDataset using TabularDatasetFactory
# Data is available at:
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(
    path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
)

# Clean the data with clean_data from train.py; it returns two objects,
# which are recombined below with `y` stored as the 'label' column.
x, y = clean_data(ds)
data = x.assign(label=y)

# Register the combined frame as a dataset in the workspace's default datastore.
datastore = Datastore.get_default(ws)
training_data = Dataset.Tabular.register_pandas_dataframe(
    dataframe=data,
    target=datastore,
    name='bankmarketing',
)

Method register_pandas_dataframe: This is an experimental method, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/7d341ed7-2470-4185-86ac-f2dddf737f3d/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [22]:
# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    'experiment_timeout_minutes': 30,
    'task': 'classification',
    'primary_metric': 'accuracy',
    'training_data': training_data,
    'label_column_name': 'label',
    'n_cross_validations': 5,
    'compute_target': compute_target,
}
automl_config = AutoMLConfig(**automl_settings)

In [23]:
# Run AutoML under its own experiment, separate from the HyperDrive one.
exp_automl = Experiment(workspace=ws, name='auto-ml')
automl_run = exp_automl.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
auto-ml,AutoML_0e4084dc-2201-4fb0-980e-ade5e7b41841,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [24]:
# Display the run-details widget for the AutoML run.
RunDetails(automl_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [29]:
# Retrieve and save your best automl model.
# Nothing before this cell waits for the AutoML run, so block until it is done
# before asking for its output.
automl_run.wait_for_completion()

# Use a dedicated name so `best_run` keeps pointing at the HyperDrive best run
# and re-running the HyperDrive download cell still fetches the right model.
best_automl_run, fitted_model = automl_run.get_output()

# Do not rely on an earlier cell having created the output folder.
os.makedirs('training', exist_ok=True)
joblib.dump(fitted_model, 'training/auto_ml.pkl')

## Cleaning resources

### Delete cluster

In [13]:
# Delete the compute cluster now that both experiments are finished.
compute_target.delete()

Current provisioning state of AmlCompute is "Deleting"



### Stop or delete the notebook compute instance
1. Go to the **Compute** tab in Azure Machine Learning studio.
2. Select the compute instance.
3. Select **Stop** or **Delete**.