In [1]:
from azureml.core import Workspace, Experiment

# Look up the Azure ML workspace by name, subscription and resource group.
ws = Workspace.get(
    name="quick-starts-ws-159441",
    subscription_id="d4ad7261-832d-46b2-b093-22156001df5b",
    resource_group="aml-quickstarts-159441",
)

# Every run submitted from this notebook is grouped under this experiment.
exp = Experiment(workspace=ws, name="udacity-project")

# Echo the workspace coordinates, one field per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start an interactive run on the experiment; the handle is kept in `run`.
run = exp.start_logging()

Workspace name: quick-starts-ws-159441
Azure region: southcentralus
Subscription id: d4ad7261-832d-46b2-b093-22156001df5b
Resource group: aml-quickstarts-159441


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Reuse the compute target named 'compute' when the workspace already has one;
# otherwise provision a new AmlCompute cluster under that name.
try:
    compute = ComputeTarget(workspace=ws, name='compute')
    print('Using existing compute')
except ComputeTargetException:
    # Raised by the lookup above when no target with that name exists.
    print('Create new compute cluster')
    config = AmlCompute.provisioning_configuration(vm_size='Standard_D2_V3', max_nodes=4)
    compute = ComputeTarget.create(ws, 'compute', config)
    # Block until provisioning finishes. Use the public API rather than the
    # private `_wait_for_completion`, which is not part of the supported surface.
    compute.wait_for_completion(show_output=True)

Using existing compute


In [13]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os


# Random search space handed to HyperDrive:
#   --C         sampled uniformly between 0 and 1.0
#   --max_iter  picked from the discrete set {50, 100, 250, 500}
ps = RandomParameterSampling({
    "--C": uniform(0, 1.0),
    "--max_iter": choice(50, 100, 250, 500),
})

# Early-termination policy: Bandit with a slack factor of 0.2, applied at an
# evaluation interval of 10.
# NOTE(review): the policy only acts on intervals at which train.py logs the
# primary metric; confirm the script logs often enough for this to take effect.
policy = BanditPolicy(evaluation_interval=10, slack_factor=0.2)

# Make sure the ./training directory exists. `exist_ok=True` makes the cell
# safely re-runnable without a separate listdir() membership check.
os.makedirs("./training", exist_ok=True)

# SKLearn estimator that runs train.py on the compute target. The script
# parameters given here are defaults for the same flags the sampler varies.
est = SKLearn(
    script_params={'--C': 1.0, '--max_iter': 100},
    compute_target=compute,
    entry_script='train.py',
    source_directory="",
)

# HyperDrive sweep: maximise the 'Accuracy' metric over at most 50 runs,
# 4 of them at a time.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=4,
)



In [14]:
# Submit the HyperDrive configuration to the experiment and keep the returned
# run handle in `hdr`. (The RunDetails widget is shown in a separate cell.)
hdr = exp.submit(config = hyperdrive_config)



In [51]:
# Evaluate the run handle as the cell's last expression so the notebook
# renders its summary (experiment, id, type, status, links).
hdr

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,HD_9f0a2b3f-7229-4182-b287-c4907ddb2168,hyperdrive,Running,Link to Azure Machine Learning studio,Link to Documentation


In [15]:
# Show the RunDetails widget for the HyperDrive run.
RunDetails(hdr).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [53]:
import joblib
# Get the best HyperDrive child run and show the arguments it was launched with.

# `exp.submit` returns immediately, so block here until the sweep has finished.
# Without this, a fresh "Run All" would query the best run before any child
# run has reported the primary metric.
hdr.wait_for_completion(show_output=False)

bestrun = hdr.get_best_run_by_primary_metric()
if bestrun is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError(
        "HyperDrive returned no best run; no child run reported the primary metric."
    )

# Last expression: the command-line arguments of the winning run.
bestrun.get_details()['runDefinition']['arguments']

['--C', '0.9221863476809233', '--max_iter', '50']

In [55]:
# Register the model file from the best run's outputs under the name
# 'hyperdrive_lr', then display the registered Model object.
model = bestrun.register_model(
    model_path='outputs/model.joblib',
    model_name='hyperdrive_lr',
)
model

Model(workspace=Workspace.create(name='quick-starts-ws-159441', subscription_id='d4ad7261-832d-46b2-b093-22156001df5b', resource_group='aml-quickstarts-159441'), name=hyperdrive_lr, id=hyperdrive_lr:1, version=1, tags={}, properties={})

In [3]:
# Build a TabularDataset from the bank-marketing training CSV at the URL below,
# using TabularDatasetFactory.
from azureml.data.dataset_factory import TabularDatasetFactory

data_path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=data_path)

In [4]:
from train import clean_data

# Clean the dataset with clean_data (imported from train.py); the two values it
# returns are unpacked into x and y.
# NOTE(review): presumably x holds the features and y the labels — confirm in train.py.
x, y = clean_data(ds)

In [9]:
from azureml.train.automl import AutoMLConfig

# AutoML run settings: a classification task on `ds` with label column 'y',
# optimising accuracy with 5-fold cross-validation on the compute target.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    task="classification",
    primary_metric="accuracy",
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5,
    compute_target=compute,
    experiment_timeout_minutes=30,
)

In [10]:
# Submit the AutoML configuration to the experiment; show_output=True prints
# the run's progress in the cell output.
automlrun = exp.submit(config=automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on compute with default configuration
Running on remote compute: compute


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_ad18a3c3-d1ca-437b-9e2d-5e37e319d011,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+--------------------------------------+
|Size of the smallest class       |Name/Label of the sm

In [11]:
# Retrieve the best AutoML run together with its fitted model.
# NOTE(review): nothing is written to disk in this cell. Also note that
# `bestrun` is rebound here, replacing the HyperDrive best run assigned earlier.
bestrun, fittedmodel = automlrun.get_output()

Package:azureml-automl-runtime, training version:1.34.0.post1, current version:1.33.0
Package:azureml-core, training version:1.34.0, current version:1.33.0
Package:azureml-dataprep, training version:2.22.2, current version:2.20.1
Package:azureml-dataprep-rslex, training version:1.20.2, current version:1.18.0
Package:azureml-dataset-runtime, training version:1.34.0, current version:1.33.0
Package:azureml-defaults, training version:1.34.0, current version:1.33.0
Package:azureml-interpret, training version:1.34.0, current version:1.33.0
Package:azureml-mlflow, training version:1.34.0, current version:1.33.0
Package:azureml-pipeline-core, training version:1.34.0, current version:1.33.0
Package:azureml-responsibleai, training version:1.34.0, current version:1.33.0
Package:azureml-telemetry, training version:1.34.0, current version:1.33.0
Package:azureml-train-automl-client, training version:1.34.0, current version:1.33.0
Package:azureml-train-automl-runtime, training version:1.34.0.post1, c

In [12]:
# Register the AutoML run's model under the name 'automl', then display the
# registered Model object.
model = automlrun.register_model(
    description='AutoML best model',
    model_name='automl',
)
model

Model(workspace=Workspace.create(name='quick-starts-ws-159441', subscription_id='d4ad7261-832d-46b2-b093-22156001df5b', resource_group='aml-quickstarts-159441'), name=automl, id=automl:1, version=1, tags={}, properties={})

In [None]:
# Cleanup: delete the compute target used by the runs above.
compute.delete()