In [15]:
import pandas as pd

from azureml.core import Workspace, Experiment

# Azure ML SDK pieces used below: the scikit-learn estimator, the HyperDrive
# sweep classes, the notebook run widget, AutoML and the dataset factory.
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from azureml.data.dataset_factory import TabularDatasetFactory
import os

# Install a blanket "ignore" filter so warnings issued through the `warnings`
# module are not shown in the cell outputs that follow.
import warnings
warnings.filterwarnings('ignore')

In [2]:
from azureml.core import Workspace, Experiment

# Connect to the workspace via Workspace.from_config() and open the
# "udacity-project" experiment that the HyperDrive sweep is submitted to.
ws = Workspace.from_config()
exp = Experiment(name="udacity-project", workspace=ws)

# Print the workspace resource id as a quick check that the expected
# workspace was loaded.
workspace_details = ws.get_details()
print(workspace_details['id'])

/subscriptions/f5091c60-1c3c-430f-8d81-d802f6bf2414/resourceGroups/aml-quickstarts-135392/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135392


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


compute_name = "DS2V2"

# Reuse the compute target when one with this name already exists in the
# workspace; otherwise provision a new AmlCompute cluster.
try:
    vm = ComputeTarget(ws, compute_name)
    print(f"{compute_name} exists already")
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. A bare `except:` would
    # also swallow unrelated failures (authentication, network, Ctrl-C) and
    # then try to create a cluster anyway.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
    vm = ComputeTarget.create(ws, compute_name, compute_config)

# Block until the compute target finishes its current provisioning operation.
vm.wait_for_completion(show_output=True)

DS2V2 exists already

Running


In [4]:
import shutil

# Specify parameter sampler: every hyperparameter is drawn at random from a
# discrete list of candidate values and passed to train.py as a script argument.
# NOTE(review): if train.py forwards --solver/--reg straight to scikit-learn's
# LogisticRegression, the "lbfgs" and "sag" solvers do not support the "l1"
# penalty, so those combinations would fail - confirm against train.py.
param_space = {
    "--C": choice(100, 10, 1.0, 0.1, 0.01),
    "--solver": choice("lbfgs", "liblinear", "saga", "sag"),
    "--reg": choice("l1", "l2"),
}

sampling = RandomParameterSampling(param_space)

# Early-termination (Bandit) policy.
# - evaluation_interval=1: the policy is applied each time a run reports the
#   primary metric.
# - delay_evaluation=20: the first policy evaluation is postponed by 20 intervals.
# - slack_factor=0.1: a run is cancelled when its metric falls below
#   best / (1 + 0.1), i.e. roughly 91% of the best run so far.
# Only the under-performing run is cancelled; the sweep itself keeps going.
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.1, delay_evaluation=20)

# Stage the training script in its own directory so that only that directory
# is used as the source directory of the run. exist_ok=True makes the cell
# safely re-runnable, and makedirs raises if "training" exists as a plain file
# instead of letting shutil.copy silently overwrite that file.
os.makedirs("./training", exist_ok=True)
shutil.copy('train.py', './training')

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./training',
              compute_target=vm,
              entry_script='train.py')

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
# The sweep maximizes the metric that is looked up under the name "AUC".
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     policy=policy,
                                     primary_metric_name="AUC",
                                     hyperparameter_sampling=sampling,
                                     max_total_runs=200,
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


#### Submit Hyperdrive

In [5]:
# Submit the HyperDrive run to the experiment and show run details with the widget.

hpo_run = exp.submit(hyperdrive_config)
RunDetails(hpo_run).show()

# Block until the sweep has finished. Without this, a "Run All" execution moves
# straight on to the cell that asks for the best run while child runs are still
# in progress.
hpo_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

#### Obtain Best Model

In [6]:

import joblib

# Select the child run with the best primary metric and fetch the metrics it
# logged. (joblib is imported here but not used in this cell.)
best_run = hpo_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)

# AUC is shown as a percentage.
auc_percent = best_run_metrics['AUC']*100
print('\n AUC:', auc_percent)

# NOTE(review): the label says "Strength", but the value recorded under the
# 'Regularization:' key in the saved output is a penalty type ("l1").
regularization = best_run_metrics['Regularization:']
print('\n Regularization Strength:', regularization)

solver = best_run_metrics['Solver:']
print('\n Solvers:', solver)

Best Run Id:  HD_f47c782b-4bfb-453a-8f8d-5565f0d7ceda_3

 AUC: 77.3585596050799

 Regularization Strength: l1

 Solvers: liblinear


## AutoML

In [38]:
from azureml.core import Dataset

# Look up a registered dataset in the workspace by name and pinned version.
dataset_name = 'Bank-marketing'
dataset_version = 2
aml_ds = Dataset.get_by_name(workspace=ws, name=dataset_name, version=dataset_version)

# Display the dataset description as the cell output.
aml_ds

{
  "source": [
    "https://udacitystorage.blob.core.windows.net/udacity/bankmarketing_train.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "c9c5d40f-b442-45f8-9155-e8cde1648a06",
    "name": "Bank-marketing",
    "version": 2,
    "workspace": "Workspace.create(name='quick-starts-ws-135392', subscription_id='f5091c60-1c3c-430f-8d81-d802f6bf2414', resource_group='aml-quickstarts-135392')"
  }
}

Get the column names to make sure the 'duration' column has been dropped.

### Set Up AutoML Config

In [37]:
# Fetch the workspace dataset registered under the name "amldata" and show it.
# NOTE(review): none of the later cells visible here use `dataaml`; the AutoML
# configuration trains on `aml_ds` instead.
registered_datasets = ws.datasets
dataaml = registered_datasets.get("amldata")
dataaml

In [39]:
#### Configure the AutoML experiment

from azureml.train.automl import AutoMLConfig

# Settings for AutoMLConfig, collected in one place.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    'experiment_timeout_minutes': 20,
    'task': 'classification',
    'primary_metric': 'AUC_weighted',
    'compute_target': vm,
    'training_data': aml_ds,
    'label_column_name': 'y',
    'n_cross_validations': 5,
}

automl_config = AutoMLConfig(**automl_settings)

##### Submit Auto ML

In [None]:
# Submit the AutoML configuration under its own experiment, show the run
# widget, and block until the run has finished.

print('Starting AutoML...')
automl_experiment = Experiment(workspace=ws, name='automl')
automl_run = automl_experiment.submit(automl_config)

automl_widget = RunDetails(automl_run)
automl_widget.show()

automl_run.wait_for_completion(show_output=True)

Starting AutoML...
Running on remote.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


Current status: FeaturesGeneration. Generating features for the dataset.


In [None]:
# Retrieve the best AutoML child run together with its fitted model, then
# print both followed by every metric the run logged.
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

In [None]:
# Register the best run's model, then list the models in the workspace

from azureml.core import Model

# Tag the model with its training context and record the run's accuracy
# metric as a property.
model_tags = {'Training context':'Auto ML'}
model_properties = {'Accuracy': best_run_metrics['accuracy']}
best_run.register_model(model_name='model_automl',
                        model_path='outputs/model.pkl',
                        tags=model_tags,
                        properties=model_properties)

# Print name and version of each model returned by Model.list, followed by
# its tags and properties.
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')