In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Attach to the AmlCompute cluster named below, provisioning it only if the
# workspace does not already have one with that name.
# New clusters use vm_size "STANDARD_D2_V2" and scale to at most 4 nodes.
cluster_name = "banking-cluster1"

try:
    # Raises ComputeTargetException when the lookup in the workspace fails.
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(
        workspace=ws,
        name=cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports completion, streaming progress to the cell output.
cpu_cluster.wait_for_completion(show_output=True)


Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
# Fetch the cluster's current status and print its serialized (dict) form.
cluster_status = cpu_cluster.get_status()
print(cluster_status.serialize())

{'currentNodeCount': 4, 'targetNodeCount': 4, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 4, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-05-18T09:22:08.159000+00:00', 'errors': None, 'creationTime': '2021-05-18T09:15:00.988490+00:00', 'modifiedTime': '2021-05-18T09:15:16.355988+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 1, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT1200S'}, 'vmPriority': 'LowPriority', 'vmSize': 'STANDARD_DS3_V2'}


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive import choice
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os

# Hyperparameter search space, sampled at random:
#   "C"        - drawn uniformly between 0.1 and 100
#   "max_iter" - one of the listed discrete values
ps = RandomParameterSampling({
    "C": uniform(0.1, 100),
    "max_iter": choice(10, 50, 100, 150)
    })

# Early-termination policy: Bandit with a slack factor of 0.1, applied at
# every evaluation interval.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1)

# Ensure the local "training" directory exists. exist_ok=True makes this
# idempotent on re-runs and avoids the check-then-create pattern.
os.makedirs("./training", exist_ok=True)

# SKLearn estimator that runs train.py from the current directory on the cluster.
# NOTE: the SDK reports this estimator as deprecated in favor of ScriptRunConfig.
est = SKLearn(source_directory=".",
              compute_target=cpu_cluster,
              entry_script="train.py",
              vm_size="Standard_D2_V2")

# HyperDrive sweep: maximize "accuracy" over at most 20 runs, 3 at a time.
# The primary metric name presumably has to match what train.py logs - confirm.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name="accuracy",
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=20,
                                     max_concurrent_runs=3)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [5]:
# Launch the HyperDrive sweep in the experiment and keep a handle to the parent run.
hyperdrive_run = exp.submit(config=hyperdrive_config)




In [6]:
# Render the run-monitoring widget for the sweep.
hyperdrive_widget = RunDetails(hyperdrive_run)
hyperdrive_widget.show()

# Block until the sweep finishes, streaming its logs into the cell output.
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_e283d4a8-065e-4c08-a2e3-6fae9040aff0
Web View: https://ml.azure.com/runs/HD_e283d4a8-065e-4c08-a2e3-6fae9040aff0?wsid=/subscriptions/d7f39349-a66b-446e-aba6-0053c2cf1c11/resourcegroups/aml-quickstarts-145073/workspaces/quick-starts-ws-145073&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-05-18T09:29:45.096233][API][INFO]Experiment created<END>\n""<START>[2021-05-18T09:29:45.677503][GENERATOR][INFO]Trying to sample '3' jobs from the hyperparameter space<END>\n""<START>[2021-05-18T09:29:45.857121][GENERATOR][INFO]Successfully sampled '3' jobs, they will soon be submitted to the execution target.<END>\n"


In [15]:
import joblib
# Get the best HyperDrive child run according to the primary metric.
# NOTE(review): this cell does not actually save or register the model, and the
# joblib import above is unused here - confirm where train.py writes the model
# before adding a download/register step.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
# NOTE(review): the return value of get_details() is discarded (it is not the
# last expression in the cell), so nothing from it is displayed.
best_run.get_details()
# Cell output: the child runs sorted by primary metric, limited to the top 1.
hyperdrive_run.get_children_sorted_by_primary_metric(top=1)

In [16]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Location of the bank-marketing training CSV.
datalink = (
    "https://automlsamplenotebookdata.blob.core.windows.net"
    "/automl-sample-notebook-data/bankmarketing_train.csv"
)

# Build a TabularDataset from the delimited file at that URL.
ds = TabularDatasetFactory.from_delimited_files(path=datalink)

In [17]:

from train import clean_data
from sklearn.model_selection import train_test_split
import pandas as pd


# Clean the dataset with train.clean_data, which returns two objects that are
# used here as the features (x) and the label (y).
x, y = clean_data(ds)

# Hold out a test split. A fixed random_state makes the split (and everything
# trained on it) reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

# Recombine training features and label column-wise into one frame for AutoML.
# NOTE(review): assumes the label series is named 'y', matching the
# label_column_name used in the AutoML config - confirm against clean_data.
train_data = pd.concat([x_train, y_train], axis=1)

In [18]:
from azureml.train.automl import AutoMLConfig

# AutoML settings for the classification experiment.
# NOTE: DO NOT CHANGE experiment_timeout_minutes OR YOUR INSTANCE WILL TIME OUT.
# To run the experiment for longer, run this notebook in your own Azure tenant,
# which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": train_data,
    "label_column_name": 'y',
    "n_cross_validations": 5,
}

automl_config = AutoMLConfig(**automl_settings)

In [19]:
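# Optional, left disabled: uncomment to install the AutoML requirements file below into the environment.
#pip install -r /anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/validated_linux_requirements.txt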

In [20]:
# Create (or reuse) the 'automl_remote' experiment and submit the AutoML
# configuration to it, streaming progress into the cell output.
experiment_auto = Experiment(workspace=ws, name='automl_remote')
remote_run = experiment_auto.submit(automl_config, show_output=True)

# Show the run-monitoring widget for the AutoML run.
automl_widget = RunDetails(remote_run)
automl_widget.show()

No run_configuration provided, running on local with default configuration
Running in the active local environment.
Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanc

Experiment,Id,Type,Status,Details Page,Docs Page
automl_remote,AutoML_58e39dda-c7a0-4df9-9702-f835716d3c58,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


MaxAbsScaler GradientBoosting                  0:00:36       0.9040    0.9159
         8   StandardScalerWrapper RandomForest             0:00:29       0.8982    0.9159
         9   MaxAbsScaler LogisticRegression                0:00:29       0.9098    0.9159
        10   MaxAbsScaler LightGBM                          0:00:25       0.8918    0.9159
        11   SparseNormalizer XGBoostClassifier             0:00:35       0.9134    0.9159
        12   MaxAbsScaler ExtremeRandomTrees                0:01:29       0.8877    0.9159
        13   StandardScalerWrapper LightGBM                 0:00:25       0.8877    0.9159
        14   SparseNormalizer XGBoostClassifier             0:01:18       0.9145    0.9159
        15   MaxAbsScaler LightGBM                          0:00:29       0.9102    0.9159
        16   StandardScalerWrapper LightGBM                 0:00:25       0.8877    0.9159
        17   StandardScalerWrapper ExtremeRandomTrees       0:00:38       0.8877    0.9159
        18  

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [21]:
# Retrieve the best AutoML child run together with its fitted model.
# get_output() already returns the best run, so the separate get_best_child()
# lookup and the discarded get_details() call are not needed.
best_run, best_model = remote_run.get_output()

# Register the contents of the run's 'outputs/' folder as a model in the workspace.
best_run.register_model(model_name='automl_best_model.pkl', model_path='outputs/')

# Cell output: summary of the best run.
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl_remote,AutoML_58e39dda-c7a0-4df9-9702-f835716d3c58_40,,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [22]:
# Cleanup (disabled): uncomment to delete the compute cluster once all runs have finished.
#cpu_cluster.delete()