In [2]:
from azureml.core import Workspace, Experiment

ws = Workspace.get(name="quick-starts-ws-123107")
exp = Experiment(workspace=ws, name="hypdrive")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-123107
Azure region: southcentralus
Subscription id: 2552278b-2817-43a7-820e-5a5a53ff9e19
Resource group: aml-quickstarts-123107


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###


cluster_name = "amlcomp"
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target {}.'.format(cluster_name))
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                               max_nodes=4)


      # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # can poll for a minimum number of nodes and for a specific timeout. 
    # if no min node count is provided it uses the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

compute_target.wait_for_completion(show_output= True)
print("Azure Machine Learning Compute attached")

Creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
Azure Machine Learning Compute attached


In [5]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
datapath="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds=TabularDatasetFactory.from_delimited_files(path=datapath)
ds=ds.to_pandas_dataframe()

### YOUR CODE HERE ###

In [8]:
train_data =TabularDatasetFactory.from_delimited_files(path=datapath)
#train_data, validation_data = ds.random_split(percentage=0.8, seed=200) # use next time
label = "y"

In [9]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric= 'accuracy',
    training_data= train_data,
    label_column_name= 'y',
    n_cross_validations=5,
    compute_target=compute_target,
    iterations=50,
    max_concurrent_iterations=10)

In [10]:
# Submit your automl run

### YOUR CODE HERE ###
automl_run=exp.submit(automl_config,show_output=True)

Running on remote.
Running on remote compute: amlcomp
Parent Run ID: AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias 

In [12]:
# Wait for the remote run to complete
automl_run.wait_for_completion()

{'runId': 'AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c',
 'target': 'amlcomp',
 'status': 'Completed',
 'startTimeUtc': '2020-10-27T00:27:56.164403Z',
 'endTimeUtc': '2020-10-27T00:59:07.461626Z',
 'properties': {'num_iterations': '50',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'amlcomp',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"38d4351e-4635-4bf8-9064-622251c79ca6\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 1, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\\\\\\", \\\\\\"sas\\\\\\": null, \\\\\\"storageAccountName\\\\\\": null, \\\\\

In [13]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###
best_run,fitted_model=automl_run.get_output()
print(best_run)
print(fitted_model)

Run(Experiment: hypdrive,
Id: AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c_48,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                  eta0=0.01,
                                                                                                  fit_intercept=True,
                                           

In [14]:
import joblib
joblib.dump(fitted_model,'automl_bestmodel.joblib')

['automl_bestmodel.joblib']

In [16]:
#print best parameters and accuracy

print("Best Run Id: ", best_run.id)
print('\n Accuracy:', best_run.get_metrics()['accuracy'])

print("\n Parameters: ", best_run.get_details())

Best Run Id:  AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c_48

 Accuracy: 0.9179362670713201

 Parameters:  {'runId': 'AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c_48', 'target': 'amlcomp', 'status': 'Completed', 'startTimeUtc': '2020-10-27T00:57:06.850625Z', 'endTimeUtc': '2020-10-27T00:58:39.641304Z', 'properties': {'runTemplate': 'automl_child', 'pipeline_id': '__AutoML_Ensemble__', 'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'accuracy\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'hypdrive\',\'compute_target\':\'amlcomp\',\'subscription_id\':\'2552278b-2817-43a7-820e-5a5a53ff9e19\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_0f00c845-7726-43ba-a8ee-59a13736d47c_48","experiment_name":"hypdrive","workspa

In [None]:
compute_target.delete()