In [31]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
ws.write_config(path='.azureml')
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')


Workspace name: quick-starts-ws-130115
Azure region: southcentralus
Subscription id: 8ca4d354-cab8-4c39-9d61-ea227d2daaad
Resource group: aml-quickstarts-130115


In [32]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

#name for cpu cluster
amlcompute_cluster_name = "cpu-cluster"

#check for existance of cluster
try:
    aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, using it ...')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2',
                                                           max_nodes = 4)
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
aml_compute.wait_for_completion(show_output=True)


Found existing cluster, using it ...
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [33]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    {
    "--C": uniform(0.001,0.2),
    "--max_iter": choice([100, 200, 300, 500])
    }
)

# Specify a Policy
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=3)
import shutil
if "training" not in os.listdir():
    os.mkdir("./training")
os.makedirs("./training", exist_ok=True)
shutil.copy("train.py","./training")
# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = './training',
              compute_target=aml_compute,
              entry_script='train.py',
              pip_packages=["pyarrow>=0.12.0", "pyspark"])
primary_metric_name = "Accuracy"
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     primary_metric_name=primary_metric_name,
                                     primary_metric_goal=primary_metric_goal,
                                     policy=policy,
                                     max_total_runs=20,
                                     max_concurrent_runs=3,
                                     )



In [34]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hyperopt_run = exp.submit(hyperdrive_config, tag={'type':'hyperdrive_opt'})
RunDetails(hyperopt_run).show()
### YOUR CODE HERE ###



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [35]:
import joblib
# Get your best run and save the model from that run.
hyperopt_run.wait_for_completion(show_output=True)
assert (hyperopt_run.get_status() == "Completed")
best_run = hyperopt_run.get_best_run_by_primary_metric()

best_run.register_model(model_name="bankmarketing_hyperdrive", model_path="outputs/bankmarketing_hyperdrive.joblib")

RunId: HD_883e8608-8d39-4c13-a5ad-bd5a2723051d
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_883e8608-8d39-4c13-a5ad-bd5a2723051d?wsid=/subscriptions/8ca4d354-cab8-4c39-9d61-ea227d2daaad/resourcegroups/aml-quickstarts-130115/workspaces/quick-starts-ws-130115

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-11T10:29:27.569810][API][INFO]Experiment created<END>\n""<START>[2020-12-11T10:29:28.342937][GENERATOR][INFO]Trying to sample '3' jobs from the hyperparameter space<END>\n""<START>[2020-12-11T10:29:28.727941][GENERATOR][INFO]Successfully sampled '3' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2020-12-11T10:29:29.1906165Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_883e8608-8d39-4c13-a5ad-bd5a2723051d
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_883e8608-8d39-4c13-a5ad-bd5a2723051d?wsid=/subscriptions/8ca4d

Model(workspace=Workspace.create(name='quick-starts-ws-130115', subscription_id='8ca4d354-cab8-4c39-9d61-ea227d2daaad', resource_group='aml-quickstarts-130115'), name=bankmarketing_hyperdrive, id=bankmarketing_hyperdrive:5, version=5, tags={}, properties={})

In [36]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
bankmarketing_data = TabularDatasetFactory.from_delimited_files(url)

In [37]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(bankmarketing_data)

In [38]:
from azureml.train.automl import AutoMLConfig


# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
x["y"] = y # add target to training data
ds = ws.get_default_datastore()
input_data = TabularDatasetFactory.register_pandas_dataframe(dataframe=x, target=ds, name="bankmarketing")

automl_settings = {
    "n_cross_validations": 3,
    "enable_early_stopping": True,
    "primary_metric": "accuracy",
    "blocked_models": ["SVM"]
}
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task="classification",
    compute_target=aml_compute,
    training_data=input_data,
    label_column_name="y",
    **automl_settings)



Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/028967fb-6b19-4b38-93fd-68dfa4cd98a3/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [39]:
# Submit your automl run
automl_run = exp.submit(automl_config, tag={'type':'automl'}, show_output=True)




Running on remote.
No run_configuration provided, running on cpu-cluster with default configuration
Running on remote compute: cpu-cluster
Parent Run ID: AutoML_96e5159a-778d-467d-bc51-43969f4ac7fd

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  Your input data has been split into a training dataset and a holdout test dataset for validation of the model. The test holdout dataset reflects the original distribution of your input data.
              
DETAILS:      
+---------------------------------+---------------------------------+---------------------------------+
|Dataset                          |Row counts                       |Percentage     



         0   MaxAbsScaler LightGBM                          0:00:40       0.9108    0.9108
         1   MaxAbsScaler XGBoostClassifier                 0:00:46       0.9071    0.9108
         2   MinMaxScaler RandomForest                      0:00:59       0.8998    0.9108
         3   MinMaxScaler RandomForest                      0:00:43       0.8880    0.9108
         4   MinMaxScaler RandomForest                      0:00:39       0.7885    0.9108
         5   MinMaxScaler SVM                               0:05:37       0.9047    0.9108
         6   MaxAbsScaler GradientBoosting                  0:00:52       0.9041    0.9108
         7   StandardScalerWrapper RandomForest             0:00:43       0.9011    0.9108
         8   StandardScalerWrapper RandomForest             0:00:51       0.8880    0.9108
         9   SparseNormalizer XGBoostClassifier             0:00:55       0.9093    0.9108
        10   RobustScaler ExtremeRandomTrees                0:00:48       0.8880    0.9108

In [47]:
# Retrieve and save your best automl model.
automl_run.wait_for_completion()
assert (automl_run.get_status() == "Completed")
best_run, best_model = automl_run.get_output()
best_run.register_model(model_name="best_automl_model", file_path="/outputs/model.pkl")
### YOUR CODE HERE ###



ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path best_automl_model in the set of files uploaded to the run: ['accuracy_table', 'automl_driver.py', 'azureml-logs/55_azureml-execution-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/65_job_prep-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'confusion_matrix', 'explanation/13a89e30/classes.interpret.json', 'explanation/13a89e30/eval_data_viz.interpret.json', 'explanation/13a89e30/expected_values.interpret.json', 'explanation/13a89e30/features.interpret.json', 'explanation/13a89e30/global_names/0.interpret.json', 'explanation/13a89e30/global_rank/0.interpret.json', 'explanation/13a89e30/global_values/0.interpret.json', 'explanation/13a89e30/local_importance_values.interpret.json', 'explanation/13a89e30/per_class_names/0.interpret.json', 'explanation/13a89e30/per_class_rank/0.interpret.json', 'explanation/13a89e30/per_class_values/0.interpret.json', 'explanation/13a89e30/rich_metadata.interpret.json', 'explanation/13a89e30/visualization_dict.interpret.json', 'explanation/3e1fa600/classes.interpret.json', 'explanation/3e1fa600/eval_data_viz.interpret.json', 'explanation/3e1fa600/expected_values.interpret.json', 'explanation/3e1fa600/features.interpret.json', 'explanation/3e1fa600/global_names/0.interpret.json', 'explanation/3e1fa600/global_rank/0.interpret.json', 'explanation/3e1fa600/global_values/0.interpret.json', 'explanation/3e1fa600/local_importance_values.interpret.json', 'explanation/3e1fa600/per_class_names/0.interpret.json', 'explanation/3e1fa600/per_class_rank/0.interpret.json', 'explanation/3e1fa600/per_class_values/0.interpret.json', 'explanation/3e1fa600/rich_metadata.interpret.json', 'explanation/3e1fa600/visualization_dict.interpret.json', 'explanation/3e1fa600/ys_pred_proba_viz.interpret.json', 'explanation/3e1fa600/ys_pred_viz.interpret.json', 'logs/azureml/102_azureml.log', 'logs/azureml/azureml_automl.log', 'logs/azureml/dataprep/python_span_562df873-4517-42de-8d2d-1cbd139a4d15.jsonl', 'logs/azureml/dataprep/python_span_6109baa1-2de3-41bd-9037-fff70794e8f4.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.pkl', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path best_automl_model in the set of files uploaded to the run: ['accuracy_table', 'automl_driver.py', 'azureml-logs/55_azureml-execution-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/65_job_prep-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_bd1ebb84d8abce07d4486624fd427f9a7240ed68d1463ee755ce9135d835cbfe_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'confusion_matrix', 'explanation/13a89e30/classes.interpret.json', 'explanation/13a89e30/eval_data_viz.interpret.json', 'explanation/13a89e30/expected_values.interpret.json', 'explanation/13a89e30/features.interpret.json', 'explanation/13a89e30/global_names/0.interpret.json', 'explanation/13a89e30/global_rank/0.interpret.json', 'explanation/13a89e30/global_values/0.interpret.json', 'explanation/13a89e30/local_importance_values.interpret.json', 'explanation/13a89e30/per_class_names/0.interpret.json', 'explanation/13a89e30/per_class_rank/0.interpret.json', 'explanation/13a89e30/per_class_values/0.interpret.json', 'explanation/13a89e30/rich_metadata.interpret.json', 'explanation/13a89e30/visualization_dict.interpret.json', 'explanation/3e1fa600/classes.interpret.json', 'explanation/3e1fa600/eval_data_viz.interpret.json', 'explanation/3e1fa600/expected_values.interpret.json', 'explanation/3e1fa600/features.interpret.json', 'explanation/3e1fa600/global_names/0.interpret.json', 'explanation/3e1fa600/global_rank/0.interpret.json', 'explanation/3e1fa600/global_values/0.interpret.json', 'explanation/3e1fa600/local_importance_values.interpret.json', 'explanation/3e1fa600/per_class_names/0.interpret.json', 'explanation/3e1fa600/per_class_rank/0.interpret.json', 'explanation/3e1fa600/per_class_values/0.interpret.json', 'explanation/3e1fa600/rich_metadata.interpret.json', 'explanation/3e1fa600/visualization_dict.interpret.json', 'explanation/3e1fa600/ys_pred_proba_viz.interpret.json', 'explanation/3e1fa600/ys_pred_viz.interpret.json', 'logs/azureml/102_azureml.log', 'logs/azureml/azureml_automl.log', 'logs/azureml/dataprep/python_span_562df873-4517-42de-8d2d-1cbd139a4d15.jsonl', 'logs/azureml/dataprep/python_span_6109baa1-2de3-41bd-9037-fff70794e8f4.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.pkl', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']\n                See https://aka.ms/run-logging for more details."
    }
}