In [1]:
from azureml.core import Workspace, Experiment

# Connect to the workspace through the saved workspace configuration
# (alternative kept for reference: Workspace.get(name="udacity-project")).
ws = Workspace.from_config()

# Every run submitted by this notebook is grouped under this experiment.
exp = Experiment(workspace=ws, name="udacity-project-lancia")

# Report which workspace we are attached to, one field per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Open an interactive logging run on the experiment.
run = exp.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code E8PVTUS94 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-154782
Azure region: southcentralus
Subscription id: 81cefad3-d2c9-4f77-a466-99a7f541c7bb
Resource group: aml-quickstarts-154782


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Create (or reuse) the compute cluster that the training runs are submitted to.
# Project constraints: vm_size = "Standard_D2_V2", max_nodes no greater than 4.
from azureml.core.compute_target import ComputeTargetException

cluster_name = "alpha"

try:
    # Look the cluster up first: only a "not found" error should lead to
    # provisioning. Any other failure (auth, quota, bad config) must surface
    # instead of being reported as "existing cluster".
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print("using existing cluster")
except ComputeTargetException:
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)

# Block until the cluster is ready so later submissions do not race provisioning.
compute_cluster.wait_for_completion(show_output=True)


In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

# Random search over the two command-line arguments handed to train.py:
# a continuous range for --C and a discrete set of values for --max_iter.
ps = RandomParameterSampling(
    {
        "--C": uniform(0.1, 0.99),
        "--max_iter": choice(25, 50, 100, 200),
    }
)

# Early-termination policy for the sweep.
policy = BanditPolicy(
    evaluation_interval=1,
    delay_evaluation=5,
    slack_factor=0.1,
)

# Make sure a local "training" folder exists in the working directory.
if "training" not in os.listdir("."):
    os.mkdir("./training")

# SKLearn estimator that runs train.py from the current directory on the cluster.
est = SKLearn(
    source_directory=".",
    entry_script="train.py",
    compute_target=compute_cluster,
)

# Tie estimator, sampler and policy together; the sweep maximises "Accuracy"
# over at most 20 runs, 4 of them in parallel.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    primary_metric_name="Accuracy",
    max_concurrent_runs=4,
    max_total_runs=20,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [4]:
# Launch the HyperDrive sweep on the experiment.
hdr = exp.submit(config=hyperdrive_config)

# Show live progress in the notebook widget.
hdr_widget = RunDetails(hdr)
hdr_widget.show()

# Block until the sweep finishes, streaming its logs; as the last expression,
# the returned run details are displayed as the cell output.
hdr.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_77cccad9-6295-4c77-92d3-f0fa8beb254c
Web View: https://ml.azure.com/runs/HD_77cccad9-6295-4c77-92d3-f0fa8beb254c?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-154782/workspaces/quick-starts-ws-154782&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-08-17T16:00:26.544119][API][INFO]Experiment created<END>\n""<START>[2021-08-17T16:00:27.011253][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-08-17T16:00:27.216863][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_77cccad9-6295-4c77-92d3-f0fa8beb254c
Web View: https://ml.azure.com/runs/HD_77cccad9-6295-4c77-92d3-f0fa8beb254c?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-154782/workspaces/quick-starts-ws-154782&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_77cccad9-6295-4c77-92d3-f0fa8beb254c',
 'target': 'alpha',
 'status': 'Completed',
 'startTimeUtc': '2021-08-17T16:00:26.195051Z',
 'endTimeUtc': '2021-08-17T16:12:07.366791Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '7dae318b-4bec-4b33-ae89-f8ee1c81695f',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1055-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.32.0',
  'score': '0.910152657715652',
  'best_child_run_id': 'HD_77cccad9-6295-4c77-92d3-f0fa8beb254c_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg154782.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_77cccad9-6295-4c77-92d3-f0fa8beb254c/azureml-logs/hyperdrive.txt?sv=2019-07-07&sr=b&sig=0PG2

In [5]:
import joblib
# Pick the child run with the best primary metric (Accuracy, maximised).
best_run_hdr = hdr.get_best_run_by_primary_metric()

# Arguments the winning run was launched with, and the metrics it logged.
best_params_hdr = best_run_hdr.get_details()['runDefinition']['arguments']
best_run_metrics_hdr = best_run_hdr.get_metrics()

print("\n------------")
print(
    'Best run ID: ', best_run_hdr.id,
    '\nBest run Accuracy: ', best_run_metrics_hdr['Accuracy'],
    '\n---------\nMetrics: ', best_run_metrics_hdr,
    '\nparams: ', best_params_hdr,
)
print("------------\n")

# Register the model file from the best run's outputs under a workspace model name.
model = best_run_hdr.register_model(
    model_path='outputs/model.joblib',
    model_name='azmle-project-lancia',
)


------------
Best run ID:  HD_77cccad9-6295-4c77-92d3-f0fa8beb254c_1 
Best run Accuracy:  0.910152657715652 
---------
Metrics:  {'Regularization Strength:': 0.29260487703997207, 'Max iterations:': 25, 'Accuracy': 0.910152657715652} 
params:  ['--C', '0.29260487703997207', '--max_iter', '25']
------------



In [6]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Bank-marketing training data, hosted as a public CSV in blob storage.
data_path_ = (
    "https://automlsamplenotebookdata.blob.core.windows.net"
    "/automl-sample-notebook-data/bankmarketing_train.csv"
)

# Wrap the remote CSV in a TabularDataset via TabularDatasetFactory.
ds = TabularDatasetFactory.from_delimited_files(path=data_path_)


In [9]:
from train import clean_data
import os
from sklearn.model_selection import train_test_split
from azureml.core import Dataset
from pandas import concat

# Clean the raw dataset with the clean_data helper imported from train.py.
x, y = clean_data(ds)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Training features and label in a single table, as written to the CSV below.
train_data = concat([x_train, y_train], axis=1)
# Name of the label column inside train_data (used as label_column_name later).
target_label = y_train.name

# Create the staging folder together with any missing parents, so this cell
# works on a fresh kernel and on re-runs regardless of which cells ran before.
data_folder = 'training/data/'
os.makedirs(data_folder, exist_ok=True)

train_data.to_csv(os.path.join(data_folder, "prepared_data.csv"), index=False)

# Upload the prepared file to the workspace's default datastore.
datastore = ws.get_default_datastore()
datastore.upload(src_dir=data_folder, target_path='udacity-lancia', overwrite=True)

# Tabular dataset referencing the uploaded copy (built once; the original
# repeated this identical call twice).
dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'udacity-lancia/prepared_data.csv')])


Uploading an estimated of 1 files
Uploading training/data/prepared_data.csv
Uploaded training/data/prepared_data.csv, 1 files out of an estimated total of 1
Uploaded 1 files


In [10]:
from azureml.train.automl import AutoMLConfig

# AutoML settings for a classification task on the prepared training data.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment for longer requires your own Azure tenant, which
# will incur personal costs.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    experiment_timeout_minutes=30,
    compute_target=compute_cluster,
    training_data=dataset,
    label_column_name=target_label,
    n_cross_validations=3,
)

In [11]:
# Launch the AutoML experiment on the remote compute target.
automl = exp.submit(config=automl_config)

# Show live progress in the notebook widget.
automl_widget = RunDetails(automl)
automl_widget.show()

# Block until AutoML finishes, streaming its status; as the last expression,
# the returned run details are displayed as the cell output.
automl.wait_for_completion(show_output=True)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project-lancia,AutoML_eebfa5d9-c5f5-4e46-8969-6d9cbb16211c,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project-lancia,AutoML_eebfa5d9-c5f5-4e46-8969-6d9cbb16211c,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+-------------------------

{'runId': 'AutoML_eebfa5d9-c5f5-4e46-8969-6d9cbb16211c',
 'target': 'alpha',
 'status': 'Completed',
 'startTimeUtc': '2021-08-17T16:18:50.181632Z',
 'endTimeUtc': '2021-08-17T16:58:34.71994Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'alpha',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"1e01ef70-508c-4dd8-bdde-5c24d4f31bd7\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.32.0", "azureml-train": "1.32.0", "azureml-train-restclients-hyperdrive": "1.32.0", "azureml-train-core": "1.32.0", "azureml-train-automl": "1.32.0", "azureml-train-automl-runtime": "1.32.0", "azureml-train-automl-client": "1.32.0", "azureml-tensorbo

In [12]:
# Retrieve and save your best automl model.

import joblib
# Get your best run and save the model from that run.

# Best AutoML child run together with its fitted model object.
best_run_aml, best_model_aml = automl.get_output()
best_run_metrics = best_run_aml.get_metrics()

print("\n------------\n")
print(
    'Best run ID: ', best_run_aml.id,
    '\n Best run Accuracy: ', best_run_metrics['accuracy'],
    '\n ----- \n Metrics: ', best_run_metrics,
)
print("\n------------\n")

# Register the model file from the best run's outputs under a workspace model name.
model = best_run_aml.register_model(
    model_path='outputs/model.pkl',
    model_name='azmle-project-lancia-automl',
)

Package:azureml-automl-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-core, training version:1.33.0, current version:1.32.0
Package:azureml-dataprep, training version:2.20.1, current version:2.18.0
Package:azureml-dataprep-native, training version:38.0.0, current version:36.0.0
Package:azureml-dataprep-rslex, training version:1.18.0, current version:1.16.1
Package:azureml-dataset-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-defaults, training version:1.33.0, current version:1.32.0
Package:azureml-interpret, training version:1.33.0, current version:1.32.0
Package:azureml-mlflow, training version:1.33.0, current version:1.32.0
Package:azureml-pipeline-core, training version:1.33.0, current version:1.32.0
Package:azureml-responsibleai, training version:1.33.0, current version:1.32.0
Package:azureml-telemetry, training version:1.33.0, current version:1.32.0
Package:azureml-train-automl-client, training version:1.33.0, current version:1.


------------

Best run ID:  AutoML_eebfa5d9-c5f5-4e46-8969-6d9cbb16211c_20 
 Best run Accuracy:  0.917488709552918 
 ----- 
 Metrics:  {'weighted_accuracy': 0.9525246702127411, 'f1_score_weighted': 0.916090911083235, 'f1_score_micro': 0.917488709552918, 'f1_score_macro': 0.7855374738262869, 'precision_score_macro': 0.7957074201157059, 'recall_score_weighted': 0.917488709552918, 'average_precision_score_micro': 0.9813996352666304, 'matthews_correlation': 0.5716865566922733, 'AUC_micro': 0.9806237854130829, 'average_precision_score_weighted': 0.9550456918966544, 'recall_score_macro': 0.7763086662553315, 'accuracy': 0.917488709552918, 'recall_score_micro': 0.917488709552918, 'AUC_macro': 0.9471475555165041, 'precision_score_micro': 0.917488709552918, 'norm_macro_recall': 0.5526173325106628, 'AUC_weighted': 0.9471475555165041, 'precision_score_weighted': 0.9149356820944999, 'average_precision_score_macro': 0.8231082071046837, 'log_loss': 0.2521444398714049, 'balanced_accuracy': 0.77630866