In [1]:
from azureml.core import Workspace, Experiment

# Load the workspace described by the local Azure ML config file.
ws = Workspace.from_config()

"""Create a workspace named "udacity-project" or
remember to change the number 127939 each time a VM is run"""

# Experiment under which the HyperDrive runs in this notebook are grouped.
exp = Experiment(workspace=ws, name="udacity-project")

# Build every summary line first, then emit them one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# Open an interactive logging run on the experiment.
run = exp.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code ACY2K4KDV to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-142400
Azure region: southcentralus
Subscription id: f9d5a085-54dc-4215-9ba6-dad5d86e60a0
Resource group: aml-quickstarts-142400


In [2]:
# Optional: reload the workspace from the local config and display it to check its name and settings.
ws = Workspace.from_config() 
ws 

Workspace.create(name='quick-starts-ws-142400', subscription_id='f9d5a085-54dc-4215-9ba6-dad5d86e60a0', resource_group='aml-quickstarts-142400')

In [3]:
# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster shared by every remote run in this notebook.
cpu_cluster_name = "cpu-cluster"

try:
    # Looking the cluster up raises ComputeTargetException when it is not available.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Not found: provision a new cluster with the size limits given in the cell instructions.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait for provisioning to finish in either branch before continuing.
cpu_cluster.wait_for_completion(show_output=True)

cluster_status = cpu_cluster.get_status()
print(cluster_status.serialize())

Creating....
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-04-11T09:13:55.810000+00:00', 'errors': None, 'creationTime': '2021-04-11T09:13:53.511977+00:00', 'modifiedTime': '2021-04-11T09:14:09.269634+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os

# Specify the parameter sampler: each child run draws one value per argument at random.
# The keys are the command-line flags forwarded to train.py.
from azureml.train.hyperdrive.parameter_expressions import choice
import shutil

ps = RandomParameterSampling({
    "--C": choice(0.1, 0.3, 0.5, 1, 10, 50, 100),
    "--max_iter": choice(50, 100, 150, 200, 250),
})

# Specify a policy for early termination of poorly performing runs.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1)

# Stage train.py in its own folder so that folder is what gets submitted as the source directory.
# makedirs(exist_ok=True) creates the folder when missing and is a no-op when it already exists,
# so no separate existence check is needed.
script_folder = './training'
os.makedirs(script_folder, exist_ok=True)
shutil.copy('./train.py', script_folder)

# Create a SKLearn estimator for use with train.py.
# NOTE: the SDK reports this estimator as deprecated in favour of ScriptRunConfig
# (see the warning this cell prints); migrating is left as follow-up work.
est = SKLearn(
    source_directory=script_folder,
    entry_script='train.py',
    compute_target=cpu_cluster,
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=15,
    max_concurrent_runs=2,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [5]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
# from azureml.core.experiment import Experiment
# experiment = Experiment(ws, ws.name)


# Submit the HyperDrive configuration to the experiment.
hyperdrive_run = exp.submit(hyperdrive_config, show_output=True)

# Show the run-details widget for the submitted sweep.
from azureml.widgets import RunDetails

run_widget = RunDetails(hyperdrive_run)
run_widget.show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [6]:
# Block until the HyperDrive sweep has finished; show_output=True reports progress in the cell output.
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_cd819cb1-7476-4d20-990a-31369d96911d
Web View: https://ml.azure.com/runs/HD_cd819cb1-7476-4d20-990a-31369d96911d?wsid=/subscriptions/f9d5a085-54dc-4215-9ba6-dad5d86e60a0/resourcegroups/aml-quickstarts-142400/workspaces/quick-starts-ws-142400&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Execution Summary
RunId: HD_cd819cb1-7476-4d20-990a-31369d96911d
Web View: https://ml.azure.com/runs/HD_cd819cb1-7476-4d20-990a-31369d96911d?wsid=/subscriptions/f9d5a085-54dc-4215-9ba6-dad5d86e60a0/resourcegroups/aml-quickstarts-142400/workspaces/quick-starts-ws-142400&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_cd819cb1-7476-4d20-990a-31369d96911d',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-04-11T09:14:21.405691Z',
 'endTimeUtc': '2021-04-11T09:29:09.010711Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '9bd96615-2cad-43a0-b135-1b6466f1454c',
  'score': '0.9072837632776934',
  'best_child_run_id': 'HD_cd819cb1-7476-4d20-990a-31369d96911d_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg142400.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_cd819cb1-7476-4d20-990a-31369d96911d/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=oMoxoJzDI2W7xJ9lcJBdGpGqU5lB9WVDtjvKMj1d7S0%3D&st=2021-04-11T09%3A19%3A10Z&se=2021-04-11T17%3A29%3A10Z&sp=r'},
 'submittedBy': 'ODL_User 142400'}

In [7]:
import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
# Child run with the best value of the primary metric configured for the sweep.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Fetch the logged metrics once and reuse them for every line printed below.
best_run_metrics = best_run.get_metrics()

# Command-line arguments the best run was launched with; presumably a flat list of
# flag/value strings such as ['--C', ..., '--max_iter', ...] — verify against get_details().
parameter_values = best_run.get_details()['runDefinition']['arguments']

# Look the value up by its flag rather than by a fixed position, so the printed
# number is always the one that followed '--max_iter'.
max_iter_value = parameter_values[parameter_values.index('--max_iter') + 1]

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Regularization Strength:', best_run_metrics['Regularization Strength:'])
# This value is the --max_iter hyperparameter, so label it as such (it is not a learning rate).
print('\n Max iterations:', max_iter_value)
# print('\n keep probability:',parameter_values[5])
# print('\n batch size:',parameter_values[7])


Best Run Id:  HD_cd819cb1-7476-4d20-990a-31369d96911d_1

 Accuracy: 0.9072837632776934

 Regularization Strength: 10.0

 learning rate: 100


In [8]:
# Display every metric logged by the best HyperDrive run.
best_run_metrics

{'Regularization Strength:': 10.0,
 'Max iterations:': 100,
 'Accuracy': 0.9072837632776934}

In [9]:
# List the files stored with the best run, to locate the saved model artifact.
print(best_run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_d120093bfb5e4392da532058ec49c62a312fbc469ad95776476fe5349c07d980_d.txt', 'azureml-logs/65_job_prep-tvmps_d120093bfb5e4392da532058ec49c62a312fbc469ad95776476fe5349c07d980_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d120093bfb5e4392da532058ec49c62a312fbc469ad95776476fe5349c07d980_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/106_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.joblib']


In [10]:
# Register the model file saved by the best HyperDrive run under the name 'best_hd_model'.
# Optional framework metadata (model_framework / model_framework_version) is not supplied.
hd_model = best_run.register_model(
    model_name='best_hd_model',
    model_path='outputs/model.joblib',
)
print(best_run)

Run(Experiment: udacity-project,
Id: HD_cd819cb1-7476-4d20-990a-31369d96911d_1,
Type: azureml.scriptrun,
Status: Completed)


In [11]:
# filename = 'model.joblib'
# joblib.dump(best_run, filename)

In [12]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset, Datastore
# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
# Location of the bank-marketing training CSV, read directly over HTTPS.
url = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
# Build a tabular dataset from the delimited file at `url`.
ds = TabularDatasetFactory.from_delimited_files(path=url)

In [13]:
# Load the dataset into a pandas DataFrame to inspect the raw columns.
ds.to_pandas_dataframe()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.860,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no
3,36,admin.,married,high.school,no,no,no,telephone,jun,fri,...,4,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no
4,27,housemaid,married,high.school,no,yes,no,cellular,jul,fri,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32945,56,housemaid,married,basic.4y,no,no,yes,cellular,jul,mon,...,1,999,0,nonexistent,1.4,93.918,-42.7,4.960,5228.1,no
32946,37,management,married,university.degree,no,no,yes,cellular,jul,fri,...,7,999,0,nonexistent,1.4,93.918,-42.7,4.957,5228.1,no
32947,26,admin.,single,university.degree,no,no,no,cellular,may,tue,...,4,999,1,failure,-1.8,92.893,-46.2,1.266,5099.1,no
32948,31,blue-collar,single,basic.9y,no,no,no,cellular,apr,mon,...,1,999,0,nonexistent,-1.8,93.075,-47.1,1.405,5099.1,no


In [14]:
from train import clean_data

# Use the clean_data function (imported from train.py) to turn the dataset into
# a feature table `x` and a label column `y`.
x, y = clean_data(ds)

In [16]:

# Sanity check: report the container types returned by clean_data.
print(type(x))
print(type(y))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [17]:
from sklearn.model_selection import train_test_split
# Re-attach the label column to the features so rows and labels are split together.
final_data = x.join(y)
# A fixed random_state makes the split repeatable; no test_size/train_size is passed.
# NOTE(review): train_data/test_data are not referenced by any later cell visible here — the
# AutoML configuration is given the raw dataset `ds` instead. Confirm whether that is intended.
train_data, test_data = train_test_split(final_data, random_state = 42)

In [18]:
# Preview the first rows of the cleaned table (features plus label).
final_data.head()

Unnamed: 0,age,marital,default,housing,loan,month,day_of_week,duration,campaign,pdays,...,contact_telephone,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,y
0,57,1,0,0,1,5,1,371,1,999,...,0,0,0,0,1,0,0,0,0,0
1,55,1,0,1,0,5,4,285,2,999,...,1,0,0,0,0,0,0,0,1,0
2,33,1,0,0,0,5,5,52,1,999,...,0,0,0,1,0,0,0,0,0,0
3,36,1,0,0,0,6,5,355,4,999,...,1,0,0,0,1,0,0,0,0,0
4,27,1,0,1,0,7,5,189,2,999,...,0,0,0,0,1,0,0,0,0,0


In [19]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    # What to learn and how candidate models are scored.
    task='classification',
    primary_metric='accuracy',
    n_cross_validations=3,
    # Training data: the tabular dataset `ds`, with 'y' as the label column.
    training_data=ds,
    label_column_name='y',
    # Where and for how long to run (the timeout must stay at 30, see the note above).
    compute_target=cpu_cluster,
    experiment_timeout_minutes=30,
    # Restrict the search to models that can be exported to ONNX.
    enable_onnx_compatible_models=True,
)

In [20]:
# Submit your automl run

### YOUR CODE HERE ###
# The AutoML run is tracked in its own experiment.
# NOTE: this rebinds `exp`, which earlier cells used for the "udacity-project" experiment.
exp = Experiment(workspace=ws, name="automl_test")
automl_model = exp.submit(automl_config, show_output=True)

# Show the run-details widget for the AutoML run.
from azureml.widgets import RunDetails

automl_widget = RunDetails(automl_model)
automl_widget.show()

# Retrieve the best child run and its fitted model.
# NOTE: this rebinds `best_run`, which previously held the best HyperDrive run.
best_run, fitted_model = automl_model.get_output()
print(best_run)
print(fitted_model)

Submitting remote run.
No run_configuration provided, running on cpu-cluster with default configuration
Running on remote compute: cpu-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
automl_test,AutoML_2a8b2992-3168-44d2-9586-ff6b66049369,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation





In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###
from azureml.automl.runtime.onnx_convert import OnnxConverter
# Ask the AutoML run for its best run together with the ONNX form of the model
# (ONNX-compatible models were requested when automl_config was built).
best_run_model, onnx_model= automl_model.get_output(return_onnx_model=True)
# Write the ONNX model to a file next to the notebook.
OnnxConverter.save_onnx_model(onnx_model, file_path="./automl_model.onnx")

In [None]:
# # Delete the compute cluster once all runs have finished (the cluster object in this notebook is `cpu_cluster`)
# cpu_cluster.delete()