In [24]:
## https://github.com/khalidw/Optimizing_a_Pipeline_in_Azure-ML/blob/master/udacity-project.ipynb

In [13]:
import pandas as pd

from azureml.core import Workspace, Experiment

# from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
import os

In [14]:
from azureml.core import Experiment, Workspace

# Attach to the Azure ML workspace via Workspace.from_config(); `ws` is reused by later cells.
ws = Workspace.from_config()

# Print the workspace's resource ID as a quick check of which workspace we are attached to.
workspace_details = ws.get_details()
print(workspace_details['id'])

/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourceGroups/aml-quickstarts-135353/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135353


In [15]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Create (or reuse) the compute cluster used for training.
# Use vm_size = "Standard_D2_V2" in the provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
# Compute name should contain only letters, digits, and hyphens, and be 2-16 characters long.
compute_name = "DS2V2"
try:
    # Look the cluster up by name; reuse it when it is already provisioned.
    trainCluster = ComputeTarget(ws, compute_name)
    print(f"{compute_name} exists already")
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. Catching everything
    # (bare `except:`) would also swallow auth/network errors and KeyboardInterrupt.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
    trainCluster = ComputeTarget.create(ws, compute_name, compute_config)

# Wait until the cluster is ready in either branch, streaming status output.
trainCluster.wait_for_completion(show_output=True)

DS2V2 exists already

Running


In [22]:

import shutil

# Specify parameter sampler.
# Each key is passed as a command-line argument to train.py for a sampled run.
# NOTE(review): not every solver/penalty pairing is valid for scikit-learn's
# LogisticRegression (e.g. "lbfgs" with "l1"), and `None` is sampled as a choice value —
# confirm how train.py handles these combinations.
param_space = {
    "--C": choice(100, 10, 1.0, 0.1, 0.01),
    "--solver": choice("lbfgs", "liblinear", "sag"),
    "--reg": choice("l1", "l2", None, 'elasticnet'),
}

sampling = RandomParameterSampling(param_space)

# Early-termination (Bandit) policy.
# The policy is applied at every evaluation interval (evaluation_interval=1), but only
# after the first 20 intervals (delay_evaluation=20).
# With slack_factor=0.1, a run is cancelled when its primary metric falls below
# best / (1 + 0.1), i.e. roughly 91% of the best-performing run so far.
# Only the lagging run is terminated; the sweep itself keeps going.
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.1, delay_evaluation=20)

# Make a dedicated source directory for training and copy the train script into it.
# makedirs(exist_ok=True) is idempotent on re-run and raises if "training" exists as a
# regular file, instead of letting shutil.copy silently overwrite that file.
os.makedirs("./training", exist_ok=True)
shutil.copy('train.py', './training')

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./training', compute_target=trainCluster, entry_script='train.py')

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
# The sweep maximizes the "AUC" metric over at most 200 runs.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     policy=policy,
                                     primary_metric_name="AUC",
                                     hyperparameter_sampling=sampling,
                                     max_total_runs=200,
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE)

In [23]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
# RunDetails is imported here because this cell is its only user; without the import
# the call below raises NameError on a fresh kernel.
from azureml.widgets import RunDetails

exp = Experiment(workspace=ws, name="hyperdrive")
hyperDrive_run = exp.submit(hyperdrive_config)
RunDetails(hyperDrive_run).show()

# Block until the sweep has finished so that cells which query the best run
# do not execute before any child run has reported its metric.
hyperDrive_run.wait_for_completion(show_output=False)



In [28]:
import joblib
# Get your best run and report its metrics.
# NOTE(review): the model itself is not saved anywhere in this cell, and joblib is
# imported but not used here — confirm whether a save/register step is still missing.

### YOUR CODE HERE ###
best_run = hyperDrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run has logged the primary metric (e.g. the sweep has not finished or
    # every run failed); fail with a clear message instead of an AttributeError below.
    raise RuntimeError("HyperDrive returned no best run: no child run has reported the primary metric yet.")
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
# AUC is scaled by 100 so it prints as a percentage.
print('\n AUC:', best_run_metrics['AUC']*100)
# NOTE(review): the recorded output for this line is "l1", which looks like a penalty
# type rather than a strength — confirm the label against what train.py logs.
print('\n Regularization Strength:',best_run_metrics['Regularization:'])
print('\n Solvers:',best_run_metrics['Solver:'])


Best Run Id:  HD_61f2dda2-2ba0-49e4-8d6a-20a6dc72b721_6

 AUC: 77.3585596050799

 Regularization Strength: l1

 Solvers: liblinear


# AutoML

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset from the bank-marketing training CSV served at the URL below.

### YOUR CODE HERE ###
path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=path)

In [29]:
# Display the datasets registered in the workspace, keyed by registration name.
ws.datasets

{'Bank-marketing': DatasetRegistration(id='2452a598-b996-40a7-95e4-34f16c2c3331', name='Bank-marketing', version=1, description='', tags={})}

In [30]:
# Fetch the registered 'Bank-marketing' dataset from the workspace.
aml_data = ws.datasets.get('Bank-marketing')
if aml_data is None:
    # .get() returns None for an unknown name; fail here with a clear message rather
    # than with an AttributeError in a later cell.
    raise LookupError("Dataset 'Bank-marketing' is not registered in this workspace.")

In [32]:
# Load the registered dataset into a pandas DataFrame and show it as the cell output.
aml_data.to_pandas_dataframe()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.860,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no
3,36,admin.,married,high.school,no,no,no,telephone,jun,fri,...,4,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no
4,27,housemaid,married,high.school,no,yes,no,cellular,jul,fri,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32945,56,housemaid,married,basic.4y,no,no,yes,cellular,jul,mon,...,1,999,0,nonexistent,1.4,93.918,-42.7,4.960,5228.1,no
32946,37,management,married,university.degree,no,no,yes,cellular,jul,fri,...,7,999,0,nonexistent,1.4,93.918,-42.7,4.957,5228.1,no
32947,26,admin.,single,university.degree,no,no,no,cellular,may,tue,...,4,999,1,failure,-1.8,92.893,-46.2,1.266,5099.1,no
32948,31,blue-collar,single,basic.9y,no,no,no,cellular,apr,mon,...,1,999,0,nonexistent,-1.8,93.075,-47.1,1.405,5099.1,no


In [34]:
# NOTE(review): the recorded output of this cell is
# "ImportError: cannot import name 'LocalHistoryPayload'", so this import failed in the
# environment where the notebook was last run — presumably mismatched azureml package
# versions; confirm before relying on this cell.
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
#
# The configuration below is disabled (commented out), so this cell does not define
# `automl_config`.
# NOTE(review): before re-enabling, confirm the primary_metric value — 'auc' does not look
# like one of AutoML's classification metric names (e.g. 'AUC_weighted'); verify against
# the AutoMLConfig documentation.
# automl_config = AutoMLConfig(
#     experiment_timeout_minutes=60,
#     task='classification',
#     primary_metric='auc',
#     compute_target=trainCluster,
#     training_data=aml_data,
#     label_column_name='y',
#     n_cross_validations=5)

ImportError: cannot import name 'LocalHistoryPayload'