In [1]:
from azureml.core import Workspace, Experiment, ScriptRunConfig

# ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-123926
Azure region: southcentralus
Subscription id: ac15aef5-0abe-4be6-a0bd-40abc1594138
Resource group: aml-quickstarts-123926


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

# name for CPU cluster
mycompute_cluster_name = "sayed-cluster"

# Verifying if the cluster exists
try:
    my_compute = ComputeTarget(workspace=ws, name=mycompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',max_nodes=4)
    my_compute = ComputeTarget.create(ws, mycompute_cluster_name, compute_config)

my_compute.wait_for_completion(show_output=True)

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [14]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import normal, uniform, choice
import os

script_folder = './'
script='train.py'


# Specify parameter sampler
# ps = ### YOUR CODE HERE ###

ps = RandomParameterSampling(
     {
        '--C': choice(0.01,5,20,100,500), 
        '--max_iter': choice(10,50,100,150,200)
     }
)

# Specify a Policy
# policy = ### YOUR CODE HERE ###
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=2)

#accuracy set as primary metric, focuses on maximizing "accuracy"
primary_metric_name="Accuracy"
primary_metric_goal=PrimaryMetricGoal.MAXIMIZE

max_total_runs=20
max_concurrent_runs=4

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
# est = ### YOUR CODE HERE ###
est = SKLearn(script_folder,
        compute_target=my_compute, 
        entry_script=script)


# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
# hyperdrive_config = ### YOUR CODE HERE ###
hyperdrive_config = HyperDriveConfig(estimator = est,
                             hyperparameter_sampling=ps,
                             policy=policy,
                             primary_metric_name=primary_metric_name,
                             primary_metric_goal=primary_metric_goal,
                             max_total_runs=max_total_runs,
                             max_concurrent_runs=max_concurrent_runs)

In [15]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(config = hyperdrive_config, show_output=True)
RunDetails(hyperdrive_run).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [18]:
hyperdrive_run.get_status()

'CancelRequested'

In [16]:
import joblib
# Get your best run and save the model from that run.


### YOUR CODE HERE ###
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']
best_run_model_names = best_run.get_file_names()

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n best_run_model_names:',best_run_model_names)
print('\n best_run:',best_run.get_details())




Best Run Id:  HD_e843fdf6-1e59-4446-a6ff-37d9a6a1f97b_3

 Accuracy: 0.9128983308042489

 best_run_model_names: ['azureml-logs/55_azureml-execution-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/65_job_prep-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/110_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']

 best_run: {'runId': 'HD_e843fdf6-1e59-4446-a6ff-37d9a6a1f97b_3', 'target': 'sayed-cluster', 'status': 'Completed', 'startTimeUtc': '2020-10-30T17:08:19.563373Z', 'endTimeUtc': '2020-10-30T17:10:27.881626Z', 'properties': {'_azureml.ComputeTargetType': 'amlcompute', 'ContentSnapshotId': '3a9644e7-54a2-41e5-ad23-74801c0fcecf', 'ProcessInfoFile': 'az

In [17]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,HD_e843fdf6-1e59-4446-a6ff-37d9a6a1f97b_3,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [20]:
# create an output folder
os.makedirs('outputs', exist_ok=True)
model = best_run.register_model(model_name='best_model_sayed', 
                           model_path='outputs/mymodel.joblib')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/model.joblib in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/65_job_prep-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/110_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/model.joblib in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/65_job_prep-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_f0114c4a641f5866bea1efe3c93cd93197c04a2d227e83a831bda39ec88d2b93_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/110_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']\n                See https://aka.ms/run-logging for more details."
    }
}

In [5]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###

paths_url = [
    'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
]
dataset = TabularDatasetFactory.from_delimited_files(path=paths_url)

In [6]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(dataset)

In [7]:
import pandas as pd

cleanedData = pd.concat([x,y], axis =1)
cleanedData

Unnamed: 0,age,marital,default,housing,loan,month,day_of_week,duration,campaign,pdays,...,contact_telephone,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,y
0,57,1,0,0,1,5,1,371,1,999,...,0,0,0,0,1,0,0,0,0,0
1,55,1,0,1,0,5,4,285,2,999,...,1,0,0,0,0,0,0,0,1,0
2,33,1,0,0,0,5,5,52,1,999,...,0,0,0,1,0,0,0,0,0,0
3,36,1,0,0,0,6,5,355,4,999,...,1,0,0,0,1,0,0,0,0,0
4,27,1,0,1,0,7,5,189,2,999,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32945,56,1,0,0,1,7,1,116,1,999,...,0,1,0,0,0,0,0,0,0,0
32946,37,1,0,0,1,7,5,69,7,999,...,0,0,0,0,0,0,0,1,0,0
32947,26,0,0,0,0,5,2,135,4,999,...,0,0,0,0,0,0,0,1,0,0
32948,31,0,0,0,0,4,1,386,1,999,...,0,0,0,1,0,0,0,0,0,0


In [8]:
from sklearn.model_selection import train_test_split

data_train, data_test = train_test_split(cleanedData, test_size=0.2, random_state=111)
data_train

Unnamed: 0,age,marital,default,housing,loan,month,day_of_week,duration,campaign,pdays,...,contact_telephone,education_basic.4y,education_basic.6y,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,y
17428,35,0,0,0,0,8,2,255,1,999,...,0,0,0,0,0,0,1,0,0,0
8403,30,0,0,0,0,6,3,587,1,999,...,1,0,0,0,0,0,0,1,0,1
6111,38,1,0,0,0,5,3,479,2,999,...,1,0,0,0,0,0,1,0,0,0
9960,36,1,0,1,0,5,1,133,1,999,...,1,0,0,0,1,0,0,0,0,0
2571,29,1,0,1,1,8,3,94,2,999,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7490,60,0,0,1,0,6,4,45,2,999,...,1,1,0,0,0,0,0,0,0,0
25257,32,0,0,1,0,8,2,152,3,999,...,0,0,0,0,0,0,0,1,0,0
4820,41,0,0,0,0,5,1,18,4,999,...,0,0,0,0,1,0,0,0,0,0
10196,57,1,0,1,0,8,5,160,1,999,...,0,0,0,0,0,0,1,0,0,0


In [None]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.

# automl_config = AutoMLConfig(
#     task=,
#     primary_metric=,
#     training_data=,
#     label_column_name=,
#     n_cross_validations=)

   automl_settings = {
       "n_cross_validations": 5,
       "primary_metric": 'Accuracy',
       "enable_early_stopping": True,
       "max_concurrent_iterations": 4,
       "max_cores_per_iteration": -1
   }
   automl_config = AutoMLConfig(task = 'classification',
                                experiment_timeout_minutes=30,
                               compute_target = compute_target,
                               training_data = data_train,
                               label_column_name = 'y',
                               **automl_settings
                               )
 



In [2]:
# Submit your automl run
   ws = Workspace.from_config()
   run = experiment.submit(automl_config, show_output=True)
### YOUR CODE HERE ###

In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###