In [1]:
import azureml
from azureml.core import Workspace, Run

# Record the installed Azure ML core SDK version in the notebook output.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.0.8


In [2]:
from azureml.core import Workspace, Experiment
# Load the workspace from its config file, then open the experiment that
# the runs submitted later in this notebook are attached to.
ws = Workspace.from_config()
exp = Experiment(name='tsbacktest', workspace=ws)

Found the config file in: /data/home/tsperfadmin/Projects/zhouf/energy_forecast_fnn_model_v1/TSPerf/energy_load/GEFCom2017_D_Prob_MT_hourly/submissions/fnn/config.json


In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

# Cluster settings: the name to look up (or create), the autoscale bounds
# and the VM size used when a new cluster has to be provisioned.
compute_name =  "hlutsperfnn"
compute_min_nodes = 0
compute_max_nodes = 16

vm_size = "STANDARD_D3_V2"

# Read the workspace's compute targets once and reuse the mapping for both
# the membership test and the lookup (presumably each access of
# `ws.compute_targets` queries the service -- TODO confirm).
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # The name is taken by something that is not an AmlCompute cluster.
        # It is still assigned to `compute_target` (as before), but say so
        # instead of continuing silently.
        print('WARNING: compute target ' + compute_name +
              ' exists but is not an AmlCompute cluster: ' + repr(compute_target))
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = vm_size,
                                                                min_nodes = compute_min_nodes,
                                                                max_nodes = compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use the 'status' property
    print(compute_target.status.serialize())

found compute target. just use it. hlutsperfnn


In [4]:
from azureml.core.runconfig import EnvironmentDefinition
from azureml.core.conda_dependencies import CondaDependencies

# Conda packages for the run environment: pandas plus an R toolchain.
conda_package_names = [
    'pandas',
    'r-base',
    'r-data.table',
    'r-rjson',
    'r-optparse',
    'r-doparallel',
]

# Build the environment: dependencies are not user-managed, they come from
# the conda specification below, and the run executes inside Docker.
env = EnvironmentDefinition()
env.docker.enabled = True
env.python.user_managed_dependencies = False
env.python.conda_dependencies = CondaDependencies.create(
    python_version='3.6.2',
    conda_packages=conda_package_names,
)
# Add the conda-forge channel to the conda specification.
env.python.conda_dependencies.add_channel('conda-forge')

In [9]:
from azureml.train.estimator import Estimator
from azureml.train.hyperdrive import *

# Directory passed to the Estimator as `source_directory`; it is expected to
# contain the entry script 'aml_estimator.py'.
script_folder = './'

# Look the default datastore up once and mount it for both path arguments
# (the original performed the same lookup twice).
default_datastore = ws.get_default_datastore()

# Arguments passed to the entry script. The four model flags are also part
# of the grid below; NOTE(review): presumably HyperDrive replaces these
# values with the sampled ones for each child run -- confirm.
script_params = {
    '--n_hidden_1': 5,
    '--n_hidden_2': 5,
    '--iter_max': 3,
    '--penalty': 0,
    '--path': default_datastore.as_mount(),
    '--cv_path': default_datastore.as_mount()
}

est = Estimator(source_directory=script_folder,
                script_params=script_params,
                compute_target=compute_target,
                use_docker=True,
                entry_script='aml_estimator.py',
                environment_definition=env)

# Exhaustive grid: 2 * 2 * 6 * 2 = 48 combinations, which is below the
# max_total_runs=50 cap set on the HyperDrive configuration.
ps = GridParameterSampling({
        '--n_hidden_1': choice(4, 8),
        '--n_hidden_2': choice(4, 8),
        '--iter_max': choice(1, 2, 4, 6, 8, 10),
        '--penalty': choice(0, 0.001),
})

# Minimise the 'average pinball loss' metric; up to 16 runs at a time.
htc2 = HyperDriveRunConfig(estimator=est,
                          hyperparameter_sampling=ps,
                          primary_metric_name='average pinball loss',
                          primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                          max_concurrent_runs=16,
                          max_total_runs=50)
# Submit the sweep to the experiment; `htr2` is the handle for the parent run.
htr2 = exp.submit(config=htc2)



In [35]:
from azureml.widgets import RunDetails
# Show the run-details widget for the submitted HyperDrive run.
run_details_widget = RunDetails(htr2)
run_details_widget.show()

In [18]:
# Fetch the child run with the best primary metric and print the argument
# list it was launched with.
best_run = htr2.get_best_run_by_primary_metric()
if best_run is None:
    # No child run has reported the primary metric; fail with a clear
    # message instead of an AttributeError on None below.
    raise RuntimeError(
        "No child run has reported the primary metric yet; "
        "cannot determine the best run.")
parameter_values = best_run.get_details()['runDefinition']['Arguments']
print(parameter_values)

In [None]:
from azureml.core import Run
# Re-attach to a previously submitted run by its id, so the results below
# can be collected without resubmitting the sweep.
r = Run(experiment=exp, run_id="tsbacktest_1547665964004")

In [16]:
import pandas as pd
# Child runs of the parent run `r`.
results = r.get_children()

# Command-line flag -> (results column, converter applied to the raw value).
hyperparameter_columns = {
    '--n_hidden_1': ('n_hidden_1', int),
    '--n_hidden_2': ('n_hidden_2', int),
    '--iter_max': ('iter_max', int),
    '--penalty': ('penalty', float),
}


def _hyperparameters_from_arguments(arguments):
    """Return {flag: raw value} for the known hyperparameter flags.

    `arguments` is the flat argument list of a run
    (['--flag', value, '--flag', value, ...]). Each known flag is paired with
    the item that follows it, so the lookup does not depend on the flags
    sitting at fixed positions. If a flag appears more than once, the last
    occurrence wins.
    """
    values = {}
    for token, following in zip(arguments, arguments[1:]):
        if token in hyperparameter_columns:
            values[token] = following
    return values


# One list per output column; only completed child runs contribute a row.
results_dict = {'pinball_loss': [], 'n_hidden_1': [], 'n_hidden_2': [], 'iter_max': [], 'penalty': []}
for child_run in results:
    if child_run.get_status() != "Completed":
        continue
    arguments = child_run.get_details()['runDefinition']['Arguments']
    pinball_loss = child_run.get_metrics()['average pinball loss']
    flag_values = _hyperparameters_from_arguments(arguments)
    # Convert every value before appending anything, so a bad or missing
    # value cannot leave the columns with different lengths.
    row = {column: convert(flag_values[flag])
           for flag, (column, convert) in hyperparameter_columns.items()}
    results_dict['pinball_loss'].append(pinball_loss)
    for column, value in row.items():
        results_dict[column].append(value)

results_df = pd.DataFrame.from_dict(results_dict)

In [17]:
# Display the results ordered by pinball loss, smallest first.
results_df.sort_values(by='pinball_loss')

Unnamed: 0,pinball_loss,n_hidden_1,n_hidden_2,iter_max,penalty
44,81.271331,8,4,1,0.0
6,81.29237,4,4,1,0.001
15,81.323119,4,4,1,0.001
0,81.330934,4,4,1,0.001
13,81.35457,8,4,1,0.001
2,81.367994,8,4,1,0.001
3,81.371252,4,4,1,0.001
33,81.412618,4,8,1,0.0
5,81.431119,4,8,1,0.001
39,81.436431,4,4,1,0.0


In [20]:
# Write the collected results to 'cv_results.csv' without the index column.
results_df.to_csv(path_or_buf='cv_results.csv', index=False)