In [None]:
import pandas as pd

from azureml.core import Workspace, Experiment

# from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
import os

import warnings
warnings.filterwarnings('ignore')

In [None]:
from azureml.core import Workspace, Experiment

# Resolve the workspace through Workspace.from_config() and bind the
# experiment that the runs in this notebook are grouped under.
ws = Workspace.from_config()
exp = Experiment(name="udacity-project", workspace=ws)

# Print the workspace id as a quick check that the connection resolved.
print(ws.get_details()['id'])

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


compute_name = "DS2V2"

try:
    # Reuse the compute target if one with this name is already attached
    # to the workspace.
    vm = ComputeTarget(ws, compute_name)
    print(f"{compute_name} exists already")
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. Any other error
    # (authentication, network, interrupt) now propagates instead of being
    # swallowed by a bare `except`.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
    vm = ComputeTarget.create(ws, compute_name, compute_config)

# Block until the target is ready, whether it was just created or reused.
vm.wait_for_completion(show_output=True)

In [None]:
# Hyperparameter search space and sampler.
# Every entry is a discrete `choice`; the "--" prefixed keys presumably map
# to command-line arguments of train.py -- confirm against the script.
param_space = {
    "--impute": choice('mean', 'median'),
    "--kernel": choice("rbf", "linear"),
    "--gamma": choice("auto", "scale"),
    "--penalty": choice(0.01, 1, 10, 20, 100),
}

# Random sampling over the space defined above.
sampling = RandomParameterSampling(param_space)

# Early-termination policy (Bandit).
# - evaluation_interval=1: the policy is applied at every interval, where an
#   interval is one logging of the primary metric by the training script.
# - delay_evaluation=20: the first 20 intervals are exempt from evaluation.
# - slack_factor=0.1: a run is cancelled when its best metric falls below
#   best_so_far / (1 + 0.1), i.e. roughly 91% of the best-performing run.
# Only the under-performing run is terminated; the sweep itself continues.
# NOTE(review): if train.py logs the metric fewer than 20 times per run, this
# policy would never trigger -- confirm against the training script.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=20,
)


# Stage the training script in ./training, the folder the estimator below
# uses as its source_directory.
import shutil

# exist_ok=True keeps this cell safe to re-run and avoids the separate
# listdir() membership check before creating the folder.
os.makedirs("./training", exist_ok=True)
shutil.copy('train.py', './training')
    
# SKLearn estimator: runs train.py from the ./training folder on the
# compute target `vm`.
est = SKLearn(
    entry_script='train.py',
    source_directory='./training',
    compute_target=vm,
)

# HyperDrive sweep definition: ties together the estimator, the sampler and
# the early-termination policy, and maximises the "AUC" metric.
# NOTE(review): the search space visible in this notebook has
# 2 * 2 * 2 * 5 = 40 distinct combinations, so a budget of 200 runs may
# repeat configurations -- confirm the budget is intentional.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=sampling,
    policy=policy,
    primary_metric_name="AUC",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=200,
)