# Hyperparameter Tuning using HyperDrive

In [1]:
import datetime
import json
import pathlib
import requests

import joblib
from sklearn.metrics import classification_report

In [2]:
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.dataset import Dataset
from azureml.widgets import RunDetails

In [3]:
from nd00333.model.hyperdrive.train import run_config as hd_train_config
from nd00333.model.deploy import run_config as deploy_config
from nd00333 import utils as package_utils

In [4]:
# Record the azureml-core SDK version this notebook is executed with.
print("azureml.core.VERSION", azureml.core.VERSION)

azureml.core.VERSION 1.21.0


## Create an experiment

Create an experiment whose name contains its creation date. Naming the experiments this way keeps the manually run experiments (started from this Jupyter notebook) separate from the experiments run by the deployment automation of the master git branch.


In [5]:
import logging
# Ask the root logger to report only ERROR and more severe records.
logging.basicConfig(level=logging.ERROR)

In [6]:
#!az logout

In [7]:
# Load the workspace described by the local config.json.
workspace = Workspace.from_config()
# UTC creation timestamp as year-month-day-hour-minute. "%M" is the minute
# directive; "%m" is the month and would repeat it in the last field.
utcnow = datetime.datetime.utcnow().strftime("%Y-%m-%d-%H-%M")
# NOTE(review): this override pins the name to a fixed, earlier timestamp and
# discards the value computed above; remove it to get a freshly dated experiment.
utcnow = "2021-01-29-09-01"
experiment_name = f"jupyter-{utcnow}"

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}


In [8]:
# Get (or create) the experiment with the chosen name in the workspace.
experiment = Experiment(
    workspace=workspace,
    name=experiment_name,
)
print(f"experiment_name {experiment_name}")

{"message": "Created a worker pool for first use"}
{"message": "Created a worker pool for first use"}
{"message": "Created a worker pool for first use"}
{"message": "Created a worker pool for first use"}


experiment_name jupyter-2021-01-29-09-01


## Dataset

Load the dataset from local files on disk into the default AzureML workspace datastore.
Store the versioned (by the "_1" suffix) train, validate and test sets under separate datastore paths. Uploading large datasets with the Python SDK tends to fail with "The write operation timed out", therefore the `az` CLI is used instead. See https://docs.microsoft.com/en-us/answers/questions/43980/cannot-upload-local-files-to-azureml-datastore-pyt.html

In [9]:
# Names of the datasets used below; the "_1" suffix is the dataset version.
(
    dataset_train_name,
    dataset_validate_name,
    dataset_test_name,
    dataset_2017_name,
) = (
    "ids2018train_1",
    "ids2018validate_1",
    "ids2018test_1",
    "ids2017full_1",
)

In [9]:
# Interactive sign-in for the az CLI used by the upload cells below.
!az login

[33mTo sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code D85M4TGUT to authenticate.[0m
[
  {
    "cloudName": "AzureCloud",
    "homeTenantId": "660b3398-b80e-49d2-bc5b-ac1dc93b5254",
    "id": "2c48c51c-bd47-40d4-abbe-fb8eabd19c8c",
    "isDefault": true,
    "managedByTenants": [],
    "name": "Udacity CloudLabs Sub - 03",
    "state": "Enabled",
    "tenantId": "660b3398-b80e-49d2-bc5b-ac1dc93b5254",
    "user": {
      "name": "odl_user_136531@udacitylabs.onmicrosoft.com",
      "type": "user"
    }
  }
]
[0m

In [10]:
# Install the azure-cli-ml extension pinned to version 1.21.0.
!az extension add --name azure-cli-ml --version 1.21.0

[33mExtension 'azure-cli-ml' is already installed.[0m
[0m

In [11]:
# Show the az CLI and extension versions in use.
!az --version

azure-cli                         2.18.0

core                              2.18.0
telemetry                          1.0.6

Extensions:
azure-cli-ml                      1.21.0

Python location '/opt/az/bin/python3'
Extensions directory '/home/vscode/.azure/cliextensions'

Python (Linux) 3.6.10 (default, Jan 15 2021, 09:54:35) 
[GCC 8.3.0]

Legal docs and information: aka.ms/AzureCliLegal


Your CLI is up-to-date.

[33m[1mPlease let us know how we are doing: [34mhttps://aka.ms/azureclihats[0m
[33m[1mand let us know if you're interested in trying out our newest features: [34mhttps://aka.ms/CLIUXstudy[0m
[0m

In [12]:
%%time
# Upload datasets/ids2018train to the "workspaceblobstore" datastore with the az CLI.
# IPython expands $dataset_train_name to the notebook variable (the target path).
!az ml datastore upload --name workspaceblobstore --verbose \
    --src-path datasets/ids2018train --target-path \
    $dataset_train_name

[32mFound the config file in: /app/config.json[0m
Uploading an estimated of 1 files
Uploading datasets/ids2018train/data.csv
Uploaded datasets/ids2018train/data.csv, 1 files out of an estimated total of 1
Uploaded 1 files
[32mCommand ran in 1160.690 seconds (init: 0.226, invoke: 1160.464)[0m
[0mCPU times: user 1min 6s, sys: 11.8 s, total: 1min 17s
Wall time: 19min 22s


In [13]:
%%time
# Upload datasets/ids2018validate to the "workspaceblobstore" datastore with the az CLI.
# IPython expands $dataset_validate_name to the notebook variable (the target path).
!az ml datastore upload --name workspaceblobstore --verbose \
    --src-path datasets/ids2018validate --target-path \
    $dataset_validate_name

[32mFound the config file in: /app/config.json[0m
Uploading an estimated of 1 files
Uploading datasets/ids2018validate/data.csv
Uploaded datasets/ids2018validate/data.csv, 1 files out of an estimated total of 1
Uploaded 1 files
[32mCommand ran in 238.429 seconds (init: 0.305, invoke: 238.124)[0m
[0mCPU times: user 13.9 s, sys: 2.6 s, total: 16.5 s
Wall time: 3min 59s


In [14]:
%%time
# Upload datasets/ids2018test to the "workspaceblobstore" datastore with the az CLI.
# IPython expands $dataset_test_name to the notebook variable (the target path).
!az ml datastore upload --name workspaceblobstore --verbose \
    --src-path datasets/ids2018test --target-path \
    $dataset_test_name

[32mFound the config file in: /app/config.json[0m
Uploading an estimated of 1 files
Target already exists. Skipping upload for ids2018test_1/data.csv
Uploaded 0 files
[32mCommand ran in 6.275 seconds (init: 0.273, invoke: 6.002)[0m
[0mCPU times: user 325 ms, sys: 41.6 ms, total: 367 ms
Wall time: 7.73 s


In [15]:
%%time
# Upload datasets/ids2017full to the "workspaceblobstore" datastore with the az CLI.
# IPython expands $dataset_2017_name to the notebook variable (the target path).
!az ml datastore upload --name workspaceblobstore --verbose \
    --src-path datasets/ids2017full --target-path \
    $dataset_2017_name

[32mFound the config file in: /app/config.json[0m
Uploading an estimated of 8 files
Target already exists. Skipping upload for ids2017full_1/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Friday-WorkingHours-Morning.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Monday-WorkingHours.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Tuesday-WorkingHours.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Wednesday-workingHours.pcap_ISCX.csv
Target already exists. Skipping upload for ids2017full_1/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Uploaded 0 files
[32mCommand ran in 7.632 seconds (init: 0.308, invoke: 7.323)[0m
[0mCPU t

Register the uploaded datasets in the AzureML workspace. The registration script also uploads the files itself, so in principle the above `az` commands should not be necessary.

In [16]:
%%time
# Run the project's registration script for ids2018train, version 1, as a file dataset.
!PYTHONPATH=. python nd00333/dataset/register/register.py \
    --dataset-path datasets --dataset-name ids2018train --dataset-version 1 \
    --dataset-type file

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "<azureml.core.authentication.InteractiveLoginAuthentication object at 0x7fc5fc88e890>"}
{"message": "datastore.upload_files", "kwargs": {"files": ["/app/datasets/ids2018train/data.csv"], "target_path": "ids2018train_1", "overwrite": false}}
{"message": "Called AzureBlobDatastore.upload_files"}
Uploading an estimated of 1 files
{"message": "Uploading an estimated of 1 files"}
Target already exists. Skipping upload for ids2018train_1/data.csv
{"message": "Target already exists. Skipping upload for ids2018train_1/data.csv"}
Uploaded 0 files
{"message": "Uploaded 0 files"}
{"message": "Finished AzureBlobDatastore.upload with count=0."}
{"message": "Dataset.File.from_files", "datastore_path": ["<azureml.data.datapath.DataPath object at 0x7fc5fc8a2a50>"]}
{"message": "file_dataset.register", "kwargs": {"workspace": "Workspace.create(name='quick-starts-ws-136633', s

In [17]:
%%time
# Run the project's registration script for ids2018validate, version 1, as a file dataset.
!PYTHONPATH=. python nd00333/dataset/register/register.py \
    --dataset-path datasets --dataset-name ids2018validate --dataset-version 1 \
    --dataset-type file

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "<azureml.core.authentication.InteractiveLoginAuthentication object at 0x7fa5ee6b0850>"}
{"message": "datastore.upload_files", "kwargs": {"files": ["/app/datasets/ids2018validate/data.csv"], "target_path": "ids2018validate_1", "overwrite": false}}
{"message": "Called AzureBlobDatastore.upload_files"}
Uploading an estimated of 1 files
{"message": "Uploading an estimated of 1 files"}
Target already exists. Skipping upload for ids2018validate_1/data.csv
{"message": "Target already exists. Skipping upload for ids2018validate_1/data.csv"}
Uploaded 0 files
{"message": "Uploaded 0 files"}
{"message": "Finished AzureBlobDatastore.upload with count=0."}
{"message": "Dataset.File.from_files", "datastore_path": ["<azureml.data.datapath.DataPath object at 0x7fa5ee6b8150>"]}
{"message": "file_dataset.register", "kwargs": {"workspace": "Workspace.create(name='quick-starts-w

In [18]:
%%time
# Run the project's registration script for ids2018test, version 1, as a tabular dataset.
!PYTHONPATH=. python nd00333/dataset/register/register.py \
     --dataset-path datasets --dataset-name ids2018test --dataset-version 1 \
     --dataset-type tabular

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "<azureml.core.authentication.InteractiveLoginAuthentication object at 0x7fc7c82507d0>"}
{"message": "datastore.upload_files", "kwargs": {"files": ["/app/datasets/ids2018test/data.csv"], "target_path": "ids2018test_1", "overwrite": false}}
{"message": "Called AzureBlobDatastore.upload_files"}
Uploading an estimated of 1 files
{"message": "Uploading an estimated of 1 files"}
Target already exists. Skipping upload for ids2018test_1/data.csv
{"message": "Target already exists. Skipping upload for ids2018test_1/data.csv"}
Uploaded 0 files
{"message": "Uploaded 0 files"}
{"message": "Finished AzureBlobDatastore.upload with count=0."}
{"message": "TabularDatasetFactory.from_delimited_files", "kwargs": {"path": ["<azureml.data.datapath.DataPath object at 0x7fc7c8257a50>"], "set_column_types": {"Flow Duration": "<azureml.data.dataset_factory.DataType object at 0x7fc7c

In [19]:
%%time
# Run the project's registration script for ids2017full, version 1, as a tabular dataset.
!PYTHONPATH=. python nd00333/dataset/register/register.py \
     --dataset-path datasets --dataset-name ids2017full --dataset-version 1 \
     --dataset-type tabular

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "<azureml.core.authentication.InteractiveLoginAuthentication object at 0x7f619328b890>"}
{"message": "datastore.upload_files", "kwargs": {"files": ["/app/datasets/ids2017full/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv", "/app/datasets/ids2017full/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv", "/app/datasets/ids2017full/Friday-WorkingHours-Morning.pcap_ISCX.csv", "/app/datasets/ids2017full/Monday-WorkingHours.pcap_ISCX.csv", "/app/datasets/ids2017full/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv", "/app/datasets/ids2017full/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv", "/app/datasets/ids2017full/Tuesday-WorkingHours.pcap_ISCX.csv", "/app/datasets/ids2017full/Wednesday-workingHours.pcap_ISCX.csv"], "target_path": "ids2017full_1", "overwrite": false}}
{"message": "Called AzureBlobDatastore.upload_files"}
Uploading an estima

## HyperDrive Configuration

The hyperparameter tuning is performed using a grid search on the [RandomForestClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html). The following hyperparameters are varied:
- the number of trees in the forest `n_estimators`, which affects the model generalization; however, more trees result in a longer training time for each individual model,
- the `criterion`, which affects the sensitivity of the model to the minority classes; this is important for imbalanced datasets like the one used here,
- the `max_depth`, which may result in overfitting if set too high; moreover, it may considerably increase the size of the model (to e.g. several hundred megabytes).

The `BanditPolicy` termination policy is set such that it allows the grid search to explore all hyperparameter values, and the `max_concurrent_runs` is set to the number of grid search runs. The individual model runs are performed multi-threaded, using all cores available on the compute instance, by setting `n_jobs=-1`. The value of `max_concurrent_runs` is set to the number of nodes in the compute cluster.

See [nd00333/model/hyperdrive/train/run_config.py](nd00333/model/hyperdrive/train/run_config.py) for more details.

The `norm_macro_recall` is used as the performance metric. See [README.md](README.md) for the rationale.

Configure the HyperDrive run, including setting up the remote AML compute cluster.

In [20]:
%%time
hyperdrive_run_config = hd_train_config.main(dataset_train_name=dataset_train_name,
                                             dataset_validate_name=dataset_validate_name)

{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "Found the config file in: /app/config.json"}
{"message": "Found existing cluster starts-ws-136633"}
{"message": "Found existing cluster starts-ws-136633"}
{"message": "Found existing cluster starts-ws-136633"}
{"message": "Found existing cluster starts-ws-136633"}
{"message": "main", "compute_target": {"id": "/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourceGroups/aml-quickstarts-136633/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-136633/computes/starts-ws-136633", "name": "starts-ws-136633", "location": "southcentralus", "tags": null, "identity": null, "properties": {"description": null, "resourceId": null, "computeType": "AmlCompute", "computeLocation": "southcentralus", "provisioningState": "Succeeded", "provisioningErrors": null, "properties": {"vmSize": "STANDA

{"message": "main", "compute_target": {"id": "/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourceGroups/aml-quickstarts-136633/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-136633/computes/starts-ws-136633", "name": "starts-ws-136633", "location": "southcentralus", "tags": null, "identity": null, "properties": {"description": null, "resourceId": null, "computeType": "AmlCompute", "computeLocation": "southcentralus", "provisioningState": "Succeeded", "provisioningErrors": null, "properties": {"vmSize": "STANDARD_D12_V2", "vmPriority": "LowPriority", "scaleSettings": {"minNodeCount": 0, "maxNodeCount": 5, "nodeIdleTimeBeforeScaleDown": "PT2400S"}, "userAccountCredentials": {"adminUserName": null, "adminUserPassword": null, "adminUserSshPublicKey": null}, "subnet": {"id": null}, "remoteLoginPortPublicAccess": "Enabled"}, "status": {"currentNodeCount": 1, "targetNodeCount": 5, "nodeStateCounts": {"preparingNodeCount": 0, "runningNodeCount": 1, "idleNodeCou

{"message": "main", "run_config": "{\n    \"script\": null,\n    \"arguments\": [],\n    \"target\": \"starts-ws-136633\",\n    \"framework\": \"Python\",\n    \"communicator\": \"None\",\n    \"maxRunDurationSeconds\": null,\n    \"nodeCount\": 1,\n    \"environment\": {\n        \"name\": \"hd-train\",\n        \"version\": null,\n        \"environmentVariables\": {\n            \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n        },\n        \"python\": {\n            \"userManagedDependencies\": false,\n            \"interpreterPath\": \"python\",\n            \"condaDependenciesFile\": null,\n            \"baseCondaEnvironment\": null,\n            \"condaDependencies\": {\n                \"name\": \"train\",\n                \"channels\": [\n                    \"conda-forge\"\n                ],\n                \"dependencies\": [\n                    \"python=3.7\",\n                    \"pip=20.1\",\n                    \"numpy=1.18\",\n                    \"scipy=1.4\",\n       

{"message": "If 'run_config' is specified, the following parameters will be ignored: 'compute_target', 'environment', 'distributed_job_config', and 'max_run_duration_seconds'."}
{"message": "If 'run_config' is specified, the following parameters will be ignored: 'compute_target', 'environment', 'distributed_job_config', and 'max_run_duration_seconds'."}
{"message": "If 'run_config' is specified, the following parameters will be ignored: 'compute_target', 'environment', 'distributed_job_config', and 'max_run_duration_seconds'."}
{"message": "If 'run_config' is specified, the following parameters will be ignored: 'compute_target', 'environment', 'distributed_job_config', and 'max_run_duration_seconds'."}


CPU times: user 994 ms, sys: 72.1 ms, total: 1.07 s
Wall time: 7.27 s


Submit the HyperDrive configuration to the compute cluster

In [21]:
%%time
# Submit the HyperDrive configuration to the experiment; show_output=False returns
# without streaming logs (completion is awaited in a separate cell).
hyperdrive_run = experiment.submit(config=hyperdrive_run_config, show_output=False)

{"message": "HyperDriveSubmit", "properties": {"core_sdk_version": "1.21.0", "input_type": "FileDataset", "dataset_id": "aa1dacbd-43b9-4cf1-be80-2f2a011a93e3", "dataset_name": "ids2018train_1", "dataset_version": "dataset.dataset.version", "consumption_mode": "mount", "compute": "starts-ws-136633", "subscription_id": "a24a24d5-8d87-4c8a-99b6-91ed2d2df51f", "resource_group_name": "aml-quickstarts-136633", "workspace_name": "quick-starts-ws-136633", "location": "southcentralus"}}
{"message": "HyperDriveSubmit", "properties": {"core_sdk_version": "1.21.0", "input_type": "FileDataset", "dataset_id": "aa1dacbd-43b9-4cf1-be80-2f2a011a93e3", "dataset_name": "ids2018train_1", "dataset_version": "dataset.dataset.version", "consumption_mode": "mount", "compute": "starts-ws-136633", "subscription_id": "a24a24d5-8d87-4c8a-99b6-91ed2d2df51f", "resource_group_name": "aml-quickstarts-136633", "workspace_name": "quick-starts-ws-136633", "location": "southcentralus"}}
{"message": "HyperDriveSubmit", "p

CPU times: user 1.25 s, sys: 303 ms, total: 1.55 s
Wall time: 6.64 s


In [22]:
%%time
# Block until the HyperDrive run finishes, streaming its log output into the cell.
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_67ab13fd-0409-4b2d-807f-a943d6e47e67
Web View: https://ml.azure.com/experiments/jupyter-2021-01-29-09-01/runs/HD_67ab13fd-0409-4b2d-807f-a943d6e47e67?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136633/workspaces/quick-starts-ws-136633

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-30T19:19:35.655157][API][INFO]Experiment created<END>\n""<START>[2021-01-30T19:19:36.257793][GENERATOR][INFO]Trying to sample '5' jobs from the hyperparameter space<END>\n""<START>[2021-01-30T19:19:36.416485][GENERATOR][INFO]Successfully sampled '5' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-30T19:19:36.8494663Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>


{"message": "25980287-21bd-4c09-a801-3f667d185d95 - CacheDriver:Cached token is expired at 2021-01-30 19:28:35.099456.  Refreshing"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - CacheDriver:Cached token is expired at 2021-01-30 19:28:35.099456.  Refreshing"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - CacheDriver:Cached token is expired at 2021-01-30 19:28:35.099456.  Refreshing"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - CacheDriver:Cached token is expired at 2021-01-30 19:28:35.099456.  Refreshing"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - TokenRequest:Getting a new token from a refresh token"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - TokenRequest:Getting a new token from a refresh token"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - TokenRequest:Getting a new token from a refresh token"}
{"message": "25980287-21bd-4c09-a801-3f667d185d95 - TokenRequest:Getting a new token from a refresh token"}
{"message": "25980287-21bd-4c09-a801


Execution Summary
RunId: HD_67ab13fd-0409-4b2d-807f-a943d6e47e67
Web View: https://ml.azure.com/experiments/jupyter-2021-01-29-09-01/runs/HD_67ab13fd-0409-4b2d-807f-a943d6e47e67?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136633/workspaces/quick-starts-ws-136633

CPU times: user 6.37 s, sys: 655 ms, total: 7.02 s
Wall time: 47min 53s


{'runId': 'HD_67ab13fd-0409-4b2d-807f-a943d6e47e67',
 'target': 'starts-ws-136633',
 'status': 'Completed',
 'startTimeUtc': '2021-01-30T19:19:35.403137Z',
 'endTimeUtc': '2021-01-30T20:06:45.919143Z',
 'properties': {'primary_metric_config': '{"name": "norm_macro_recall", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '6578d9a0-cd2c-45d5-95c1-ff450166ebd0',
  'score': '0.858415258389476',
  'best_child_run_id': 'HD_67ab13fd-0409-4b2d-807f-a943d6e47e67_9',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136633.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_67ab13fd-0409-4b2d-807f-a943d6e47e67/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=EZ4AUGwDVSQvPIg3OmROVq86YjaHAaF9enFZbgAiNpo%3D&st=2021-01-30T19%3A57%3A28Z&se=2021-01-31T04%3A07%3A28Z&sp=r'},
 'submittedBy': 'ODL_User 

In [24]:
# Stop the notebook here unless the HyperDrive run completed successfully.
# `assert` is a statement, so it is written without call-style parentheses, and
# the failure message reports the status that was actually returned.
hyperdrive_status = hyperdrive_run.get_status()
assert hyperdrive_status == "Completed", f"unexpected HyperDrive run status: {hyperdrive_status!r}"

## Run Details

The widget below shows the first few best runs of the experiment

In [58]:
# Render the interactive AzureML widget for the HyperDrive run.
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model


Retrieve the best run

In [26]:
# Project helper; presumably returns the child run with the best primary metric
# (norm_macro_recall) — verify in nd00333/utils.
best_run = package_utils.get_best_run(experiment, hyperdrive_run)

In [27]:
# Show the run summary, followed by the metrics logged by that run.
print(best_run)
print(best_run.get_metrics())

Run(Experiment: jupyter-2021-01-29-09-01,
Id: HD_67ab13fd-0409-4b2d-807f-a943d6e47e67_9,
Type: azureml.scriptrun,
Status: Completed)
{'n_estimators': 15, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'bootstrap': True, 'oob_score': False, 'n_jobs': -1, 'random_state': 0, 'ccp_alpha': 0.0, 'norm_macro_recall': 0.858415258389476, 'model_filename': "PosixPath('outputs/model.pkl')"}


Save the best model locally

In [28]:
%%time
# Copy the run's outputs/model.pkl to models/hyperdrive/model.pkl on local disk.
best_run.download_file("outputs/model.pkl", "models/hyperdrive/model.pkl")

CPU times: user 1.69 s, sys: 686 ms, total: 2.37 s
Wall time: 21.2 s


In [29]:
# Report the on-disk size of the downloaded model file.
model_file = pathlib.Path("models/hyperdrive/model.pkl")
model_size_bytes = model_file.stat().st_size
print("model size in Bytes", model_size_bytes)

model size in Bytes 12551631


Download other files from the best_run

In [30]:
# List the files stored with the best run (logs and outputs).
print(best_run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_87f26aa19dceff6881b22262cddb911372b1b208b643e45300caadfed6f08378_p.txt', 'azureml-logs/65_job_prep-tvmps_87f26aa19dceff6881b22262cddb911372b1b208b643e45300caadfed6f08378_p.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_87f26aa19dceff6881b22262cddb911372b1b208b643e45300caadfed6f08378_p.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/79_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_0bdc0e2b-1e3a-4a7d-95c4-45357f0e9a6d.jsonl', 'logs/azureml/dataprep/engine_spans_7c37b2d0-3947-4335-b67f-2cbf7e5fc3b4.jsonl', 'logs/azureml/dataprep/engine_spans_fa8ec1d7-e0ad-4350-b5ac-0e6fe08fd391.jsonl', 'logs/azureml/dataprep/python_span_0bdc0e2b-1e3a-4a7d-95c4-45357f0e9a6d.jsonl', 'logs/azureml/dataprep/python_span_7c37b2d0-3947-4335-b67f-2cbf7e5fc3b4.jsonl', 'logs/azureml/dataprep/python_spa

In [31]:
# Keep a local copy of the best run's driver log next to the downloaded model.
best_run.download_file("azureml-logs/70_driver_log.txt",
                       "models/hyperdrive/70_driver_log.txt")

Explore the model

In [32]:
%%time
# Deserialize the downloaded model. joblib.load unpickles the file, which can
# execute arbitrary code, so only load model files that come from a trusted run.
fitted_model = joblib.load("models/hyperdrive/model.pkl")

CPU times: user 112 ms, sys: 73.7 ms, total: 186 ms
Wall time: 238 ms


In [33]:
# Show the estimator together with the hyperparameters it was fitted with.
print(fitted_model)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=15, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=15, n_jobs=-1,
                       oob_score=False, random_state=0, verbose=0,
                       warm_start=False)


## Model testing

Test the model on the test set from 2018 and on an additional out-of-sample test set from 2017.

Test the model on the 2018 dataset

In [34]:
%%time
test = Dataset.get_by_name(
            workspace=workspace,
            name=dataset_test_name,
        ).to_pandas_dataframe()
x_test, y_test = test.drop(labels=["Label"], axis=1), test["Label"]

CPU times: user 21.6 s, sys: 16.1 s, total: 37.8 s
Wall time: 2min 22s


In [35]:
%%time
# Predict the labels of the 2018 test features with the downloaded model.
y_test_predict = fitted_model.predict(x_test)

CPU times: user 10.2 s, sys: 1min 2s, total: 1min 12s
Wall time: 29.3 s


In [36]:
%%time
cr = classification_report(digits=4,
                           y_true=y_test,
                           y_pred=y_test_predict,
                           output_dict=False)
print(cr)

                          precision    recall  f1-score   support

                  Benign     0.9864    0.9998    0.9930   2048549
                     Bot     1.0000    0.9999    0.9999     43419
        Brute Force -Web     1.0000    0.9524    0.9756        84
        Brute Force -XSS     1.0000    1.0000    1.0000        41
        DDOS attack-HOIC     1.0000    1.0000    1.0000     48951
  DDoS attacks-LOIC-HTTP     1.0000    1.0000    1.0000     86440
   DoS attacks-GoldenEye     1.0000    0.9995    0.9998      8184
        DoS attacks-Hulk     0.9999    0.9999    0.9999      7658
DoS attacks-SlowHTTPTest     0.7728    0.5163    0.6191     41976
   DoS attacks-Slowloris     0.9992    1.0000    0.9996      2598
          FTP-BruteForce     0.7177    0.8902    0.7947     58010
           Infilteration     0.6913    0.0355    0.0675     29297
           SQL Injection     0.8889    0.6154    0.7273        13
          SSH-Bruteforce     1.0000    0.9998    0.9999     56208

        

In [37]:
# Drop the reference to the full 2018 frame; x_test and y_test stay available.
del test

Test the model on the 2017 dataset

In [38]:
%%time
test_2017 = Dataset.get_by_name(
            workspace=workspace,
            name=dataset_2017_name,
        ).to_pandas_dataframe()
x_test_2017, y_test_2017 = test_2017.drop(labels=["Label"], axis=1), test_2017["Label"]

CPU times: user 11.3 s, sys: 7.18 s, total: 18.4 s
Wall time: 1min 20s


In [39]:
%%time
# Predict the labels of the 2017 out-of-sample features with the same model.
y_test_2017_predict = fitted_model.predict(x_test_2017)


CPU times: user 4.14 s, sys: 16.1 s, total: 20.3 s
Wall time: 11.3 s


In [40]:
%%time
cr = classification_report(digits=4,
                           y_true=y_test_2017,
                           y_pred=y_test_2017_predict,
                           output_dict=False)
print(cr)

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - CacheDriver:Cached token is expired at 2021-01-30 20:23:59.640627.  Refreshing"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - CacheDriver:Cached token is expired at 2021-01-30 20:23:59.640627.  Refreshing"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - CacheDriver:Cached token is expired at 2021-01-30 20:23:59.640627.  Refreshing"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - CacheDriver:Cached token is expired at 2021-01-30 20:23:59.640627.  Refreshing"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - TokenRequest:Getting a new token from a refresh token"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - TokenRequest:Getting a new token from a refresh token"}
{"message": "aa5704da-37a8-4caa-9963-3af7d813eac6 - TokenRequest:Getting a new token from a refresh 

                          precision    recall  f1-score   support

                  Benign     0.8332    0.9968    0.9076    953268
                     Bot     0.0000    0.0000    0.0000      1956
        Brute Force -Web     0.0000    0.0000    0.0000      1507
        Brute Force -XSS     0.0000    0.0000    0.0000       635
   DoS attacks-GoldenEye     0.5829    0.0460    0.0853      7714
        DoS attacks-Hulk     0.0000    0.0000    0.0000    164013
DoS attacks-SlowHTTPTest     0.0000    0.0000    0.0000      2327
   DoS attacks-Slowloris     0.9405    0.7578    0.8394      4154
          FTP-BruteForce     0.0000    0.0000    0.0000      6443
           Infilteration     0.0000    0.0000    0.0000        33
           SQL Injection     0.0000    0.0000    0.0000        21
          SSH-Bruteforce     0.0004    0.0002    0.0002      5885

                accuracy                         0.8308   1147956
               macro avg     0.1964    0.1501    0.1527   1147956
        

In [41]:
# Drop the references to the 2017 frames, which are not used below.
del test_2017, x_test_2017

## Model Deployment

Register the best model into the workspace

In [42]:
%%time
# Register the best run's outputs/model.pkl under the name "hyperdrive-jupyter"
# using the project helper.
model = package_utils.register_model(model_name="hyperdrive-jupyter",
                                     model_path="outputs/model.pkl",
                                     run=best_run)

CPU times: user 267 ms, sys: 31.5 ms, total: 299 ms
Wall time: 5.07 s


Deploy the registered model to an Azure Container instance

In [43]:
%%time
# Start the deployment of the registered model via the project's deploy
# run_config; model_name must match the name used when registering the model.
service = deploy_config.main(model_name="hyperdrive-jupyter",
                             deployment_name="hyperdrive-jupyter")

{"message": "main", "environment": "Environment(Name: hyperdrive-jupyter,\nVersion: None)"}
{"message": "main", "environment": "Environment(Name: hyperdrive-jupyter,\nVersion: None)"}
{"message": "main", "environment": "Environment(Name: hyperdrive-jupyter,\nVersion: None)"}
{"message": "main", "environment": "Environment(Name: hyperdrive-jupyter,\nVersion: None)"}
{"message": "main", "inference_config": "InferenceConfig(entry_script=model/deploy/score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=/app/nd00333, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x7f8374030250>)"}
{"message": "main", "inference_config": "InferenceConfig(entry_script=model/deploy/score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=/app/nd00333, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x7f837403025

CPU times: user 1.96 s, sys: 201 ms, total: 2.16 s
Wall time: 15.5 s


In [44]:
%%time
# Block until the deployment finishes, printing progress into the cell.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...........................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 20 s, sys: 1.46 s, total: 21.5 s
Wall time: 11min 1s


In [45]:
# Stop the notebook here unless the deployed service reports a healthy state.
assert service.state == "Healthy"

Test the service endpoint

Fetch the API keys of the service endpoint

In [46]:
# The keys are credentials for the endpoint: avoid printing them in cell output.
primary_api_key, secondary_api_key = service.get_keys()

Retrieve the scoring URL of the service endpoint

In [47]:
# Endpoint that accepts the scoring POST requests sent below.
url = service.scoring_uri
print(url)

http://d94d9cec-8c79-4b8d-b3aa-0aa7475585d2.southcentralus.azurecontainer.io/score


Prepare a subset of the test dataset for submission to the service

In [48]:
input_data = json.dumps({'data': x_test[0:1].to_dict(orient='records')})
with open("data.json", "w") as _f:
    _f.write(input_data)
!cat data.json

{"data": [{"Flow Duration": 3761843, "TotLen Fwd Pkts": 1441, "TotLen Bwd Pkts": 1731, "Fwd Pkt Len Std": 191, "Bwd Pkt Len Max": 1179, "Bwd Pkt Len Std": 405, "Flow Byts/s": 843, "Flow Pkts/s": 6, "Flow IAT Max": 953181, "Bwd IAT Min": 124510, "Bwd Header Len": 172, "Pkt Len Max": 1179, "Pkt Len Std": 279, "RST Flag Cnt": 1, "PSH Flag Cnt": 1, "ECE Flag Cnt": 1, "Init Fwd Win Byts": 8192, "Init Bwd Win Byts": 62644, "Fwd Seg Size Min": 20}]}

Call the service using the input_data

In [49]:
# Score the payload through the SDK's service client (no manual HTTP request).
print(service.run(input_data))

{"result": ["Benign"]}


Send a post request to the service endpoint using curl

In [50]:
%%time
# IPython expands $secondary_api_key and $url from the notebook variables.
# NOTE(review): the scoring URI printed earlier starts with http://, so the bearer
# key would travel unencrypted — confirm whether an https endpoint is available.
!curl -X POST \
      -H 'Content-Type: application/json' \
      -H "Authorization: Bearer $secondary_api_key" \
      --data @data.json $url

"{\"result\": [\"Benign\"]}"CPU times: user 27.1 ms, sys: 20.9 ms, total: 48 ms
Wall time: 1.34 s


Send a POST request to the service endpoint programmatically

In [51]:
# Request headers: JSON payload plus, since authentication is enabled on the
# service, the bearer key in the Authorization header.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {secondary_api_key}",
}

# requests has no default timeout; without one a stalled endpoint would hang the
# notebook indefinitely.
resp = requests.post(url, data=input_data, headers=headers, timeout=60)
# Fail loudly on 4xx/5xx instead of trying to parse an error body as the result.
resp.raise_for_status()
print(resp.json())

{"result": ["Benign"]}


In [53]:
# The 2018 test features are not needed after the endpoint tests above.
del x_test

Print the service logs

In [54]:
# Dump the service's container logs for inspection.
print(service.get_logs())

2021-01-30T20:30:46,742798867+00:00 - iot-server/run 
2021-01-30T20:30:46,743145570+00:00 - gunicorn/run 
2021-01-30T20:30:46,744434580+00:00 - rsyslog/run 
2021-01-30T20:30:46,752841946+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-01-30T20:30:46,945078851+00:00 - iot-server/finish 1 0
2021-01-30T20:30:46,946887265+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 19.9.0
Listening at: http://127.0.0.1:31311 (14)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 43
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-01-30 20:30:47,856 | root | INFO | Starting up app insights client
2021-01-30 20:30:47,857 | root | INFO | Starting up request id generator
2021-01-30 20:30:47,857 | root | INFO | Starting up app insight hooks
2021-01-30 20:30:47,857 | root | INFO | Invoking user's init function
2021-01-30 20:30:48,207 | root | INFO | Users's init has completed suc

Delete the service endpoint

In [55]:
# Tear down the deployed web service.
service.delete()

Delete the compute cluster

In [56]:
# Derive the compute cluster name from the workspace name via the project helper.
cluster_name = package_utils.trim_cluster_name(workspace.name)
print(f"cluster_name {cluster_name}")

cluster_name starts-ws-136633


In [57]:
# Look up the compute cluster and request its deletion.
# Only the lookup is guarded: a ComputeTargetException raised while constructing
# the ComputeTarget is reported as a missing cluster, whereas a failure of
# delete() itself now propagates instead of being reported as "does not exist".
try:
    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
except ComputeTargetException:
    print(f"compute_target {cluster_name} does not exist")
else:
    compute_target.delete()
    print(f"compute_target {compute_target} is being deleted")

compute_target AmlCompute(workspace=Workspace.create(name='quick-starts-ws-136633', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-136633'), name=starts-ws-136633, id=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourceGroups/aml-quickstarts-136633/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-136633/computes/starts-ws-136633, type=AmlCompute, provisioning_state=Deleting, location=southcentralus, tags=None) is being deleted
