Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/ignore-notebook)

In [None]:
import logging
import os
import csv

import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
from azureml.core import Run, Workspace
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
import azureml.dataprep as dprep
from azureml.core.dataset import Dataset

In [None]:
# Load workspace
# Builds the Workspace handle (`ws`) that the later cells use to create the
# experiment and to look up compute targets, datastores and datasets.
# NOTE(review): from_config() presumably reads a local workspace config file —
# confirm it is present before running this cell.
ws = Workspace.from_config()

In [None]:
# Choose a name for the run history container in the workspace.
experiment_name = 'NoWindowOD'
project_folder = './project'

experiment = Experiment(ws, experiment_name)

# One-row summary of the SDK / workspace / experiment settings in use,
# rendered transposed below so each setting appears on its own line.
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace Name': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Project Directory': project_folder,
    'Experiment Name': experiment.name,
}

# Do not truncate long cell values. `None` is the supported "no limit" value;
# the legacy -1 was deprecated in pandas 1.0 and is rejected by newer
# releases. Very old pandas only accepts an integer here, so fall back to -1
# when `None` is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget

# Choose a name for your cluster.
amlcompute_cluster_name = "gpu-cluster"

# Reuse the cluster only when the workspace already has a compute target with
# this name AND it is of type AmlCompute; otherwise provision a new one.
cts = ws.compute_targets
found = (
    amlcompute_cluster_name in cts
    and cts[amlcompute_cluster_name].type == 'AmlCompute'
)

if found:
    print('Found existing compute target.')
    compute_target = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",
        max_nodes=4,
    )
    # Create the cluster.
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

# Runs for both the reused and the newly created cluster.
# Can poll for a minimum number of nodes and for a specific timeout.
# If no min_node_count is provided, it will use the scale settings for the cluster.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# For a more detailed view of current AmlCompute status, use compute_target.get_status().

## Data

In [None]:
from azureml.core.datastore import Datastore

# Reuse the 'datasets' datastore when the workspace already has one;
# otherwise register the blob container under that name.
datastore_name = 'datasets'
registered_datastores = ws.datastores
if datastore_name in registered_datastores:
    ds = registered_datastores[datastore_name]
else:
    # The storage account key is read from the ACCOUNT_KEY environment
    # variable; os.getenv returns None when it is not set.
    account_key = os.getenv("ACCOUNT_KEY")
    ds = Datastore.register_azure_blob_container(
        ws,
        datastore_name=datastore_name,
        container_name='imagecontainer',
        account_name='dicommodel2290602728',
        account_key=account_key,
        resource_group='dicom-model-rg',
    )

In [None]:
# NOTE(review): _LabeledDatasetFactory and LabeledDatasetTask are not
# referenced in this cell; the import is kept because it may be needed for its
# import-time side effects on labeled datasets — confirm before removing.
from azureml.contrib.dataset.labeled_dataset import _LabeledDatasetFactory, LabeledDatasetTask
from azureml.core import Dataset

# Both datasets must already be registered in the workspace under
# "<experiment_name>_training" and "<experiment_name>_test".
registered_datasets = ws.datasets

# get training dataset
training_dataset_name = experiment_name + "_training"
if training_dataset_name not in registered_datasets:
    raise RuntimeError('Please use fhl_dicom_multiclass_create_labeled_datasets.ipynb to create labeled dataset for training.')
training_dataset = registered_datasets[training_dataset_name]
print('Found the training dataset', training_dataset_name)

# get test dataset
test_dataset_name = experiment_name + "_test"
if test_dataset_name not in registered_datasets:
    # The message names the missing *test* dataset (it previously said
    # "training", which pointed users at the wrong problem).
    raise RuntimeError('Please use fhl_dicom_multiclass_create_labeled_datasets.ipynb to create labeled dataset for testing.')
test_dataset = registered_datasets[test_dataset_name]
print('Found the test dataset', test_dataset_name)

# Summary, printed once per dataset.
print("Training dataset name: " + training_dataset.name)
print("Test dataset name: " + test_dataset_name)


In [None]:
# Settings forwarded verbatim to AutoMLConfig as keyword arguments.
automl_settings = dict(
    deterministic=True,
    enable_dnn=True,
    featurization="off",
    iteration_timeout_minutes=1000,
    iterations=1,
    log_verbose_metrics=True,
    primary_metric="mean_average_precision",
    print_local_package_versions=True,
    seed=47,
)

# Optional override: only set "scenario" when the SCENARIO environment
# variable is present and non-empty.
scenario = os.getenv("SCENARIO")
if scenario:
    automl_settings["scenario"] = scenario

# No validation dataset is passed here (validation_data is intentionally
# left out of the call).
automl_config = AutoMLConfig(
    task='image-object-detection',
    debug_log='automl_errors_1.log',
    path=project_folder,
    compute_target=compute_target,
    training_data=training_dataset,
    **automl_settings
)

In [None]:
# Submit the AutoML configuration to the experiment. The returned run handle
# is kept in `remote_run` and used by the cells below to wait for completion
# and to derive the training run id.
remote_run = experiment.submit(automl_config, show_output = False)

In [None]:
# Bare expression: shows the submitted run as this cell's output.
remote_run

In [None]:
# Wait for the AutoML run before starting the inference cells.
# NOTE(review): wait_post_processing=True presumably also waits for the run's
# post-processing step — confirm against the SDK documentation.
remote_run.wait_for_completion(wait_post_processing=True)

# Inference runs

In [None]:
import tempfile
from azureml.core.script_run_config import ScriptRunConfig

In [None]:
# Look up the training script run that belongs to the AutoML run above.
# Its id is the AutoML run id with the "_HD_0" suffix appended.
training_run_id = f"{remote_run.id}_HD_0"
training_run = Run(experiment=experiment, run_id=training_run_id)

In [None]:
# Inference script run arguments
# Flat list of flag/value pairs; the scoring cell appends "--output_file" to
# it before passing it to ScriptRunConfig.
# NOTE(review): "--validate_score" is given the Python bool True rather than a
# string — confirm how the SDK and the scoring script interpret a non-string
# argument value.
arguments = [
        "--run_id", training_run_id,
        "--experiment_name", experiment.name,
        "--input_dataset_id", test_dataset.id,
        "--validate_score", True
    
    ]

### Scoring run

In [None]:
# Path the scoring script is asked to write its predictions to; the same path
# is used later to download the file from the run.
output_prediction_file = "./outputs/predictions.txt"
scoring_args = [*arguments, "--output_file", output_prediction_file]

with tempfile.TemporaryDirectory() as tmpdir:
    # Download the scoring entry script from the training run into the temp
    # folder, which is then used as the source directory of the scoring run.
    entry_script_name = "score_script.py"
    output_path = os.path.join(tmpdir, entry_script_name)
    training_run.download_file(f"train_artifacts/{entry_script_name}", output_path)

    # Run the script on the same compute target and in the same environment
    # as the training run.
    script_run_config = ScriptRunConfig(
        source_directory=tmpdir,
        script=entry_script_name,
        compute_target=compute_target,
        environment=training_run.get_environment(),
        arguments=scoring_args,
    )
    scoring_run = experiment.submit(script_run_config)

In [None]:
# Bare expression: shows the submitted scoring run as this cell's output.
scoring_run

In [None]:
# Wait for the scoring run before downloading its predictions file.
# NOTE(review): wait_post_processing=True presumably also waits for the run's
# post-processing step — confirm against the SDK documentation.
scoring_run.wait_for_completion(wait_post_processing=True)

# Get Prediction Results

In [None]:
# Download the predictions file from the scoring run, using the same relative
# path ("./outputs/predictions.txt") for the run artifact and the local copy.
# NOTE(review): confirm that download_file creates the local ./outputs
# directory when it does not exist, and that the leading "./" is accepted in
# the artifact name.
scoring_run.download_file(output_prediction_file, output_file_path=output_prediction_file)