## Setup

As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments.

In [None]:
import json
import logging

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn import datasets
import azureml.dataprep as dprep
from sklearn.model_selection import train_test_split

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

In [None]:
# Connect to the Azure ML workspace using the saved workspace configuration.
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = '{{ experiment_name }}'
# project folder for temporary/log files
project_folder = os.path.join(os.getcwd(), experiment_name)

experiment = Experiment(ws, experiment_name)

# Summary of the environment this notebook is running against.
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Project Directory': project_folder,
    'Experiment Name': experiment.name,
}

# Show full cell contents instead of truncating long values.
# Newer pandas releases expect None for "no limit" and reject -1;
# older releases only accept an integer, where -1 meant "no limit".
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T

## Create or Attach existing AmlCompute
You will need to create a compute target for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.
#### Creation of AmlCompute takes approximately 5 minutes. 
If the AmlCompute with that name is already in your workspace this code will skip the creation process.
As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota.

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

# Choose a name for your cluster.
amlcompute_cluster_name = "{{ compute_target }}"

# Look up the compute targets already registered in the workspace and check
# whether one with this name exists and is an AmlCompute cluster.
cts = ws.compute_targets
found = amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute'

if found:
    # Reuse the existing cluster instead of provisioning a new one.
    print('Found existing compute target.')
    compute_target = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",  # for GPU, use "STANDARD_NC6"
        # vm_priority='lowpriority',  # optional
        max_nodes=6,
    )

    # Create the cluster.
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min_node_count is provided, it will use the scale settings for the cluster.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )

    # For a more detailed view of current AmlCompute status, use get_status().

# Fetch entity csv data

Here, load the data in the `get_data()` script to be used on Azure compute. To do this, first load the necessary libraries and dependencies, set up the paths for the data, and create the `conda_run_config` run configuration.

In [None]:
# Ensure the local data folder and the project folder exist
# (no error if they are already present).
for folder in ('data', project_folder):
    os.makedirs(folder, exist_ok=True)

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Python packages the remote environment needs.
cd = CondaDependencies.create(
    pip_packages=['azureml-sdk[automl]'],
    conda_packages=['numpy', 'py-xgboost<=0.80'],
)

# Run configuration for a Python job.
conda_run_config = RunConfiguration(framework="python")

# Run on the AmlCompute cluster, inside a Docker container built from the
# default CPU base image.
conda_run_config.target = compute_target
conda_run_config.environment.docker.enabled = True
conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE

# Attach the package list to the environment.
conda_run_config.environment.python.conda_dependencies = cd

### Load Data

Here we create the script to be run in Azure compute for loading the data.

In [None]:
# The following is example code. Replace it with your own implementation.
# Later cells read two names that must be defined here:
#   training_data      - the data passed to AutoML
#   label_column_name  - the name of the label column in that data

# data = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
# dflow = dprep.auto_read_file(data)
# dflow.get_profile()
# dflow.head()

# training_data = dflow
# label_column_name = 'y'

# Delete the line below once training_data and label_column_name are defined;
# it exists only to stop the notebook from continuing without them.
raise NotImplementedError('Please provide training_data and label_column_name before continuing.')

## Train

Instantiate an `AutoMLConfig` object. This defines the settings and data used to run the experiment.

In [None]:
import json

# Load the previously used settings. Make any desired changes here.
# NOTE(review): `{{ modified_automl_settings }}` is a template placeholder;
# presumably it is replaced with a dict literal when the notebook is generated.
automl_settings = {{ modified_automl_settings }}

# Use the compute target and training data defined in previous cells.
automl_settings.update({
    "compute_target": compute_target,
    "training_data": training_data,
    "label_column_name": label_column_name
})

# Show the final settings as the cell output.
automl_settings

In [None]:
# Build the AutoML configuration from the settings dict and submit it to the
# experiment. show_output is False, so progress is not requested here.
automl_config = AutoMLConfig(**automl_settings)
remote_run = experiment.submit(automl_config, show_output = False)

In [None]:
# Display the submitted run as the cell output.
remote_run

In [None]:
# Wait for the remote run to complete before retrieving its output.
remote_run.wait_for_completion()

In [None]:
# Retrieve the best run and its fitted model from the AutoML run.
best_run, fitted_model = remote_run.get_output()

In [None]:
# Retrieve the run output again under separate names.
# NOTE(review): this is the same call, with the same arguments, as the
# previous cell; the "_customized" names are not used in the cells shown
# here - confirm whether this cell is still needed.
best_run_customized, fitted_model_customized = remote_run.get_output()

## Results

#### Widget for Monitoring Runs

The widget will first report a "loading" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.

**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details

In [None]:
from azureml.widgets import RunDetails
# Render the monitoring widget for the AutoML run.
RunDetails(remote_run).show() 

## Deploy

### Retrieve the Best Model

Below we select the best pipeline from our iterations. The `get_output` method on `remote_run` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*.

In [None]:
# Retrieve the best run and fitted model; best_run is used below to look up
# the model name and download the scoring files.
best_run, fitted_model = remote_run.get_output()

In [None]:
# Name under which the model will be registered, as recorded on the best run.
model_name = best_run.properties['model_name']

# Local paths for the scoring script and conda environment file. The same
# variables are passed to InferenceConfig at deployment time, so the download
# targets below use them instead of repeating the literals.
script_file_name = 'inference/score.py'
conda_env_file_name = 'inference/env.yml'

# Download the scoring script and environment file from the best run's outputs.
best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file_name)
best_run.download_file('outputs/conda_env_v_1_0_0.yml', conda_env_file_name)

### Register the Fitted Model for Deployment
If neither metric nor iteration is specified in the `register_model` call, the iteration with the best primary metric is registered.

In [None]:
# Register the model from the AutoML run under the name taken from the best run.
description = 'AutoML Model generated from experiment {{ experiment_name }}'
tags = None
model = remote_run.register_model(model_name = model_name, description = description, tags = tags)

print(remote_run.model_id) # This will be written to the script file later in the notebook.

### Deploy the model as a Web Service on Azure Container Instance

In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model

# Inference configuration: the downloaded scoring script and conda
# environment file define how the service scores requests.
inference_config = InferenceConfig(runtime = "python", 
                                   entry_script = script_file_name,
                                   conda_file = conda_env_file_name)

# Azure Container Instance deployment settings: 1 CPU core and 1 GB of memory.
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               tags = {'area': "bmData", 'type': "automl-{{task_type}}"},
                                               description = 'sample service for AutoML {{task_type}}')

# The service name is taken from the experiment name.
# NOTE(review): web service names may be subject to naming restrictions that
# experiment names are not - confirm the generated name is a valid service name.
aci_service_name = '{{ experiment_name }}'
print(aci_service_name)
# Deploy the registered model and wait for the deployment to finish.
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
# NOTE(review): the positional True is presumably show_output - confirm.
aci_service.wait_for_deployment(True)
print(aci_service.state)

### Call the service


In [None]:
## sample code needs to be updated
import json
# NOTE: X_test and time_column_name are not defined in the cells shown here;
# they must be provided before running this cell.

# The request data frame needs to have y_query column which corresponds to query.
X_query = X_test.copy()
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
X_query['y_query'] = np.nan
# We have to convert datetime to string, because Timestamps cannot be serialized to JSON.
X_query[time_column_name] = X_query[time_column_name].astype(str)
# The Service object accept the complex dictionary, which is internally converted to JSON string.
# The section 'data' contains the data frame in the form of dictionary.
test_sample = json.dumps({'data': X_query.to_dict(orient='records')})
response = aci_service.run(input_data = test_sample)
# translate from networkese to datascientese
try:
    res_dict = json.loads(response)
    y_fcst_all = pd.DataFrame(res_dict['index'])
    y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')
    y_fcst_all['forecast'] = res_dict['forecast']
except Exception as err:
    # Best effort: report the failure and show the raw response. The raw
    # response is printed rather than res_dict because res_dict is unbound
    # when json.loads itself is what failed.
    print('Could not parse the service response: {!r}'.format(err))
    print(response)

### Delete a Web Service
Deletes the specified web service.

In [None]:
#aci_service.delete()

### Get Logs from a Deployed Web Service
Gets logs from a deployed web service.

In [None]:
#aci_service.get_logs()