# Automated ML

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
# from azureml.core.dataset import Dataset
from azureml.core import Dataset, Datastore
from azureml.pipeline.steps import AutoMLStep

## Dataset

### Overview
Machine Learning with R by Brett Lantz is a book that provides an introduction to machine learning using R. As far as I can tell, Packt Publishing does not make its datasets available online unless you buy the book and create a user account which can be a problem if you are checking the book out from the library or borrowing the book from a friend. All of these datasets are in the public domain but simply needed some cleaning up and recoding to match the format in the book.

#### Data content:

Columns

age: age of primary beneficiary

sex: insurance contractor gender, female, male

bmi: Body mass index, an objective measure of body weight relative to height, computed as weight divided by height squared ($kg/m^2$). It indicates whether a weight is relatively high or low for a given height; the ideal range is 18.5 to 24.9.

children: Number of children covered by health insurance / Number of dependents

smoker: whether the beneficiary smokes (yes, no)

region: the beneficiary's residential area in the US, northeast, southeast, southwest, northwest.

charges: Individual medical costs billed by health insurance.

#### Activity of Project:
In this capstone project we predict personal medical charges, using age, sex, bmi, children, smoker and region as input features.

In [2]:
# Connect to the workspace described by the local Azure ML config file
ws = Workspace.from_config()

# All runs submitted from this notebook are grouped under this experiment name
experiment_name = 'demo-experiment'
experiment = Experiment(workspace=ws, name=experiment_name)

In [5]:
# Reuse the dataset registered in the Workspace under `key` when it exists;
# otherwise build it from the CSV file and register it.
# NOTE: update the key to match the dataset name
key = "insurance Dataset"
description_text = "insurance dataset for udacity capstone project"

found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    # Create AML Dataset from the hosted CSV file
    example_data = 'https://raw.githubusercontent.com/kalimi03/nd00333-capstone/master/insurance.csv'
    dataset = Dataset.Tabular.from_delimited_files(path=example_data)
    # Register Dataset in Workspace so later runs can look it up by name
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Summary statistics of the numeric columns
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


## Creating or attaching a compute cluster target

In [7]:
# Attach to the compute cluster if it already exists; otherwise create it.
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "demo-cluster"
try:
    # Looking up a cluster that does not exist raises ComputeTargetException
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# The provisioning configuration above does not set min_nodes, so a freshly
# created cluster may sit at zero nodes until a job is submitted. Waiting for
# min_node_count=1 would then only end when the timeout expires, so wait for
# the cluster's own configured minimum instead (min_node_count=None).
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)


Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

The details of Azure ML configuration settings have been explained in the ReadMe document.

In [28]:
project_folder = './insurance-project'

# Run-level settings: cross-validation folds, time budget, parallelism,
# early stopping and the metric AutoML optimises.
# NOTE(review): max_concurrent_iterations (5) is larger than the max_nodes=4
# used when this notebook creates the cluster - confirm this is intended.
automl_settings = dict(
    n_cross_validations=5,
    experiment_timeout_minutes=20,
    max_concurrent_iterations=5,
    enable_early_stopping=True,
    primary_metric='normalized_root_mean_squared_error',
)

# Regression on the registered dataset, with "charges" as the label column
automl_config = AutoMLConfig(
    task='regression',
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="charges",
    path=project_folder,
    **automl_settings,
)

In [29]:
# Submit the AutoML configuration to the experiment, requesting console output
remote_run = experiment.submit(
    config=automl_config,
    show_output=True,
)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
demo-experiment,AutoML_04dcdcf8-d9fd-42d2-a9f1-e4752e32d2b4,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

In [31]:
# Show the notebook widget that tracks the submitted AutoML run
from azureml.widgets import RunDetails
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [None]:
# Wait for the AutoML run to complete; show_output=True requests progress output
remote_run.wait_for_completion(show_output=True)

## Best Model

In the cells below, we retrieve the best model from the AutoML experiment and display its properties.



In [35]:
# get_output() is unpacked into the best child run and its fitted model
best_run, model = remote_run.get_output()
# Last expression: display the best run
best_run

Package:azureml-automl-runtime, training version:1.35.1, current version:1.34.0
Package:azureml-core, training version:1.35.0.post1, current version:1.34.0
Package:azureml-dataprep, training version:2.23.2, current version:2.22.2
Package:azureml-dataprep-rslex, training version:1.21.2, current version:1.20.1
Package:azureml-dataset-runtime, training version:1.35.0, current version:1.34.0
Package:azureml-defaults, training version:1.35.0, current version:1.34.0
Package:azureml-interpret, training version:1.35.0, current version:1.34.0
Package:azureml-mlflow, training version:1.35.0, current version:1.34.0
Package:azureml-pipeline-core, training version:1.35.0, current version:1.34.0
Package:azureml-responsibleai, training version:1.35.0, current version:1.34.0
Package:azureml-telemetry, training version:1.35.0, current version:1.34.0
Package:azureml-train-automl-client, training version:1.35.0, current version:1.34.0
Package:azureml-train-automl-runtime, training version:1.35.1, current

Run(Experiment: demo-experiment,
Id: AutoML_04dcdcf8-d9fd-42d2-a9f1-e4752e32d2b4_38,
Type: azureml.scriptrun,
Status: Completed)
RegressionPipeline(pipeline=Pipeline(memory=None,
                                     steps=[('datatransformer',
                                             DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='regression', working_dir='/mnt/batch/ta...
), random_state=0, reg_alpha=1.4583333333333335, reg_lambda=2.3958333333333335, subsample=0.6, tree_method='hist'))], verbose=False)), ('5', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('elasticnet', ElasticNet(alpha=0.001, copy_X=True, fit_intercept=True, l1_ratio=1, max_iter=1000, normalize=False, positive=False, precompute=False, random_state=None, selection='cyclic', tol=0.0001, warm_star

In [None]:
# Display the fitted model returned by remote_run.get_output()
model

In [None]:
# Display the steps of the fitted model
model.steps

In [None]:
# Directory that receives the downloaded scoring script, model and conda spec.
# makedirs(..., exist_ok=True) is idempotent, so re-running the cell is safe
# and no separate listing of the working directory is needed.
dir_name = 'infer_dir'
os.makedirs(dir_name, exist_ok=True)

In [39]:
# Save the best model
import pickle

# Download the artifacts of the best run needed for deployment:
# the scoring script, the serialized model and the conda environment spec.
best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'infer_dir/score.py')
best_run.download_file('outputs/model.pkl', 'infer_dir/model.pkl')
best_run.download_file('outputs/conda_env_v_1_0_0.yml', 'infer_dir/conda_env.yml')

# Save the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling fails.
filename = 'best_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-auto-train-models

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the model downloaded from the best AutoML run
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Identifiers taken from the best run
model_name = best_run.properties['model_name']
run_id = best_run.id
experiment_name = best_run.experiment.name

# NOTE: this rebinds `model` from the fitted pipeline returned by
# get_output() to the registered Model object; later cells use the latter.
model = Model.register(
    workspace=ws,
    model_path='infer_dir/model.pkl',              # local file uploaded as the model
    model_name=model_name,                         # name of the registered model in the workspace
    model_framework=Model.Framework.SCIKITLEARN,   # framework recorded for the model
    model_framework_version=sklearn.__version__,   # scikit-learn version of this kernel
    tags={'area': 'charges', 'type': 'regression'},
    description='Auto ML model to predict personal medical charges.',
)

print('Name:', model.name)
print('Version:', model.version)

In [None]:
# Build the inference configuration from the files downloaded into infer_dir
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# Environment defined by the conda specification of the best run
myenv = Environment.from_conda_specification(
    name="myenv",
    file_path="infer_dir/conda_env.yml",
)
inference_config = InferenceConfig(
    environment=myenv,
    entry_script="infer_dir/score.py",
)

# Print the environment file so its contents are recorded in the notebook
with open('infer_dir/conda_env.yml', 'r') as file:
    env_file = file.read()
print(env_file)

In the cells below, we deploy the registered model as a web service on Azure Container Instances and then send a request to the deployed service to test it.

In [None]:
# Model Deployment
from azureml.core.webservice import AciWebservice

# Resources, tags and telemetry for the Azure Container Instance (ACI)
aci_deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    enable_app_insights=True,
    description="Predict personal medical charges using regreesion model",
    tags={'area': "charges", 'type': "regression"},
)

# Name of the web service; overwrite=True is passed so an existing service
# with this name is replaced rather than blocking the deployment
aci_service_name = "aci-medical-charges-deploy"

aci_service = Model.deploy(
    workspace=ws,
    name=aci_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_deployment_config,
    overwrite=True,
)
aci_service.wait_for_deployment(show_output=True)

# Report the state of the service once the wait returns
print(aci_service.state)

In [None]:
# Print the scoring endpoint URI of the deployed web service
print(aci_service.scoring_uri)

In [None]:
# Score the 4 observations in the JSON payload below against the web service
import json
import requests

# Take the endpoint from the deployed service object rather than a
# hard-coded URI, which goes stale whenever the service is redeployed.
scoring_uri = aci_service.scoring_uri
headers = {'Content-Type': 'application/json'}

# Each record uses the feature columns the model was trained on
# (every dataset column except the label "charges").
# NOTE(review): the {"data": [records]} shape is what AutoML-generated scoring
# scripts normally accept - confirm against infer_dir/score.py.
test_sample = json.dumps({
    "data": [
        {"age": 19, "sex": "female", "bmi": 27.9, "children": 0, "smoker": "yes", "region": "southwest"},
        {"age": 18, "sex": "male", "bmi": 33.77, "children": 1, "smoker": "no", "region": "southeast"},
        {"age": 28, "sex": "male", "bmi": 33.0, "children": 3, "smoker": "no", "region": "southeast"},
        {"age": 33, "sex": "male", "bmi": 22.705, "children": 0, "smoker": "no", "region": "northwest"}
    ]
})

response = requests.post(scoring_uri, data=test_sample, headers=headers)
# Surface HTTP errors explicitly instead of failing later while parsing the body
response.raise_for_status()
print("Results:", response.json())

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Print the logs of the deployed web service
print(aci_service.get_logs())

In [None]:
# Clean up: delete the web service, then the registered model
# NOTE(review): the compute cluster used for training is not deleted in this
# cell - confirm it is shut down elsewhere, as the checklist below states.
aci_service.delete()
model.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
