# Automated ML

Import all the dependencies needed to complete the project.

In [None]:
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails

import azureml.core
from azureml.data.dataset_factory import TabularDatasetFactory
from sklearn.model_selection import train_test_split
from azureml.train.automl import AutoMLConfig

import os
import joblib
import pandas as pd
import numpy as np

# Report the azureml-core SDK version this kernel is running
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

## Dataset

### Overview

Dataset from Davide Chicco, Giuseppe Jurman: Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone. BMC Medical Informatics and Decision Making 20, 16 (2020)

Heart failure is a common event caused by Cardiovascular diseases (CVDs) and this dataset contains 12 features that can be used to predict mortality by heart failure.

01- age : Age of the patient (years)
02- anaemia : Decrease of red blood cells or hemoglobin (boolean)
03- creatinine_phosphokinase : Level of the CPK enzyme in the blood (mcg/L)
04- diabetes : If the patient has diabetes (boolean)
05- ejection_fraction : Percentage of blood leaving the heart at each contraction (percentage)
06- high_blood_pressure : If the patient has hypertension (boolean)
07- platelets : Platelets in the blood (kiloplatelets/mL)
08- serum_creatinine : Level of serum creatinine in the blood (mg/dL)
09- serum_sodium : Level of serum sodium in the blood (mEq/L)
10- sex : Woman or man (binary)
11- smoking : If the patient smokes or not (boolean)
12- time : Follow-up period (days)

### Task

Azure AutoML will be used to predict whether the patient died during the follow-up period (DEATH_EVENT : boolean), based on the 12 clinical features.

### Connect to a workspace

In [None]:
# Connect to the workspace described by the saved configuration
ws = Workspace.from_config()

# Summarise which workspace we are connected to, one field per line
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

### Create an Azure ML experiment

In [None]:
# Name of the experiment and the project folder later passed to AutoMLConfig
experiment_name = 'automl_heart_failure_experiment'
project_folder = './automl-model'

# No interactive run is started here: the AutoML run is created when the
# configuration is submitted to the experiment, and a start_logging() run
# that is never completed would stay in the "Running" state in the workspace.
experiment = Experiment(ws, experiment_name)

# Last expression of the cell, so the notebook renders the experiment summary
experiment

### Create and explore dataset

In [1]:
# The data set was downloaded as a CSV file and registered as a dataset in the workspace
dataset = Dataset.get_by_name(ws, name="heart_failure_clinical_records_dataset")
df = dataset.to_pandas_dataframe()

# Only the last expression of a cell is rendered automatically, so the summary
# statistics are shown explicitly with display() (provided by the notebook kernel).
display(df.describe())

# Preview the first five records
dataset.take(5).to_pandas_dataframe()

In [None]:
# Show the kernel's working directory and the entries it contains
currDir=os.getcwd()
print(currDir)
os.listdir(currDir)

### Create or Attach a Compute Resource

In [None]:
# Reuse the CPU cluster when it already exists in the workspace; otherwise
# provision it with the vm_size and max_nodes given below.
# NOTE(review): the original note on this cell asked for "STANDARD_D12_V2", but the
# size actually requested is 'Standard_D2_V2' — confirm which one is intended.

# Name of the CPU cluster
cluster_name = "my-cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The lookup failed, so create the cluster
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D2_V2',
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target, use it')

# Wait for the compute target to finish provisioning, showing progress output
compute_target.wait_for_completion(show_output=True)

# Detailed status of the cluster
print(compute_target.get_status().serialize())

## AutoML Configuration

We did not explicitly specify either a validation_data or an n_cross_validations parameter, so automated ML applies a default validation technique that depends on the number of rows in the single training_data=dataset. Because the dataset has fewer than 1,000 rows, 10-fold cross-validation is used.

In [None]:
# Settings handed to AutoMLConfig through keyword expansion
automl_settings = dict(
    experiment_timeout_minutes=30,
    max_concurrent_iterations=5,
    primary_metric='accuracy',
)

# Classification run on the registered dataset, with DEATH_EVENT as the label column
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    path=project_folder,
    debug_log="automl_errors.log",
    enable_early_stopping=True,
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment; the returned run is kept
# in remote_run, which the following cells monitor and query
remote_run = experiment.submit(automl_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

Using the `RunDetails` widget to show the different experiments.

In [None]:
# Show the submitted run as the cell output
remote_run

In [None]:
# Show the run-details widget for the submitted run, then wait for the run
# to complete while showing its output
details_widget = RunDetails(remote_run)
details_widget.show()
remote_run.wait_for_completion(show_output=True)

## Best Model

Getting the best model from the automl experiments and displaying all the properties of the model.

In [None]:
# get_output() is unpacked into the best run and its fitted model
best_output = remote_run.get_output()
automl_best_run, fitted_automl_best_model = best_output

# Metrics recorded for the best run
best_run_metrics = automl_best_run.get_metrics()


In [None]:
# Heading for the best run's ID, which the next cell displays
print('=========================== Best Run ID ===========================')

In [None]:
# Identifier of the best run obtained from remote_run.get_output()
automl_best_run.id

In [None]:
# Heading for the best run object, which the next cell displays
print('=========================== Best Run ===========================')

In [None]:
# Show the best run as the cell output
automl_best_run

In [None]:
# Heading for the fitted best model, which the next cell displays
print('=========================== Best Model ===========================')

In [None]:
# Show the fitted model returned alongside the best run
fitted_automl_best_model

In [None]:
# Heading for the best run's file names, which the next cell lists
print('=========================== Best Run File Names ===========================')

In [None]:
# List the file names stored with the best run; the scoring script is downloaded from these later
automl_best_run.get_file_names()

In [None]:
# Heading for the best run's metrics, which the next cell displays
print('=========================== Best Run Metrics ===========================')

In [None]:
# Show the metrics collected from the best run via get_metrics()
best_run_metrics

In [None]:
# Print every metric of the best run as "name value"
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

# Call out the accuracy on its own lines, then print the best run itself
accuracy_of_best_run = best_run_metrics['accuracy']
print('\nAccuracy of Best Run', accuracy_of_best_run, sep='\n')
print(automl_best_run)

In [None]:
# Heading for the best run's properties; the next cell reads the model name from them
print('=========================== Best Run Properties ===========================')

In [None]:
# Read the model name recorded in the best run's properties; it is reused
# when the model is registered
model_name = automl_best_run.properties['model_name']
model_name

In [None]:
# Serialise the fitted best model into the local outputs folder
os.makedirs(name='./outputs', exist_ok=True)
joblib.dump(
    value=fitted_automl_best_model,
    filename='outputs/automl_best_model.joblib',
)

In [None]:
# Environment of the best run; reused for the inference configuration at deployment
env = automl_best_run.get_environment()

# Download the scoring script stored under the best run's outputs/ folder.
# The file belongs to the run object (automl_best_run); the previously used
# name automl_best_model was never defined and raised a NameError.
script_file_name = 'score.py'
automl_best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file_name)

## Model Deployment

We have to deploy only one of the two models we trained.

In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the model from the AutoML run under the name read from the best run
description = 'AutoML Model trained on heart failure dataset to predict if the patient deceased during the follow-up period'
tags = None
model = remote_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

# Name, id and version of the registered model, tab separated
print('\t'.join(str(field) for field in (model.name, model.id, model.version)))

In [None]:
# Create an inference config and deploy the model as a web service on Azure Container Instances

from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

# Scoring script and environment obtained from the best AutoML run
inference_config = InferenceConfig(entry_script=script_file_name, environment=env)

# Key-based authentication stays enabled, but no key is hard-coded in the notebook:
# with primary_key left unset the service keys are generated by Azure ML and can be
# read back with service.get_keys(). A key committed in a notebook must be treated
# as leaked.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=2,
                                                       memory_gb=2,
                                                       tags={'area': "heart_failure", 'type': "automl_classification"},
                                                       description='sample service for Automl Classification',
                                                       auth_enabled=True)

deploy_service_name = 'automl-heart-failure-model-deployment'
service = Model.deploy(ws, deploy_service_name, [model], inference_config, deployment_config)
service.wait_for_deployment(show_output=True)

scoring_uri = service.scoring_uri

print(deploy_service_name)
print("\nState: ", service.state)
print("\nScoring URI: ", scoring_uri)

In [None]:
# Update the deployed service with Application Insights enabled
service.update(enable_app_insights=True)

Sending a request to the web service we deployed to test it.

In [None]:
import requests
import json


# Use the endpoint of the deployed service instead of a pasted URL, which goes
# stale whenever the service is redeployed.
scoring_uri = service.scoring_uri
headers = {'Content-Type':'application/json'}

# When the service has key authentication enabled, every request must carry
# one of the service keys as a bearer token.
if service.auth_enabled:
    api_key = service.get_keys()[0]
    headers['Authorization'] = 'Bearer ' + api_key

# First sample patient, wrapped in the {'data': [...]} payload
test_data_1 = json.dumps({'data':[{
    'age':75,
    'anaemia':0,
    'creatinine_phosphokinase':582,
    'diabetes':0,
    'ejection_fraction':20,
    'high_blood_pressure':1,
    'platelets':265000,
    'serum_creatinine':1.9,
    'serum_sodium':130,
    'sex':1,
    'smoking':0,
    'time':4}
    ]
        })

# Second sample patient
test_data_2 = json.dumps({'data':[{
    'age':40,
    'anaemia':0,
    'creatinine_phosphokinase':321,
    'diabetes':0,
    'ejection_fraction':35,
    'high_blood_pressure':0,
    'platelets':265000,
    'serum_creatinine':1,
    'serum_sodium':130,
    'sex':1,
    'smoking':0,
    'time':198}
    ]
        })


response1 = requests.post(scoring_uri, data=test_data_1, headers=headers)

print("Result 1:",response1.text)

# The second request sends test_data_2 (the earlier name test_data2_ was undefined)
response2 = requests.post(scoring_uri, data=test_data_2, headers=headers)

print("Result 2:",response2.text)

Print the logs of the web service and delete the service

In [None]:
# Fetch the logs of the deployed web service and show them as the cell output
service.get_logs()

In [None]:
# Delete the deployed web service now that testing is done
service.delete()