## Automated ML

Import dependencies. 

In [None]:
import os
import pandas as pd
import numpy as np
import json
import requests
from sklearn.metrics import confusion_matrix
import itertools

from azureml.core import Dataset, Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig

from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

## Dataset

### Overview

**Dataset:** Davide Chicco, Giuseppe Jurman: "Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone". BMC Medical Informatics and Decision Making 20, 16 (2020)

Heart failure is a common event caused by cardiovascular diseases (CVDs). This dataset contains 12 clinical features that can be used to predict mortality from heart failure.

**12 clinical features:**

- age: age of the patient (years)
- anaemia: decrease of red blood cells or hemoglobin (boolean)
- high blood pressure: if the patient has hypertension (boolean)
- creatinine phosphokinase (CPK): level of the CPK enzyme in the blood (mcg/L)
- diabetes: if the patient has diabetes (boolean)
- ejection fraction: percentage of blood leaving the heart at each contraction (percentage)
- platelets: platelets in the blood (kiloplatelets/mL)
- sex: woman or man (binary)
- serum creatinine: level of serum creatinine in the blood (mg/dL)
- serum sodium: level of serum sodium in the blood (mEq/L)
- smoking: if the patient smokes or not (boolean)
- time: follow-up period (days)

In this project, Azure AutoML is used to predict the death event from the patient's 12 clinical features.

## Workspace setup

In [None]:
# Name of the experiment that groups the AutoML runs of this notebook.
experiment_name = 'automl-experiment'

# Load the workspace via Workspace.from_config() and open the experiment in it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Look up the registered dataset by name and materialize it as a pandas DataFrame.
dataset = Dataset.get_by_name(workspace=ws, name='heart-disease-kaggle')
df = dataset.to_pandas_dataframe()

# Last expression of the cell: the notebook displays the summary statistics.
df.describe()

In [None]:
# Split the dataset 80/20 into a training part and a test part. The fixed seed
# makes the split reproducible, so metrics computed on data_test can be compared
# across notebook runs instead of changing with every random partition.
data_train, data_test = dataset.random_split(percentage=0.8, seed=42)

## Configure Compute Cluster

In [None]:
cpu_cluster_name = "cpu-cluster"

# Reuse the cluster if one with this name already exists in the workspace;
# otherwise provision a new one.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D12_V2',
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the compute target is ready, streaming its status output.
compute_target.wait_for_completion(show_output=True)

## AutoML Configuration

In [None]:
# Run-level settings, passed to AutoMLConfig as keyword arguments.
automl_settings = dict(
    experiment_timeout_minutes=30,
    max_concurrent_iterations=4,
    primary_metric='accuracy',
    n_cross_validations=5,
)

# Classification task on the training split; "DEATH_EVENT" is the label column.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=data_train,
    label_column_name="DEATH_EVENT",
    featurization='auto',
    enable_early_stopping=True,
    **automl_settings,
)

AutoML typically performs cross-validation, a data balance check, and a cardinality check before training models with a variety of algorithms.

In [None]:
# Submit the AutoML configuration to the experiment, streaming run output into the notebook.
remote_run = experiment.submit(config=automl_config, show_output=True)

## Run Details

In [None]:
# Last expression of the cell: the notebook displays the submitted run object.
remote_run

In [None]:
# Show the RunDetails widget for the submitted AutoML run.
RunDetails(remote_run).show()

In [None]:
# Block until the AutoML run has completed before retrieving its output.
remote_run.wait_for_completion()

## Best Model 

In [None]:
# get_output() yields a pair, unpacked here as the best child run and its fitted model.
best_run, fitted_model = remote_run.get_output()

In [None]:
# Last expression of the cell: the notebook displays the best run.
best_run

In [None]:
# Last expression of the cell: the notebook displays the fitted model.
fitted_model

In [None]:
# Model name recorded in the best run's properties; reused when registering the model.
model_name = best_run.properties['model_name']

# Local path of the entry script used for deployment.
script_file = 'inference/score.py'

# Make sure the local target directory exists before downloading into it.
os.makedirs(os.path.dirname(script_file), exist_ok=True)

# AutoML stores its generated scoring script in the run outputs as
# 'outputs/scoring_file_v_1_0_0.py' (there is no 'outputs/score_file.py').
# Download it to the path that is later passed to InferenceConfig.
best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file)

## Model Deployment

In [None]:
# Register the fitted model of the AutoML run in the workspace.
registration_kwargs = {
    'model_name': model_name,
    'description': 'AutoML model',
}
model = remote_run.register_model(**registration_kwargs)

In [None]:
# Run the scoring script in the environment of the best run, so the container
# has the AutoML runtime dependencies the fitted model needs at load time.
environment = best_run.get_environment()
inference_config = InferenceConfig(entry_script=script_file,
                                   environment=environment)

# Key-based authentication stays enabled, but no key is hard-coded: without an
# explicit primary_key the service generates its keys, which can be read back
# with service.get_keys() after deployment.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                                memory_gb=1,
                                                auth_enabled=True)

aci_service_name = 'automl-heart-disease'
print(aci_service_name)

In [None]:
# Deploy the registered model as an ACI web service and wait for the deployment to finish.
service = Model.deploy(workspace=ws,
                       name=aci_service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)
service.wait_for_deployment(show_output=True)

# Report the final state and the endpoint to send scoring requests to.
for label, value in (("State: ", service.state),
                     ("Scoring URI: ", service.scoring_uri)):
    print(label + value)

In [None]:
url = service.scoring_uri

# Build the sample from a real row of the dataset (label column removed) so the
# request carries exactly the feature columns the model was trained on, wrapped
# in the same {"data": [...]} envelope that the batch test request uses.
sample_row = df.drop(columns=['DEATH_EVENT']).head(1)
testing_data = {"data": json.loads(sample_row.to_json(orient='records'))}

headers = {'Content-type': 'application/json'}

# The service is deployed with auth_enabled=True, so every request has to
# present one of the service keys as a bearer token.
headers['Authorization'] = 'Bearer ' + service.get_keys()[0]

x = requests.post(url, data=json.dumps(testing_data), headers=headers)

print(x.json())

In [None]:
# data_test comes from dataset.random_split() and is an Azure ML dataset, not a
# DataFrame, so it has to be converted to pandas before it can be sliced.
test_df = data_test.to_pandas_dataframe()

# Features are every column except the label; the label is kept as a
# single-column DataFrame so it stays two-dimensional for later processing.
X_test = test_df.drop(columns=['DEATH_EVENT'])
y_test = test_df[['DEATH_EVENT']]

# Request body: {"data": [<one record per test row>]}
X_test_json = X_test.to_json(orient='records')
data = "{\"data\": " + X_test_json + "}"

# The service is deployed with auth_enabled=True, so the request has to present
# one of the service keys as a bearer token.
headers = {'Content-Type': 'application/json',
           'Authorization': 'Bearer ' + service.get_keys()[0]}

req = requests.post(url, data, headers=headers)

# The response body is decoded twice: the scoring script's own JSON string is
# itself JSON-encoded in the HTTP response.
y_pred = json.loads(json.loads(req.text))['result']

In [None]:
# Convert the true labels to a flat NumPy array. The bare name `array` is never
# imported in this notebook; NumPy is available as `np`.
actual = np.asarray(y_test)
if actual.ndim > 1:
    # Single-column 2-D input (e.g. a one-column DataFrame): keep that column.
    actual = actual[:, 0]

# Sanity check: number of predictions vs. number of true labels.
print(len(y_pred), " ", len(actual))

In [None]:
# Confusion matrix of the true labels against the service's predictions.
cf = confusion_matrix(y_true=actual, y_pred=y_pred)

# Last expression of the cell: the notebook displays the matrix.
cf

In [None]:
# Retrieve the logs of the deployed web service (displayed as the cell output).
service.get_logs()

In [None]:
# Delete the deployed web service once testing is finished.
service.delete()