## Importing the required Libraries

In [1]:
import logging

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import warnings
import os

# Keep the rendered notebook readable: every warning is handed to a handler
# that accepts any arguments and does nothing with them.
warnings.showwarning = lambda *_args, **_kwargs: None

import azureml.core
from azureml.core import Experiment, Workspace, Dataset
from azureml.train.automl import AutoMLConfig
from datetime import datetime
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig

from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

In [2]:

# Connect to the Azure ML workspace from the local configuration and list
# its identifying details, one per line.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

quick-starts-ws-133645
aml-quickstarts-133645
southcentralus
cdbe0b43-92a0-4715-838a-f2648cc7ad21


## Configuring the Compute Cluster

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# from_config() automatically looks for a directory .azureml
ws = Workspace.from_config()

# Name of the CPU cluster used for training
cpu_cluster_name = "TargetCluster"

# Reuse the cluster if the workspace already has one under this name; a
# ComputeTargetException from the lookup means it still has to be created.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        idle_seconds_before_scaledown=2400,
        vm_priority='lowpriority',
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

cpu_cluster.wait_for_completion(show_output=True)

# Summarise every compute target known to the workspace: name, type, provisioning state.
compute_targets = ws.compute_targets
for target_name, target in compute_targets.items():
    print(target_name, target.type, target.provisioning_state)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
notebook133645 ComputeInstance Succeeded
TargetCluster AmlCompute Succeeded


## Setting up the experiment in the workspace

In [4]:

# Name under which the AutoML runs of this notebook are grouped in the workspace.
experiment_name = 'automl-Diabetes'
experiment = Experiment(workspace=ws, name=experiment_name)

In [5]:
# Display the experiment handle (rendered as a summary table with links).
experiment

Name,Workspace,Report Page,Docs Page
automl-Diabetes,quick-starts-ws-133508,Link to Azure Machine Learning studio,Link to Documentation


## Loading the dataset (fetched from Kaggle and hosted on GitHub)

In [5]:
# Raw GitHub URL of the diabetes CSV.
path='https://raw.githubusercontent.com/maheshcheetirala/Azure-Machine-Learning-ND-capstone/Main/diabetes.csv'

# Build a tabular dataset from the CSV and register it in the workspace
# under the name 'DiabetesDataset'.
dataset = Dataset.Tabular.from_delimited_files(path=path).register(
    workspace=ws,
    name='DiabetesDataset',
)

# Pandas copy of the data for local inspection.
df = dataset.to_pandas_dataframe()

In [6]:
# Preview the first rows of the loaded DataFrame.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## AutoML configuration

In [7]:
from azureml.train.automl import AutoMLConfig

# AutoML settings: a classification task that predicts the 'Outcome' column of the
# registered dataset, scored by accuracy with 5-fold cross-validation, limited to
# 24 iterations or 30 minutes of experiment time.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    debug_log='automl_errors.log',
    task='classification',
    primary_metric='accuracy',
    training_data=dataset,
    label_column_name='Outcome',
    n_cross_validations=5,
    # Use the cluster object obtained in the compute-cluster cell instead of
    # repeating its name as a separate string literal that could drift.
    compute_target=cpu_cluster,
    iterations=24,
    # The provisioning configuration in this notebook requests max_nodes=4;
    # asking for more concurrent iterations than nodes only queues the extra runs.
    max_concurrent_iterations=4)

## Submitting the Experiment Run

In [8]:

# Submit the AutoML configuration to the experiment and stream progress into the cell output.
remote_run = experiment.submit(config=automl_config, show_output=True)

Running on remote.
No run_configuration provided, running on TargetCluster with default configuration
Running on remote compute: TargetCluster
Parent Run ID: AutoML_e8661898-9d01-4bb5-8e1e-dfd5953a4cbc

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputati

In [None]:
from azureml.widgets import RunDetails

# Render the notebook widget for the submitted AutoML run.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

In [12]:
# Wait for the AutoML run to complete while streaming its output; the value
# returned by the call (the run details) is shown as the cell result.
remote_run.wait_for_completion(show_output=True)



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more abo

{'runId': 'AutoML_e8661898-9d01-4bb5-8e1e-dfd5953a4cbc',
 'target': 'TargetCluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-06T12:54:44.279259Z',
 'endTimeUtc': '2021-01-06T13:07:08.807814Z',
 'properties': {'num_iterations': '24',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'TargetCluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"a534c071-eca6-4220-992d-d71a6150bfe1\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/maheshcheetirala/Azure-Machine-Learning-ND-capstone/Main/diabetes.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": tru

In [13]:
# Unpack the pair returned by the AutoML run: the selected child run and its fitted model.
bestrun, model = remote_run.get_output()

In [14]:
# Display the run object returned by get_output().
bestrun

Experiment,Id,Type,Status,Details Page,Docs Page
automl-Diabetes,AutoML_e8661898-9d01-4bb5-8e1e-dfd5953a4cbc_22,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [15]:
# Display the fitted pipeline returned by get_output().
model

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                             KNeighborsClassifier(algorithm='auto',
                                                                                                  leaf_size=30,
                                                                                                  metric='l1',
                                                     

In [13]:
# Fetch the metrics logged for the selected run; they are stored but not displayed here.
best_run_metrics = bestrun.get_metrics()

In [14]:
# Display the fitted pipeline once more (same object as shown earlier).
model

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                               reg_alpha=0,
                                                                                               reg_lambda=1.5625,
                                                                                               scale_pos_weight=1,
                                                       

In [16]:
import joblib

# Persist the fitted pipeline to the local outputs folder (created if missing).
os.makedirs('./outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/automl_model.joblib')

# Read the model name stored in the selected run's properties and show it.
model_name = bestrun.properties['model_name']
model_name

'AutoMLe8661898922'

In [22]:
# Keep a handle on the selected run's environment so it can be passed to InferenceConfig.
# save_to_directory() only writes the definition to disk and returns None, so its
# result must not be what gets bound to `env`. overwrite=True allows this cell to be
# re-run when the 'environments' folder already exists.
env = bestrun.get_environment()
env.save_to_directory(path='environments', overwrite=True)

# Local file name for the scoring entry script.
script_file = 'score.py'

# Download the scoring script stored in the run's outputs to the local file above.
bestrun.download_file('outputs/scoring_file_v_1_0_0.py', script_file)

## Registering the Model

In [19]:
# Register the AutoML model in the workspace under the name read from the run properties.
# NOTE: this rebinds `model`, which previously held the fitted pipeline from get_output().
model = remote_run.register_model(
    model_name=model_name,
    description='AutoML model',
)
model.id

'AutoMLe8661898922:1'

## Setting the Inference Config and ACI Config

In [23]:
# Scoring entry script plus the environment the service should run in.
inference_config = InferenceConfig(entry_script=script_file, environment=env)

# Resources requested for the ACI deployment: 1 CPU core and 1 GB of memory.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Name of the web service to create.
aci_service_name = 'automl-diabetes'
print(aci_service_name)

automl-diabetes


In [24]:
# Deploy the registered model as a web service and wait for the deployment to finish.
service = Model.deploy(
    workspace=ws,
    name=aci_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
)
service.wait_for_deployment(show_output=True)

# Report the resulting service state and the URI to send scoring requests to.
print("State: " + service.state)
print("Scoring URI: " + service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...............................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
State: Healthy
Scoring URI: http://965504f0-4479-4756-aafd-7dbc2c2d9c72.southcentralus.azurecontainer.io/score


## Testing the Endpoint

In [47]:
# Execute the external script endpoint.py; its source is not part of this notebook.
# NOTE(review): presumably it sends a sample request to the deployed scoring URI — confirm against the script.
%run endpoint.py

{"result": [1, 1]}


In [38]:
# Take rows 700-709 by position as a small sample for querying the endpoint.
# .iloc states the positional intent explicitly, and .copy() yields an independent
# frame so the in-place pop('Outcome') performed later does not act on a slice of `df`.
# NOTE(review): these rows come from the same dataset that was passed to AutoML as
# training_data, so this is a smoke test of the service, not a held-out evaluation.
df_test = df.iloc[700:710].copy()

In [39]:
# Display the selected sample rows.
df_test

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
700,2,122,76,27,200,35.9,0.48,26,0
701,6,125,78,31,0,27.6,0.56,49,1
702,1,168,88,29,0,35.0,0.91,52,1
703,2,129,0,0,0,38.5,0.3,41,0
704,4,110,76,20,100,28.4,0.12,27,0
705,6,80,80,36,0,39.8,0.18,28,0
706,10,115,0,0,0,0.0,0.26,30,1
707,2,127,46,21,335,34.4,0.18,22,0
708,9,164,78,0,0,32.8,0.15,45,1
709,2,93,64,32,160,38.0,0.67,23,1


In [40]:
# Split the labels off the sample. pop() removes 'Outcome' from df_test in place,
# so running this cell a second time without recreating df_test raises KeyError.
y_true=df_test.pop('Outcome')

In [41]:
# `json` is not imported by any other visible cell of this notebook, so import it
# here; otherwise this cell fails with NameError on a fresh kernel.
import json

# Serialise the feature rows as {"data": [{column: value, ...}, ...]} for the scoring service.
sample_json = json.dumps({'data': df_test.to_dict(orient='records')})

In [42]:
# Display the JSON request payload.
sample_json

'{"data": [{"Pregnancies": 2, "Glucose": 122, "BloodPressure": 76, "SkinThickness": 27, "Insulin": 200, "BMI": 35.9, "DiabetesPedigreeFunction": 0.483, "Age": 26}, {"Pregnancies": 6, "Glucose": 125, "BloodPressure": 78, "SkinThickness": 31, "Insulin": 0, "BMI": 27.6, "DiabetesPedigreeFunction": 0.565, "Age": 49}, {"Pregnancies": 1, "Glucose": 168, "BloodPressure": 88, "SkinThickness": 29, "Insulin": 0, "BMI": 35.0, "DiabetesPedigreeFunction": 0.905, "Age": 52}, {"Pregnancies": 2, "Glucose": 129, "BloodPressure": 0, "SkinThickness": 0, "Insulin": 0, "BMI": 38.5, "DiabetesPedigreeFunction": 0.304, "Age": 41}, {"Pregnancies": 4, "Glucose": 110, "BloodPressure": 76, "SkinThickness": 20, "Insulin": 100, "BMI": 28.4, "DiabetesPedigreeFunction": 0.118, "Age": 27}, {"Pregnancies": 6, "Glucose": 80, "BloodPressure": 80, "SkinThickness": 36, "Insulin": 0, "BMI": 39.8, "DiabetesPedigreeFunction": 0.177, "Age": 28}, {"Pregnancies": 10, "Glucose": 115, "BloodPressure": 0, "SkinThickness": 0, "Insul

In [43]:

# Send the JSON payload to the deployed service and keep its response.
output = service.run(sample_json)

In [44]:
# Display the raw response returned by the service.
output

'{"result": [0, 1, 1, 0, 0, 0, 0, 0, 1, 0]}'

In [45]:
# Show the service response next to the ground-truth labels for a visual comparison.
true_values = y_true.to_numpy()
print("Predicted Values are :", output)
print("True Values are:", true_values)

Predicted Values are : {"result": [0, 1, 1, 0, 0, 0, 0, 0, 1, 0]}
True Values are: [0 1 1 0 0 0 1 0 1 1]


## Application Insights

In [46]:
# Retrieve the logs of the deployed web service; the returned text is shown as the cell result.
service.get_logs()

'2021-01-06T13:24:16,298985709+00:00 - rsyslog/run \n2021-01-06T13:24:16,299124910+00:00 - iot-server/run \n2021-01-06T13:24:16,299125610+00:00 - gunicorn/run \n2021-01-06T13:24:16,343325987+00:00 - nginx/run \n/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)

In [34]:
# Clean up: delete the deployed web service once testing is finished.
service.delete()