# Automated ML


In [43]:
from azureml.core import Workspace, Experiment, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig, Environment
from azureml.widgets import RunDetails
from azureml.core.dataset import Dataset

In [44]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Dataset

### Overview
Breast Cancer Wisconsin (Diagnostic) Data Set.We approach the project as a classification task. We wanted to classiy the cancer as benign or malignant based on the provided data.
The target column is Diagnosis (M = malignant, B = benign).

In [45]:
workspace = "MLworkspace"

#ws = Workspace.get(name=workspace)
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-AutoML-BreastCancer")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

#run = exp.start_logging()

Workspace name: MLworkspace
Azure region: eastus
Subscription id: 5831e312-31c6-42ba-b4a9-5c780e5ea1da
Resource group: MLAzureGroup


### Compute Target

In [46]:
clustername = 'StandardDS12CPU'
is_new_cluster = False
try:
    gpu_cluster = ComputeTarget(workspace = ws,name= clustername)
    print("Find the existing cluster")
except ComputeTargetException:
    print("Cluster not find - Creating cluster")
    is_new_cluster = True
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    gpu_cluster = ComputeTarget.create(ws, clustername, compute_config)

gpu_cluster.wait_for_completion(show_output=True)

Find the existing cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Setup Env

In [47]:
%%writefile conda_dependencies.yml
channels:
- conda-forge
dependencies:
- python=3.6.9
- pip:
  - azureml-defaults
  - tensorflow-gpu==2.0.0
  - keras<=2.3.1
  - matplotlib
  - pandas
  - argparse
  - joblib
  - scikit-learn

Overwriting conda_dependencies.yml


In [48]:
keras_env = Environment.from_conda_specification(name='keras-env', file_path='conda_dependencies.yml')

# Specify a GPU base image
keras_env.docker.enabled = True

In [49]:
from azureml.core.runconfig import RunConfiguration
conda_run_config = RunConfiguration(framework="python")
conda_run_config.environment = keras_env

### Import data from CSV


In [23]:
import pandas as pd
df_train = pd.read_csv('./data/cancer_data.csv')
df_train.head()
df_train.describe()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


Unnamed: 0,id,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,0.0
mean,30371830.0,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,...,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946,
std,125020600.0,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,...,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061,
min,8670.0,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,...,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504,
25%,869218.0,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,...,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146,
50%,906024.0,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,...,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004,
75%,8813129.0,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,...,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208,
max,911320500.0,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,...,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075,


In [24]:
df_train.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [25]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
id                         569 non-null int64
diagnosis                  569 non-null object
radius_mean                569 non-null float64
texture_mean               569 non-null float64
perimeter_mean             569 non-null float64
area_mean                  569 non-null float64
smoothness_mean            569 non-null float64
compactness_mean           569 non-null float64
concavity_mean             569 non-null float64
concave points_mean        569 non-null float64
symmetry_mean              569 non-null float64
fractal_dimension_mean     569 non-null float64
radius_se                  569 non-null float64
texture_se                 569 non-null float64
perimeter_se               569 non-null float64
area_se                    569 non-null float64
smoothness_se              569 non-null float64
compactness_se             569 non-null float64
concavity_se               569 non

In [26]:
df_train = df_train.drop('Unnamed: 32',axis=1)
df_train = df_train.drop('id',axis=1)

In [27]:
df_train['diagnosis'] = df_train['diagnosis'].apply(lambda x: 1 if x =='B' else 0).astype('category')

In [28]:
df_train['diagnosis'].value_counts()

1    357
0    212
Name: diagnosis, dtype: int64

In [29]:
df_train.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,0,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,0,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,0,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,0,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### Load data in Tabular Dataset

In [30]:
df_train.to_csv('./data/cancer_train_data.csv',index=False)

In [31]:
ds_tr = ws.get_default_datastore()

In [32]:
ds_tr.upload(src_dir='./data', target_path='cancerdata2', overwrite=True, show_progress=True)

Uploading an estimated of 2 files
Uploading ./data/cancer_data.csv
Uploaded ./data/cancer_data.csv, 1 files out of an estimated total of 2
Uploading ./data/cancer_train_data.csv
Uploaded ./data/cancer_train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_85bb13a5c3be4ba9a3d924c427018084

In [33]:
training_data = Dataset.Tabular.from_delimited_files(path=ds_tr.path('cancerdata2/cancer_train_data.csv'))

In [34]:
training_data.to_pandas_dataframe()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,0,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,0,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,0,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,0,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,0,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,0,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,0,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,0,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


## AutoML Configuration

Most of them were set to limit the compute time and cost.

In [35]:
from azureml.train.automl import AutoMLConfig
import time
import logging

automl_settings = {
    "name": "udacity_AutoML_BreastCancer_{0}".format(time.time()),
    'iterations':20,
    "experiment_timeout_minutes" : 20,
    "enable_early_stopping" : True,
    "iteration_timeout_minutes": 10,
    "n_cross_validations": 5,
    "primary_metric": 'AUC_weighted',
    "max_concurrent_iterations": 3,
    'featurization': 'auto'
}

automl_config = AutoMLConfig(task='classification',
                             compute_target=gpu_cluster,
                             training_data = training_data,
                             label_column_name = 'diagnosis',
                             run_configuration = conda_run_config,
                             **automl_settings,
                             )

In [36]:
# TODO: Submit your experiment
automl_run = exp.submit(automl_config)

Running on remote.


## Run Details


In [37]:
from azureml.widgets import RunDetails
RunDetails(automl_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model




In [39]:
best_run, fitted_model = automl_run.get_output()
print(best_run)

Package:azureml-automl-runtime, training version:1.20.0, current version:1.19.0
Package:azureml-core, training version:1.20.0, current version:1.19.0
Package:azureml-dataprep, training version:2.7.3, current version:2.6.1
Package:azureml-dataprep-native, training version:27.0.0, current version:26.0.0
Package:azureml-dataprep-rslex, training version:1.5.0, current version:1.4.0
Package:azureml-dataset-runtime, training version:1.20.0, current version:1.19.0
Package:azureml-defaults, training version:1.20.0, current version:1.19.0
Package:azureml-interpret, training version:1.20.0, current version:1.19.0
Package:azureml-pipeline-core, training version:1.20.0, current version:1.19.0
Package:azureml-telemetry, training version:1.20.0, current version:1.19.0
Package:azureml-train-automl-client, training version:1.20.0, current version:1.19.0
Package:azureml-train-automl-runtime, training version:1.20.0.post1, current version:1.19.0


Run(Experiment: udacity-AutoML-BreastCancer,
Id: AutoML_869bcde3-2ac8-4d96-be21-16a61f99d58f_14,
Type: azureml.scriptrun,
Status: Completed)


In [41]:
print(best_run.properties['score'])

0.9950097252534554


In [42]:
fitted_model.steps[1][1].estimators

[('0',
  Pipeline(memory=None,
           steps=[('maxabsscaler', MaxAbsScaler(copy=True)),
                  ('lightgbmclassifier',
                   LightGBMClassifier(boosting_type='gbdt', class_weight=None,
                                      colsample_bytree=1.0,
                                      importance_type='split', learning_rate=0.1,
                                      max_depth=-1, min_child_samples=20,
                                      min_child_weight=0.001, min_split_gain=0.0,
                                      n_estimators=100, n_jobs=1, num_leaves=31,
                                      objective=None, random_state=None,
                                      reg_alpha=0.0, reg_lambda=0.0, silent=True,
                                      subsample=1.0, subsample_for_bin=200000,
                                      subsample_freq=0, verbose=-10))],
           verbose=False)),
 ('1',
  Pipeline(memory=None,
           steps=[('maxabsscaler', MaxAbsSca

## Model Deployment



In [50]:
from azureml.train.automl.run import AutoMLRun
run_id = 'AutoML_869bcde3-2ac8-4d96-be21-16a61f99d58f_14'
automl_training_run = AutoMLRun(exp,run_id)
automl_training_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-AutoML-BreastCancer,AutoML_869bcde3-2ac8-4d96-be21-16a61f99d58f_14,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [51]:
model_name  = best_run.properties['model_name']
model_name

'AutoML869bcde3214'

In [34]:
script_filename = './inference/score.py'
best_run.download_file('outputs/scoring_file_v_1_0_0.py',script_filename)

In [35]:
description = 'Breast Cancer Classification'
model = automl_run.register_model(model_name = model_name,description= description,tags =None)

print(automl_run.model_id)
model

AutoML0bcb2574d18


Model(workspace=Workspace.create(name='MLworkspace', subscription_id='5831e312-31c6-42ba-b4a9-5c780e5ea1da', resource_group='MLAzureGroup'), name=AutoML0bcb2574d18, id=AutoML0bcb2574d18:2, version=2, tags={}, properties={})

In [36]:
from azureml.core.environment import Environment 
from azureml.core.webservice import AciWebservice,Webservice
from azureml.core.model import Model,InferenceConfig

aci_service_name = 'breast-cancer-classifier'
inference_config = InferenceConfig(entry_script = script_filename, environment = best_run.get_environment())

aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                                   memory_gb=1, 
                                                   tags={'type':"breast-cancer-classification"}, 
                                               description='Udacity project Breast cancer classification')

aciservice = Model.deploy(workspace=ws, 
                       name=aci_service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

aciservice.wait_for_deployment(show_output=True)
print(aciservice.state)
print(aciservice.scoring_uri)
print(aciservice.swagger_uri)
print(aciservice.get_logs())

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running....................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Transitioning
None
None
2021-01-20T17:38:35,261786100+00:00 - gunicorn/run 
2021-01-20T17:38:35,272098500+00:00 - rsyslog/run 
2021-01-20T17:38:35,270278500+00:00 - iot-server/run 
2021-01-20T17:38:35,333184500+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_23df5d07c5e8388dac3ecfdf573dafb1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_23df5d07c5e8388dac3ecfdf573dafb1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_23df5d07c5e8388dac3ecfdf573dafb1/lib/libssl.so.1.0.0: no version information available (required by 

TODO: In the cell below, send a request to the web service you deployed to test it.

In [39]:
#%%writefile test_record.json

test_sample = {"data" : [{"radius_mean":17.99,
                     "texture_mean":10.38,
                     "perimeter_mean":122.8,
                    "area_mean":1001.0,
                     "smoothness_mean":0.1184,
                     "compactness_mean":0.2776,
                     "concavity_mean":0.3001,
                     "concave points_mean":0.1471,
                     "symmetry_mean":0.2419,
                     "fractal_dimension_mean":0.07871,
                     "radius_se":1.095,
                     "texture_se":0.9053,
                     "perimeter_se":8.589,
                     "area_se":153.4,
                     "smoothness_se":0.006399,
                     "compactness_se":0.04904,
                     "concavity_se":0.05373,
                     "concave points_se":0.01587,
                     "symmetry_se":0.03003,
                     "fractal_dimension_se":0.006193,
                     "radius_worst":25.38,
                     "texture_worst":17.33,
                     "perimeter_worst":184.6,
                     "area_worst":2019.0,
                     "smoothness_worst":0.1622,
                     "compactness_worst":0.6656,
                     "concavity_worst":0.7119,
                     "concave points_worst":0.2654,
                     "symmetry_worst":0.4601,
                     "fractal_dimension_worst":0.1189}]}


In [40]:
import json
#test_data = json.dumps({'data':[df.to_dict(orient='records')[0]]})
test_data = json.dumps(test_sample)
test_data

'{"data": [{"radius_mean": 17.99, "texture_mean": 10.38, "perimeter_mean": 122.8, "area_mean": 1001.0, "smoothness_mean": 0.1184, "compactness_mean": 0.2776, "concavity_mean": 0.3001, "concave points_mean": 0.1471, "symmetry_mean": 0.2419, "fractal_dimension_mean": 0.07871, "radius_se": 1.095, "texture_se": 0.9053, "perimeter_se": 8.589, "area_se": 153.4, "smoothness_se": 0.006399, "compactness_se": 0.04904, "concavity_se": 0.05373, "concave points_se": 0.01587, "symmetry_se": 0.03003, "fractal_dimension_se": 0.006193, "radius_worst": 25.38, "texture_worst": 17.33, "perimeter_worst": 184.6, "area_worst": 2019.0, "smoothness_worst": 0.1622, "compactness_worst": 0.6656, "concavity_worst": 0.7119, "concave points_worst": 0.2654, "symmetry_worst": 0.4601, "fractal_dimension_worst": 0.1189}]}'

In [46]:
import requests
import json

scoring_uri = 'http://85c4a414-e90c-4838-84ba-d7aa5295a446.eastus.azurecontainer.io/score'

# Convert to JSON string
input_data = json.dumps(test_sample)
# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.json())

{"result": [0]}


TODO: In the cell below, print the logs of the web service and delete the service

In [32]:
print(aciservice.get_logs())

### Delete the service

In [63]:
aciservice.delete()