# Training a Model Using AutoML on the Local Compute
Save trained model as Scikit-Learn model (.pkl) and as ONNX model (.onnx file)

Data: IBM Employee Attrition dataset loaded from Azure ML Dataset

##  Get Azure ML Workspace to use

In [51]:
# azureml-core of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset

# Get the Workspace described by the default config.json file
ws = Workspace.from_config()

## Load data from Azure ML Datasets into Pandas DataFrame

In [52]:
# Load Data: look up the dataset registered in the workspace under this name
aml_dataset = ws.datasets['IBM-Employee-Attrition']

# Use a Pandas DataFrame just to take a sneak peek at some data and the schema
full_df = aml_dataset.to_pandas_dataframe()
# Alternative that also discards rows with missing values:
# .to_pandas_dataframe().dropna()
full_df.head(5)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,1,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,0,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,1,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,0,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,0,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


In [53]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the
# dataset's numeric columns
full_df.describe()

Unnamed: 0,Age,Attrition,DailyRate,DistanceFromHome,Education,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
count,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,...,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0
mean,36.92,0.16,802.49,9.19,2.91,1.0,1024.87,2.72,65.89,2.73,...,2.71,80.0,0.79,11.28,2.8,2.76,7.01,4.23,2.19,4.12
std,9.14,0.37,403.51,8.11,1.02,0.0,602.02,1.09,20.33,0.71,...,1.08,0.0,0.85,7.78,1.29,0.71,6.13,3.62,3.22,3.57
min,18.0,0.0,102.0,1.0,1.0,1.0,1.0,1.0,30.0,1.0,...,1.0,80.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,30.0,0.0,465.0,2.0,2.0,1.0,491.25,2.0,48.0,2.0,...,2.0,80.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,2.0
50%,36.0,0.0,802.0,7.0,3.0,1.0,1020.5,3.0,66.0,3.0,...,3.0,80.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0
75%,43.0,0.0,1157.0,14.0,4.0,1.0,1555.75,4.0,83.75,3.0,...,4.0,80.0,1.0,15.0,3.0,3.0,9.0,7.0,3.0,7.0
max,60.0,1.0,1499.0,29.0,5.0,1.0,2068.0,4.0,100.0,4.0,...,4.0,80.0,3.0,40.0,6.0,4.0,40.0,18.0,15.0,17.0


## Clean up the initial dataset (Using related Pandas DataFrame)

In [54]:
# Columns removed before training, in the order they are dropped:
#   EmployeeCount  - all values are 1, so attrition is independent of this feature
#   EmployeeNumber - merely an identifier
#   Over18         - no reason was recorded for this one (TODO confirm)
#   StandardHours  - all values are 80
for uninformative_column in ('EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours'):
    full_df = full_df.drop([uninformative_column], axis=1)

## Split original dataset in test/train sets using Scikit-Learn train_test_split function

In [5]:
from sklearn.model_selection import train_test_split

# Split the DataFrame with Scikit-Learn's train_test_split function.
# The test DataFrame is used at the end, without AutoML, just for testing predictions with the model.

# Only split into test/train sets (test_size=0.2 keeps 20% of the rows for testing)
train_df, test_df = train_test_split(full_df, test_size=0.2, random_state=1)
train_df.describe()

# Alternative: split into x/y as well as test/train
# y_df = full_df.pop("Attrition")
# x_df = full_df
# x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.1, random_state=1)

# Another possibility would be to split using the Azure ML Datasets (better for remote compute):
# https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py#random-split-percentage--seed-none-

Unnamed: 0,Age,Attrition,DailyRate,DistanceFromHome,Education,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
count,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,...,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0,1176.0
mean,37.097789,0.152211,810.272109,9.220238,2.92432,2.72449,65.661565,2.72534,2.069728,2.722789,...,3.152211,2.746599,0.786565,11.307823,2.813776,2.748299,6.883503,4.221088,2.134354,4.107993
std,9.123351,0.359378,404.535732,8.120369,1.016571,1.09184,20.309002,0.726569,1.107914,1.094524,...,0.359378,1.066506,0.847237,7.775825,1.283021,0.703485,5.956226,3.625268,3.201432,3.495807
min,18.0,0.0,102.0,1.0,1.0,1.0,30.0,1.0,1.0,1.0,...,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,30.0,0.0,473.5,2.0,2.0,2.0,48.0,2.0,1.0,2.0,...,3.0,2.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,2.0
50%,36.0,0.0,809.0,7.0,3.0,3.0,65.0,3.0,2.0,3.0,...,3.0,3.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0
75%,43.0,0.0,1159.0,14.0,4.0,4.0,83.0,3.0,3.0,4.0,...,3.0,4.0,1.0,15.0,3.0,3.0,9.0,7.0,2.0,7.0
max,60.0,1.0,1499.0,29.0,5.0,4.0,100.0,4.0,5.0,4.0,...,4.0,4.0,3.0,40.0,6.0,4.0,40.0,18.0,15.0,17.0


In [6]:
# Summary statistics of the test split, for comparison with the training split
test_df.describe()

Unnamed: 0,Age,Attrition,DailyRate,DistanceFromHome,Education,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
count,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,...,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0
mean,36.227891,0.197279,771.340136,9.081633,2.867347,2.710884,66.809524,2.748299,2.040816,2.751701,...,3.159864,2.57483,0.823129,11.166667,2.741497,2.812925,7.506803,4.261905,2.401361,4.183673
std,9.165846,0.398623,398.537129,8.065441,1.054531,1.099837,20.41977,0.648901,1.104614,1.137139,...,0.367105,1.12958,0.87202,7.812835,1.314599,0.717197,6.752951,3.620586,3.302,3.849596
min,18.0,0.0,115.0,1.0,1.0,1.0,30.0,1.0,1.0,1.0,...,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,30.0,0.0,427.5,2.0,2.0,2.0,50.0,2.0,1.0,2.0,...,3.0,2.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,1.25
50%,35.0,0.0,726.5,7.0,3.0,3.0,68.5,3.0,2.0,3.0,...,3.0,3.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0
75%,42.0,0.0,1146.0,14.0,4.0,4.0,84.75,3.0,2.0,4.0,...,3.0,4.0,1.0,15.0,3.0,3.0,10.0,7.0,4.0,7.0
max,59.0,1.0,1498.0,29.0,5.0,4.0,100.0,4.0,5.0,4.0,...,4.0,4.0,3.0,37.0,6.0,4.0,36.0,18.0,15.0,17.0


## List and select primary metric to drive the AutoML classification problem

In [7]:
from azureml.train import automl

# The list of possible primary metrics is documented here:
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-auto-train#primary-metric

# Get the list of valid primary metrics for the given task
automl.utilities.get_primary_metrics('classification')

# This notebook uses 'accuracy' as the primary metric (closer to 1.00 is better)

['precision_score_weighted',
 'norm_macro_recall',
 'AUC_weighted',
 'accuracy',
 'average_precision_score_weighted']

## Define AutoML Experiment settings
One of the AutoMLConfig settings is the *enable_onnx_compatible_models* parameter.

> ONNX is an open format built to represent machine learning models. ONNX defines a common set of operators - the building blocks of machine learning and deep learning models - and a common file format to enable AI developers to use models with a variety of frameworks, tools, runtimes, and compilers. ([ONNX Home Page](https://onnx.ai/))

In [8]:
import logging

from azureml.train.automl import AutoMLConfig

# Explanation of the settings:
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-auto-train#configure-your-experiment-settings
# AutoMLConfig reference:
# https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig

# All settings are gathered in one dict and handed to AutoMLConfig as keyword arguments.
# No 'compute_target' entry is given, so the computation happens in the local environment.
# Optional entries that are left disabled:
#     "experiment_timeout_minutes": 20,
#     "iteration_timeout_minutes": 5,
#     "whitelist_models": 'XGBoostClassifier',
#     "blacklist_models": 'XGBoostClassifier',
automl_settings = {
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": train_df,
    "label_column_name": "Attrition",
    "n_cross_validations": 5,
    "enable_early_stopping": True,
    "featurization": 'auto',
    "debug_log": 'automated_ml_errors.log',
    "verbosity": logging.INFO,
    "enable_onnx_compatible_models": True,
}
automl_config = AutoMLConfig(**automl_settings)

# WARNING: If using the X and y parameters (deprecated) you get the following warning
# WARNING - The AutoMLConfig inputs you have specified will soon be deprecated. Please use the AutoMLConfig shown in our documentation: https://aka.ms/AutoMLConfig


## Run Experiment with multiple child runs under the covers

In [9]:
from azureml.core import Experiment
from datetime import datetime

# Optional: build an experiment name that carries a date (or date-and-hour) suffix
#now = datetime.now()
#time_string = now.strftime("%m-%d-%Y-%H")
#time_string = now.strftime("%m-%d-%Y")
#print(time_string)
#experiment_name = "aml-wrkshp-automl-local-{0}".format(time_string)

# A fixed experiment name is used instead
experiment_name = "aml-wrkshp-automl-local"
print(experiment_name)

experiment = Experiment(workspace=ws, 
                        name=experiment_name)

# Submit the AutoML configuration; with show_output=True the run's status
# messages are printed in the cell output
run = experiment.submit(automl_config, show_output=True)

aml-wrkshp-automl-local
Running on local machine
Parent Run ID: AutoML_93a6946f-2fe6-4cdb-b934-c6c68e178533

Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data

## Explore results with Widget

In [10]:
# Explore the results of automatic training with a Jupyter widget:
# https://docs.microsoft.com/en-us/python/api/azureml-widgets/azureml.widgets?view=azure-ml-py
from azureml.widgets import RunDetails
RunDetails(run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Retrieve the 'Best' Scikit-Learn Model

In [11]:
# get_output() hands back the best run together with its fitted model;
# both are printed, separated by a dashed line
best_run, fitted_model = run.get_output()
print(best_run, '--------', fitted_model, sep='\n')

Run(Experiment: aml-wrkshp-automl-local,
Id: AutoML_93a6946f-2fe6-4cdb-b934-c6c68e178533_30,
Type: None,
Status: Completed)
--------
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(allow_chargram=None, enable_dnn=None,
                                 enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedso...
                                                                                                  learning_rate='invscaling',
                                                                                                  l

## Retrieve the 'Best' ONNX Model
Below we select the best pipeline from our iterations. The get_output method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on get_output allow you to retrieve the best run and fitted model for any logged metric or for a particular iteration.
Set the parameter return_onnx_model=True to retrieve the best ONNX model, instead of the Python model.

In [12]:
# return_onnx_model=True retrieves the best ONNX model instead of the Python model
best_run, onnx_mdl = run.get_output(return_onnx_model=True)

### Explicitly save the best ONNX model to a local drive path

In [13]:
from azureml.automl.runtime.onnx_convert import OnnxConverter

# Save the in-memory ONNX model (onnx_mdl) to a file in the current directory
onnx_fl_path = "./best_model.onnx"
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)

## See files associated with the 'Best run'

In [14]:
# List the names of the files associated with the best run
print(best_run.get_file_names())

# Example of downloading a single file by name:
# best_run.download_file('azureml-logs/70_driver_log.txt')

['accuracy_table', 'confusion_matrix', 'explanation/63c5bbba/classes.interpret.json', 'explanation/63c5bbba/expected_values.interpret.json', 'explanation/63c5bbba/features.interpret.json', 'explanation/63c5bbba/global_names/0.interpret.json', 'explanation/63c5bbba/global_rank/0.interpret.json', 'explanation/63c5bbba/global_values/0.interpret.json', 'explanation/63c5bbba/local_importance_values.interpret.json', 'explanation/63c5bbba/per_class_names/0.interpret.json', 'explanation/63c5bbba/per_class_rank/0.interpret.json', 'explanation/63c5bbba/per_class_values/0.interpret.json', 'explanation/63c5bbba/rich_metadata.interpret.json', 'explanation/63c5bbba/visualization_dict.interpret.json', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.onnx', 'outputs/model.pkl', 'outputs/model_onnx.json', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']


## Download experiment run related files 
Model files (.pkl and .onnx), Environment files to see Conda and Environment dependencies used by AutoML, etc.

In [16]:
# Download the run's files, one download_file call per entry, in this order
run_output_files = (
    'outputs/model.pkl',
    'outputs/model.onnx',
    'outputs/model_onnx.json',
    'outputs/conda_env_v_1_0_0.yml',
    'outputs/env_dependencies.json',
    'outputs/scoring_file_v_1_0_0.py',
    'outputs/pipeline_graph.json',
)
for run_output_file in run_output_files:
    best_run.download_file(run_output_file)

In [37]:
# AzureML dependencies: read the package versions recorded in the best run's details
best_run_details = best_run.get_details()

# The value stored under 'dependencies_versions' is a JSON-formatted string, not a dict
dependencies_versions = best_run_details['properties']['dependencies_versions']
dependencies_versions

'{"azureml-widgets": "1.17.0", "azureml-train": "1.17.0", "azureml-train-restclients-hyperdrive": "1.17.0", "azureml-train-core": "1.17.0", "azureml-train-automl": "1.17.0", "azureml-train-automl-runtime": "1.17.0", "azureml-train-automl-client": "1.17.0", "azureml-tensorboard": "1.17.0", "azureml-telemetry": "1.17.0", "azureml-sdk": "1.17.0", "azureml-samples": "0+unknown", "azureml-pipeline": "1.17.0", "azureml-pipeline-steps": "1.17.0", "azureml-pipeline-core": "1.17.0", "azureml-opendatasets": "1.17.0", "azureml-model-management-sdk": "1.0.1b6.post1", "azureml-mlflow": "1.17.0.post1", "azureml-interpret": "1.17.0", "azureml-explain-model": "1.17.0", "azureml-defaults": "1.17.0", "azureml-dataset-runtime": "1.17.0", "azureml-dataprep": "2.4.2", "azureml-dataprep-rslex": "1.2.2", "azureml-dataprep-native": "24.0.0", "azureml-datadrift": "1.17.0", "azureml-core": "1.17.0", "azureml-contrib-services": "1.17.0", "azureml-contrib-server": "1.17.0", "azureml-contrib-reinforcementlearning"

## Register the Scikit-Learn model (.pkl file)
Once you've trained the model, you can save and register it to your workspace. Model registration lets you store and version your models in your workspace to simplify model management and deployment.

By inspecting the *conda_env_v_1_0_0.yml* file, it's possible to get the versions of the packages used by AutoML.

In [21]:
import re

# One pattern per package of interest; each captures the pinned version
# (the text following '==') in the named group 'ver'.
version_patterns = {
    'scikit-learn': re.compile(r'scikit-learn==(?P<ver>\d+\.\d+(?:\.\d+(?:\.\w+)?)?)'),
    'pandas': re.compile(r'pandas==(?P<ver>\d+\.\d+(?:\.\d+(?:\.\w+)?)?)'),
}

package_versions = {}

# Scan the environment file line by line and record every pinned version found
with open('conda_env_v_1_0_0.yml') as env_file:
    for env_line in env_file:
        for package_name, pattern in version_patterns.items():
            found = pattern.search(env_line)
            if found:
                package_versions[package_name] = found.group('ver')

print(package_versions)

{'pandas': '0.25.1', 'scikit-learn': '0.22.1'}


In [None]:
# Open question: why does best_run.get_environment() fail?

# best_run_environment = best_run.get_environment() 
# print(best_run_environment)

Running the following code will register the model to your workspace, and will make it available to reference by name in remote compute contexts or deployment scripts.

In [40]:
from azureml.core.model import Model

# Register the best run's .pkl model, recording the package versions parsed
# from conda_env_v_1_0_0.yml (package_versions) alongside it
model_reg = best_run.register_model(
    model_name='aml-wrkshp-classif-empl-automl-local',        # Name of the registered model in your workspace.
    description='Binary classification model for employees attrition. From AutoML local training',
    model_path='outputs/model.pkl',                           # Path of the file inside the run to upload and register as a model
    model_framework=Model.Framework.SCIKITLEARN,              # Framework used to create the model
    model_framework_version=package_versions['scikit-learn'], # scikit-learn version found in the conda environment file
    tags={'ml-task': "binary-classification", 'business-area': "HR"},
    properties={'pandas-version': package_versions['pandas']},
    sample_input_dataset=aml_dataset                          # The Azure ML dataset the data was loaded from
)

print(model_reg)


Model(workspace=Workspace.create(name='demo-ent-ws', subscription_id='bcbf34a7-1936-4783-8840-8f324c37f354', resource_group='demo'), name=aml-wrkshp-classif-empl-automl-local, id=aml-wrkshp-classif-empl-automl-local:1, version=1, tags={'ml-task': 'binary-classification', 'business-area': 'HR'}, properties={'pandas-version': '0.25.1'})


## Make Predictions

### Prep Test Data: Extract the X values (feature columns) from the test dataset, as a Pandas DataFrame, for predicting

In [42]:
import pandas as pd

# Remove the label/y column.
# DataFrame.pop() removes the column from test_df in place, so the membership
# check keeps a re-run of this cell from raising a KeyError once the column is gone
# (y_test_df then keeps the value assigned by the earlier run).
if 'Attrition' in test_df.columns:
    y_test_df = test_df.pop('Attrition')

# x_test_df is the same object as test_df (not a copy), now without the label column
x_test_df = test_df

In [43]:
# Summary statistics of the feature columns left after removing 'Attrition'
x_test_df.describe()

Unnamed: 0,Age,DailyRate,DistanceFromHome,Education,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,MonthlyIncome,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
count,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,...,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0
mean,36.23,771.34,9.08,2.87,2.71,66.81,2.75,2.04,2.75,6448.35,...,3.16,2.57,0.82,11.17,2.74,2.81,7.51,4.26,2.4,4.18
std,9.17,398.54,8.07,1.05,1.1,20.42,0.65,1.1,1.14,4769.72,...,0.37,1.13,0.87,7.81,1.31,0.72,6.75,3.62,3.3,3.85
min,18.0,115.0,1.0,1.0,1.0,30.0,1.0,1.0,1.0,1052.0,...,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,30.0,427.5,2.0,2.0,2.0,50.0,2.0,1.0,2.0,2934.5,...,3.0,2.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,1.25
50%,35.0,726.5,7.0,3.0,3.0,68.5,3.0,2.0,3.0,4838.0,...,3.0,3.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0
75%,42.0,1146.0,14.0,4.0,4.0,84.75,3.0,2.0,4.0,7551.5,...,3.0,4.0,1.0,15.0,3.0,3.0,10.0,7.0,4.0,7.0
max,59.0,1498.0,29.0,5.0,4.0,100.0,4.0,5.0,4.0,19999.0,...,4.0,4.0,3.0,37.0,6.0,4.0,36.0,18.0,15.0,17.0


### Make Predictions with Scikit-Learn Model

#### (Optional) Download Model from Registry and load in-memory

In [44]:
# Print the path that Model.get_model_path resolves for the registered model name
print(Model.get_model_path('aml-wrkshp-classif-empl-automl-local', _workspace=ws))

azureml-models/aml-wrkshp-classif-empl-automl-local/1/model.pkl


In [45]:
# Look up the registered model by name and download it into the current directory;
# exist_ok=True is passed so that an earlier download does not get in the way
# (presumably it is overwritten; confirm against the Model.download documentation)
model_definition_from_registry = Model(ws,'aml-wrkshp-classif-empl-automl-local')
model_definition_from_registry.download(target_dir='.', exist_ok=True)
print(model_definition_from_registry)
print('-------')

# Load the model into memory. This rebinds fitted_model, replacing the object
# obtained earlier from run.get_output()
import joblib
fitted_model = joblib.load('model.pkl')
print(fitted_model)

Model(workspace=Workspace.create(name='demo-ent-ws', subscription_id='bcbf34a7-1936-4783-8840-8f324c37f354', resource_group='demo'), name=aml-wrkshp-classif-empl-automl-local, id=aml-wrkshp-classif-empl-automl-local:1, version=1, tags={'ml-task': 'binary-classification', 'business-area': 'HR'}, properties={'pandas-version': '0.25.1'})
-------
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(allow_chargram=None, enable_dnn=None,
                                 enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedso...
             

In [46]:
# Try the best model: predict the label for every row of the test dataset
y_predictions = fitted_model.predict(x_test_df)

# Only the first 10 predictions are printed
print('10 predictions: ')
print(y_predictions[:10])

10 predictions: 
[0 1 0 0 0 0 0 0 0 0]


In [47]:
# Try the best model: predict the class probabilities for every row of the test dataset
y_predictions_proba = fitted_model.predict_proba(x_test_df)

# Only the first 10 rows of probabilities are printed
print('10 probability predictions: ')
print(y_predictions_proba[:10])

10 probability predictions: 
[[0.84129938 0.15870062]
 [0.26431374 0.73568626]
 [0.5107778  0.4892222 ]
 [0.5372073  0.4627927 ]
 [0.74856691 0.25143308]
 [0.96562593 0.03437406]
 [0.97408512 0.02591488]
 [0.75047251 0.24952749]
 [0.96218621 0.03781379]
 [0.94024203 0.05975797]]


## Make Predictions with the ONNX model, using onnxruntime package
Needs `pip install onnxruntime==1.0.0` in the environment (also try version 1.1.0).

In [49]:
import sys
import json
from azureml.automl.core.onnx_convert import OnnxConvertConstants
from azureml.train.automl import constants

# The ONNX inference helper is only used when the running Python version is
# lower than OnnxConvertConstants.OnnxIncompatiblePythonVersion
python_version_compatible = (
    sys.version_info < OnnxConvertConstants.OnnxIncompatiblePythonVersion
)
    
import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper

def get_onnx_res(run):
    """Download the ONNX resource file of *run* and return its parsed JSON content.

    The file named by constants.MODEL_RESOURCE_PATH_ONNX is saved locally as
    'onnx_resource.json' and then loaded with json.load.
    """
    local_resource_path = 'onnx_resource.json'
    run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX,
                      output_file_path=local_resource_path)
    with open(local_resource_path) as resource_file:
        return json.load(resource_file)

if not python_version_compatible:
    print('Please use Python version 3.6 or 3.7 to run the inference helper.')
else:
    # The inference helper is built from the serialized ONNX model and the
    # ONNX resource downloaded from the best run
    serialized_onnx_model = onnx_mdl.SerializeToString()
    onnx_resource = get_onnx_res(best_run)
    onnxrt_helper = OnnxInferenceHelper(serialized_onnx_model, onnx_resource)

    # predict() yields the predicted labels and the predicted probabilities
    pred_onnx, pred_prob_onnx = onnxrt_helper.predict(x_test_df)

    print('Predicting with ONNX model...')
    print(pred_onnx)
    print(pred_prob_onnx)

Predicting with ONNX model...
[0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[[0.81275314 0.18724684]
 [0.28453526 0.7154648 ]
 [0.5145427  0.48545727]
 [0.5114399  0.48856014]
 [0.7132093  0.28679064]
 [0.92584026 0.07415981]
 [0.93522877 0.06477132]
 [0.7168464  0.2831536 ]
 [0.92486936 0.07513066]
 [0.9074605  0.09253959]
 [0.9289322  0.07106783]
 [0.75791425 0.24208574]
 [0.35312462 0.64687544]
 [0.6215406  0.3784594 ]
 [0.81811845 0.18188159

### Calculate the Accuracy with Test Dataset (Data not used for training)

In [50]:
from sklearn.metrics import accuracy_score

# Score the Scikit-Learn model's predictions against the test labels (y_test_df)
print('Accuracy with Scikit-Learn model:')
print(accuracy_score(y_test_df, y_predictions))

# Score the ONNX model's predictions against the same labels.
# NOTE: pred_onnx is only assigned when the ONNX inference branch ran
# (python_version_compatible was True); otherwise this raises a NameError.
print('Accuracy with ONNX model:')
print(accuracy_score(y_test_df, pred_onnx))

Accuracy with Scikit-Learn model:
0.8469387755102041
Accuracy with ONNX model:
0.8503401360544217
