In [1]:
from azureml.core import Workspace, Experiment, Dataset, Datastore, Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.train.automl import AutoMLConfig
from azureml.core.environment import Environment
import pandas as pd
import numpy as np
import joblib

In [2]:
ws = Workspace.from_config()
experiment_name = 'california-housing-onnx'
experiment=Experiment(ws, experiment_name)

In [3]:
!pip install onnxruntime
import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxConverter



In [4]:
compute_name = "nuria-p3"
try:
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
    print('Using existing compute target.')
except ComputeTargetException:
    print('Creating compute target.')
    cluster_type = 1
    provisioning_config = AmlCompute.provisioning_configuration(vm_size='Standard_D12_V2', min_nodes=1, max_nodes=5)
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

compute_target.wait_for_completion(show_output=True)

Using existing compute target.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [5]:
def create_datasets(x,y):
    columns_x = ['MedInc','HouseAge','AveRooms','AveBedrms','Population','AveOccup','Latitude','Longitude']
    x_df = pd.DataFrame(x,columns=columns_x)  
    columns_y = ['MedHouseVal']
    y_df = pd.DataFrame(y,columns=columns_y)  
    
    x_tr,x_test,y_tr,y_test = train_test_split(x_df,y_df, test_size = 20, random_state=0)

    return x_tr,x_test,y_tr,y_test

In [6]:
# load data and create datasets for training and testing
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
  
x,y = fetch_california_housing(return_X_y=True)
x_train,x_test,y_train,y_test = create_datasets(x,y)
data = pd.concat([x_train,y_train], axis=1)

In [7]:
# Check if train dataset is already registered, if not, register it.
found = False
datastore=ws.get_default_datastore() 
if 'data' in ws.datasets.keys(): 
        found = True
if not found:
        TabularDatasetFactory.register_pandas_dataframe(data, datastore,'data')
ds = Dataset.get_by_name(ws,'data')

In [8]:
# TODO: Put your automl settings here
automl_settings = { "experiment_timeout_minutes": 20,
                    "max_concurrent_iterations": 5,
                    "primary_metric" : 'r2_score'}      

# TODO: Put your automl config here
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = 'regression',
                             training_data = ds,
                             label_column_name ='MedHouseVal',   
                             path = './automl',
                             enable_early_stopping = True,
                             featurization = 'auto',
                             validation_size = 0.2,
                             debug_log = 'automl_errors.log',
                             enable_onnx_compatible_models = True,
                             model_explainability=True,
                             **automl_settings)

In [9]:
# TODO: Submit your experiment
remote_run = experiment.submit(automl_config)

Running on remote.


In [10]:
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=False)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_261eca2c-e7de-4823-892a-927f1066071f',
 'target': 'nuria-p3',
 'status': 'Completed',
 'startTimeUtc': '2021-01-26T19:13:51.387204Z',
 'endTimeUtc': '2021-01-26T19:37:20.858015Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0.2',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'nuria-p3',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"b39e2ffd-80c7-426a-a522-1852e2dead89\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"managed-dataset/93d95790-2319-4202-854e-e99533b8ab7f/\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-136165\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"9a7511b8-150f-4a58-8528-3

In [11]:
best_run, onnx_mdl = remote_run.get_output(return_onnx_model=True)

In [12]:
onnx_fl_path = "automl/best-model.onnx"
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)

In [13]:
session = onnxruntime.InferenceSession(onnx_fl_path)

In [14]:
session.get_modelmeta()
for i in range(8):
    var = 'input_name_' + str(i)
    var = session.get_inputs()[i].name
    print(var)

MedInc
HouseAge
AveRooms
AveBedrms
Population
AveOccup
Latitude
Longitude


In [19]:
x_test.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
14740,4.15,22.0,5.66,1.08,1551.0,4.18,32.58,-117.05
10101,5.78,32.0,6.11,0.93,1296.0,3.02,33.92,-117.97
20566,4.35,29.0,5.93,1.03,1554.0,2.91,38.65,-121.84
2670,2.45,37.0,4.99,1.32,390.0,2.75,33.2,-115.6
15709,5.0,25.0,4.32,1.04,649.0,1.71,37.79,-122.43


In [22]:
results = session.run([], {"MedInc": np.array((2.45)).astype(np.float32), 
                           "HouseAge": np.array((37.00)).astype(np.float32), 
                           "AveRooms": np.array((4.99)).astype(np.float32),
                           "AveBedrms": np.array((1.32)).astype(np.float32),
                           "Population": np.array((390.00)).astype(np.float32),
                           "AveOccup": np.array((2.75)).astype(np.float32),
                           "Latitude": np.array((33.20)).astype(np.float32),
                           "Longitude": np.array((-115.60)).astype(np.float32)})

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Invalid rank for input: Population Got: 0 Expected: 2 Please fix either the inputs or the model.