In [2]:
from utils import load_data

# Labels: read the raw label files and flatten each one into a 1-D vector.
y_train = load_data('./data/raw/train-labels.gz', True).reshape(-1)
y_test = load_data('./data/raw/test-labels.gz', True).reshape(-1)

# Images: rescale the pixel intensities from 0-255 down to 0-1 so that the
# model converges faster.
X_train = load_data('./data/raw/train-images.gz', False) / 255.0
X_test = load_data('./data/raw/test-images.gz', False) / 255.0

# Azure Machine Learning service

## Set up workspace

In [3]:
import azureml.core
# Show which version of the Azure ML SDK this notebook is executed with.
print(azureml.core.VERSION)

1.0.57


In [4]:
# Azure workspace coordinates. They are blank in this copy of the notebook;
# fill them in before running the workspace cell below.
WORKSPACE_NAME = ""    # name of the Azure ML workspace
SUBSCRIPTION_ID = ""   # Azure subscription the workspace belongs to
RESOURCE_GROUP = ""    # resource group that holds the workspace
LOCATION = ""          # Azure region, passed when the workspace is created

In [5]:
from azureml.core import Workspace, Experiment, Run
from azureml.exceptions import WorkspaceException

# Attach to the workspace, creating it first if it does not exist yet.
#
# `exist_ok=True` makes `Workspace.create` return the already-existing
# workspace instead of failing. This replaces a broad
# `except WorkspaceException` fallback, which treated *every* creation
# failure (bad credentials, invalid location, quota, ...) as "workspace
# already exists" and hid the real cause behind a follow-up lookup error.
ws = Workspace.create(
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
    name=WORKSPACE_NAME,
    location=LOCATION,
    exist_ok=True,
)

In [6]:
# Show the details of the workspace held in `ws` (displayed as the cell output).
ws.get_details()

In [7]:
# Experiment in workspace `ws` that the AutoML run is submitted to.
experiment = Experiment(workspace=ws, name="automl-experiment")

# Training

## Define the machine learning objective and constraints

In [8]:
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
import logging

# AutoML settings: what to optimise, how long to search, and the training data.
automl_config = AutoMLConfig(
    # Objective
    task='regression',
    primary_metric='spearman_correlation',
    # Search budget: 10 iterations, each one capped at 5 minutes
    iterations=10,
    iteration_timeout_minutes=5,
    # Validation scheme and training data
    n_cross_validations=5,
    X=X_train,
    y=y_train,
    # Logging
    debug_log='automl.log',
    verbosity=logging.INFO,
    # NOTE: `path=project_folder` is left disabled.
)

In [9]:
# Submit the AutoML configuration to the experiment and stream the progress
# of the run into the cell output.
local_run = experiment.submit(automl_config, show_output=True)

Running on local machine
Parent Run ID: AutoML_8ab3b1c1-14c5-48f4-af77-58503393628c
Current status: DatasetCrossValidationSplit. Generating CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper RandomForest             0:00:41       0.7863    0.7863
         1                                                  0:05:06          nan    0.7863
ERROR: Fit operation exceeded provided timeout, terminat

## Retrieve the best model

In [10]:
from azureml.widgets import RunDetails
# Show the run-details widget for the AutoML run held in `local_run`.
RunDetails(local_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [11]:
# Retrieve the best child run and its fitted model from the AutoML run.
# NOTE: the two objects are only bound to variables here; this cell performs
# no explicit save of the model.
best_run, fitted_model = local_run.get_output()

In [12]:
# Display the best run (rendered as a one-row summary with portal/doc links).
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_8ab3b1c1-14c5-48f4-af77-58503393628c_9,,Completed,Link to Azure Portal,Link to Documentation


In [13]:
# Display the fitted pipeline to inspect the steps it is made of.
fitted_model

RegressionPipeline(pipeline=Pipeline(memory=None,
     steps=[('stackensembleregressor', StackEnsembleRegressor(base_learners=[('6', RegressionPipeline(pipeline=Pipeline(memory=None,
     steps=[('StandardScalerWrapper', <automl.client.core.common.model_wrappers.StandardScalerWrapper object at 0x000001C7A3094208>), ('LightGBMRegressor', LightGBMRegresso...   random_state=None, selection='cyclic', tol=0.0001, verbose=0),
            training_cv_folds=5))]),
          stddev=None)

## Test the best model

In [14]:
# Score both splits with the fitted model ...
y_pred_train = fitted_model.predict(X_train)
y_pred_test = fitted_model.predict(X_test)

# ... then compute the residuals (actual minus predicted) for each split.
y_residual_train = y_train - y_pred_train
y_residual_test = y_test - y_pred_test