In [1]:
from utils import load_data

# Pixel intensities are stored as 0-255; dividing by 255.0 rescales them to 0-1,
# which helps the model converge faster.
X_train = load_data('./data/raw/train-images.gz', False) / 255.0
X_test = load_data('./data/raw/test-images.gz', False) / 255.0

# Labels are flattened to one dimension with reshape(-1).
y_train = load_data('./data/raw/train-labels.gz', True).reshape(-1)
y_test = load_data('./data/raw/test-labels.gz', True).reshape(-1)

# Azure Machine Learning service

## Set up workspace

In [2]:
import azureml.core
# Show which azureml-core SDK version this kernel has loaded.
print(azureml.core.VERSION)

1.0.57


In [3]:
# Azure ML workspace coordinates. They are left blank here and must be filled in
# before running the workspace cell below, which passes them to Workspace.create.
WORKSPACE_NAME = ''
SUBSCRIPTION_ID = ''
RESOURCE_GROUP = ''
LOCATION = ''

In [4]:
from azureml.core import Workspace, Experiment, Run
from azureml.exceptions import WorkspaceException

# Create the workspace, or reuse it when one with this name already exists.
# exist_ok=True makes Workspace.create return the existing workspace instead of
# raising, so there is no need to catch WorkspaceException and fall back to
# Workspace.get. That fallback treated *every* WorkspaceException as
# "already exists", hiding the original creation error behind a lookup error.
ws = Workspace.create(
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
    name=WORKSPACE_NAME,
    location=LOCATION,
    exist_ok=True
)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [5]:
# show workspace details
#ws.get_details()

In [6]:
# Handle to the experiment named "automl-experiment" in workspace `ws`;
# the AutoML run below is submitted through it.
experiment = Experiment(workspace=ws, name="automl-experiment")

# Define the machine learning objective and constraints

In [7]:
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
import logging

# Search budget and scoring settings for the AutoML run, kept together so they
# are easy to tune in one place.
automl_settings = {
    'iteration_timeout_minutes': 5,
    'iterations': 10,
    'primary_metric': 'spearman_correlation',
    'n_cross_validations': 5,
    'verbosity': logging.INFO,
}

# Optional and currently disabled: path = project_folder
automl_config = AutoMLConfig(
    task='regression',
    debug_log='automl.log',
    X=X_train,
    y=y_train,
    **automl_settings
)

In [8]:
# Submit the AutoML configuration; show_output=True streams per-iteration
# progress into the cell output.
local_run = experiment.submit(
    automl_config,
    show_output=True
)

Running on local machine
Parent Run ID: AutoML_4c5cba54-63c9-411b-84bc-cd0cf820ae6d
Current status: DatasetCrossValidationSplit. Generating CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper RandomForest             0:00:44       0.7839    0.7839
         1                                                  0:05:03          nan    0.7839
ERROR: Fit operation exceeded provided timeout, terminat

# Retrieve the best model

In [9]:
from azureml.widgets import RunDetails
# Render the run-details widget for the submitted AutoML run.
run_details_widget = RunDetails(local_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [10]:
# get_output() yields a pair, unpacked here as the best run and its fitted model.
(best_run, fitted_model) = local_run.get_output()

# Test the best model

In [11]:
# Predict on both splits with the fitted model.
y_pred_train = fitted_model.predict(X_train)
y_pred_test = fitted_model.predict(X_test)

# Residuals are actual minus predicted, per sample.
y_residual_train = y_train - y_pred_train
y_residual_test = y_test - y_pred_test