Azure ML & Azure Databricks notebooks by René Bremer (original taken from Parashar Shah)

Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

##### In this notebook the following steps will be executed:

1. Log metrics of the models that were trained on 2000 pictures and on all 60000 pictures
2. Register best model (trained with 60000 pictures)

Make sure you added the libraries azureml-sdk[databricks], Keras and TensorFlow to your cluster.

#0. Set parameters

In [0]:
# --- Azure ML workspace coordinates ---
# NOTE(review): Azure subscription IDs are normally bare GUIDs; this value carries
# a "Workspace-" prefix and "-deployment" suffix — confirm it is the intended ID.
subscription_id = "Workspace-e18a6833-eeb6-4db0-858d-b098746f4034-deployment"
resource_grp = "newres"
workspace = "newws"

# --- Saved Keras models: folder and file names ---
path = "/dbfs/CIFAR10/models/"
par_model2000_name = "cifar_2000pictures.h5"
par_modelall_name = "cifar_allpictures.h5"

# --- Azure ML experiment the metrics are logged under ---
par_experiment_name = "cifar10"

# Troubleshooting: if a cell ends up with status "cancelled" after execution,
# uninstall the libraries, restart the cluster and reinstall the libraries.

#1.  Log metrics of models

##### 1a. Authenticate to Azure ML workspace (interactive, using AAD and browser)

In [0]:
import sys
import requests
import time
import base64
import datetime
import azureml.core
import shutil
import os, json
from azureml.core import Workspace
from azureml.core.run import Run
from azureml.core.experiment import Experiment
from azureml.core.model import Model
import azureml.core
from azureml.core.authentication import ServicePrincipalAuthentication

# Get a handle on the Azure ML workspace described by the parameter cell.
# No explicit auth object is passed here.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_grp,
    workspace_name=workspace,
)

# Last expression of the cell: the workspace details are shown as cell output.
ws.get_details()

##### 1b. Load the models from disk, where they were stored by the previous notebook 1_DeepLearningCifar10NotebookExploration.py

In [0]:
import keras
from keras.models import load_model

# Build the full file path of each saved model. os.path.join only inserts a
# separator when one is missing, so `path` works with or without a trailing slash.
model2000path = os.path.join(path, par_model2000_name)
modelallpath = os.path.join(path, par_modelall_name)

# Load both Keras models from disk.
model2000 = load_model(model2000path)
modelall = load_model(modelallpath)

In [0]:
# Show the summary of the model that was loaded from the 2000-picture model file.
model2000.summary()

##### 1c. Get test data to regenerate metrics

In [0]:
from keras.datasets import cifar10
# Number of label classes used for the one-hot encoding below.
num_classes = 10

# Load the data set, which comes split into a train part and a test part.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Labels: class indices -> one-hot vectors of length num_classes.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Images: cast to float32 and divide by 255
# (presumably maps 8-bit pixel values into [0, 1] — confirm against the training notebook).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

##### 1d. Create a new experiment in the Azure ML service workspace and add both models

In [0]:
# Get a handle on the experiment and start an interactive run to log against.
myexperiment = Experiment(ws, par_experiment_name)
run = myexperiment.start_logging()

try:
    # Re-evaluate both models on the test set. The results are indexed below as
    # [0] = loss and [1] = accuracy (assumes the models were compiled with
    # accuracy as their first metric — confirm against the training notebook).
    score2000 = model2000.evaluate(x_test, y_test, verbose=0)
    scoreall = modelall.evaluate(x_test, y_test, verbose=0)

    # Each metric is logged as a two-element list:
    # [2000-picture model, all-picture model].
    run.log_list("Test accuracy 2000 pics, all pics", [score2000[1], scoreall[1]])
    run.log_list("Test loss 2000 pics, all pics", [score2000[0], scoreall[0]])

    # Attach the serialized model files to the run under outputs/.
    run.upload_file("outputs/" + par_model2000_name, model2000path)
    run.upload_file("outputs/" + par_modelall_name, modelallpath)
except Exception as exc:
    # Close the run as failed so it is not left open in the workspace,
    # then let the original error surface in the notebook.
    run.fail(error_details=str(exc))
    raise
else:
    run.complete()

# Keep the run id; it is attached as a tag when the model is registered.
run_id = run.id
print("run id:", run_id)

#2. Register best model (trained with 60000 pictures)

In [0]:
# Register the model trained on all pictures from its local file. The id of the
# logging run is stored as a tag so the registered model can be traced back to it.
registermodelall = Model.register(
    workspace=ws,
    model_name=par_modelall_name,  # name the model is registered under
    model_path=modelallpath,  # local file holding the model
    description="Keras deeplearning, all pictures",
    tags={"area": "spark", "type": "deeplearning", "run_id": run_id},
)

# Report name, description and version of the registered model.
print(
    "Model registered: {} \nModel Description: {} \nModel Version: {}".format(
        registermodelall.name,
        registermodelall.description,
        registermodelall.version,
    )
)