In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="cifar10_cnn_local")

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [2]:
import tensorflow
from keras.datasets import cifar10
from keras import layers, Sequential, optimizers, models
from keras.callbacks import ModelCheckpoint

import os

# Keras settings
batch_size = 32
num_classes = 10
epochs = 5
num_predictions = 20

# the data split between train and test sets
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes)
y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# define the model type 
model = Sequential()
model.add(layers.Conv2D(32, (3, 3), input_shape=x_train.shape[1:]))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.25))

model.add(layers.Conv2D(64, (3, 3)))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.25))

model.add(layers.Flatten())
model.add(layers.Dense(128))
model.add(layers.Activation('relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes))
model.add(layers.Activation('softmax'))

# define model name and file locations
model_name = 'keras_cifar10_trained_model.h5'
model_output_dir = os.path.join(os.getcwd(), 'outputs')

# initiate RMSprop optimizer (https://keras.io/api/optimizers/rmsprop/)
opt = optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# define checkpoint function to only save the model after each epoch if it is "better"
# (decided based on the validation loss function) in the output file path
if not os.path.isdir(model_output_dir):
    os.makedirs(model_output_dir)
model_path = os.path.join(model_output_dir, model_name)
checkpoint_cb = ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True)

# define the loss function, optimizer and additionally tracked metrics of the model training
# (https://keras.io/api/losses/probabilistic_losses/#categoricalcrossentropy-class)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

2022-11-14 14:07:44.380095: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


2022-11-14 14:08:37.144962: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from azureml.core import Run

# define function to get the best value of a specific metric of all runs in the experiment
def get_metrics_from_exp(experiment, metric, status='Completed'):
    for run in Run.list(exp, status=status):
        yield run.get_metrics().get(metric)

# import callback python script from code folder
from code.keras_azure_ml_cb import AzureMlKerasCallback

In [4]:
# Create a run
with exp.start_logging(snapshot_directory='.') as run:

    # create an Azure Machine Learning monitor callback
    azureml_cb = AzureMlKerasCallback(run)

    # train the model for a certain number of epochs
    model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split = 0.2,
            shuffle=True,
            callbacks=[azureml_cb, checkpoint_cb])

    # load the overall best model of all epochs into the model object
    model = models.load_model(model_path)

    # evaluate the best model against the test dataset and log them to Azure ML
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss of best model:', scores[0])
    run.log('Test loss', scores[0])
    print('Test accuracy of best model:', scores[1])
    run.log('Test accuracy', scores[1])

    # Upload the model binary file(s) of the best model
    run.upload_file(model_name, model_path)

    # get the best accuracy out of every run before
    best_test_acc = max(get_metrics_from_exp(exp, 'Test accuracy'), default=0)

    # Register the best model if it is better than in any previous model training
    if scores[1] > best_test_acc:
        run.register_model(model_name, model_path=model_name, model_framework='TfKeras')
  


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss of best model: 1.4003421068191528
Test accuracy of best model: 0.5133000016212463


Tfkeras will be deprecated soon. Use Model.Framework.TENSORFLOW instead.


In [5]:
# Checking the run metrics
print(get_metrics_from_exp(exp, 'Test accuracy'))
for metrics_run in get_metrics_from_exp(exp, 'Test accuracy'):
    print(metrics_run)

<generator object get_metrics_from_exp at 0x7f88238fd740>
0.5133000016212463
0.5077000260353088
0.5090000033378601
0.5105999708175659
