In [1]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build the credential first, then create the workspace client from the
# workspace config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: ./.azureml/config.json


In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import os

# Training settings
batch_size = 32
num_classes = 10
epochs = 5
num_predictions = 20

# Load the dataset, which arrives already split into train and test sets
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the class vectors of both splits
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Cast the images to float32 and divide every value by 255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Model architecture: two convolution / pooling / dropout stages followed by
# a dense classifier that ends in a softmax over `num_classes` outputs.
model = Sequential([
    # first convolutional stage; the input shape is taken from the training images
    Conv2D(32, (3, 3), input_shape=x_train.shape[1:]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # second convolutional stage
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # classifier head
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

# Name of the saved model and the directory it is written to
# (an 'outputs' folder below the current working directory).
model_name = 'keras_cifar10_trained_model.tf'
model_output_dir = os.path.join(os.getcwd(), 'outputs')

# Adam optimizer (https://keras.io/api/optimizers/adam/)
opt = Adam(learning_rate=0.001)

# Make sure the output directory exists. `exist_ok=True` turns this into a
# no-op when the directory is already there, without a separate existence
# check that could race with another process creating it.
os.makedirs(model_output_dir, exist_ok=True)
model_path = os.path.join(model_output_dir, model_name)

# Checkpoint callback: after an epoch the model is written to `model_path`
# only if it is "better" than what was saved before, judged by the
# validation loss.
checkpoint_cb = ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True)

# Define the loss function, the optimizer and the additionally tracked metrics
# (https://keras.io/api/losses/probabilistic_losses/#categoricalcrossentropy-class)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

2023-08-14 13:31:08.268489: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-14 13:31:08.365011: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-14 13:31:08.366653: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [3]:
# Look up the current workspace and read its MLflow tracking URI
workspace = ml_client.workspaces.get(ml_client.workspace_name)
mlflow_tracking_uri = workspace.mlflow_tracking_uri
print(mlflow_tracking_uri)

azureml://switzerlandnorth.api.azureml.ms/mlflow/v1.0/subscriptions/3a01bfbc-e48f-4e7d-9ea4-dcf5d7e90278/resourceGroups/ml-edu/providers/Microsoft.MachineLearningServices/workspaces/ml-edu


In [4]:
import mlflow

# Name of the MLflow experiment used by this notebook
experiment_name = 'cifar10_cnn_local'

# Point MLflow at the tracking URI obtained from the workspace
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Select the experiment by name and keep the returned experiment object
experiment = mlflow.set_experiment(experiment_name=experiment_name)

In [62]:
from mlflow.entities import Experiment

# define function to get the best value of a specific metric of all runs in the experiment
def get_metrics_from_exp(experiment:Experiment, metric:str, status:str='FINISHED'):
    """Return the highest logged value of ``metric`` among the experiment's runs.

    Args:
        experiment: MLflow experiment whose runs are searched; only its
            ``experiment_id`` is used.
        metric: Metric name without the ``metrics.`` column prefix,
            e.g. ``'Test accuracy'``.
        status: Run status name used to build the search filter.

    Returns:
        The maximum of the metric over the matching runs (missing values are
        skipped), or ``float('-inf')`` when no matching run reports the
        metric, so that any real score compares as better.
    """
    # NOTE(review): RunStatus.from_string is believed to return the numeric
    # enum value rather than the status name, which would make the filter
    # compare against a number in quotes - confirm against the tracking backend.
    status_value = mlflow.entities.RunStatus.from_string(status)
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        filter_string=f"attributes.status = '{status_value}'",
        output_format='pandas',
    )

    # The metric column only exists if at least one returned run logged it
    column = f"metrics.{metric}"
    if column not in runs.columns:
        return float('-inf')

    # Reduce the column Series directly instead of indexing the result of
    # DataFrame.max() with a positional `[0]` on a label-indexed Series.
    values = runs[column].dropna()
    if values.empty:
        return float('-inf')
    return values.max()

In [63]:
# import callback python script from code folder
from src.keras_azure_ml_cb import AzureMlKerasCallback

# Create a run: train, evaluate the best checkpoint, log the results and
# register the model if it beats every earlier run of the experiment.
with mlflow.start_run() as run:
    # create an Azure Machine Learning monitor callback
    azureml_cb = AzureMlKerasCallback()

    # train the model for a certain number of epochs; 20% of the training
    # data is held out for validation
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              shuffle=True,
              callbacks=[azureml_cb, checkpoint_cb])

    # load the overall best model of all epochs (as written to model_path
    # during training) into the model object
    model = tf.keras.models.load_model(model_path)

    # evaluate the best model against the test dataset
    scores = model.evaluate(x_test, y_test, verbose=1)

    # Get the best accuracy of the earlier runs BEFORE this run logs its own
    # 'Test accuracy', so the comparison below can never include this run's
    # own value, even if the status filter does not exclude the active run.
    try:
        best_test_acc = get_metrics_from_exp(experiment, 'Test accuracy')
    except KeyError:
        # the metric column is missing, i.e. no earlier run logged 'Test accuracy'
        best_test_acc = None

    # log the evaluation results to Azure ML
    print('Test loss of best model:', scores[0])
    mlflow.log_metric('Test loss', scores[0])
    print('Test accuracy of best model:', scores[1])
    mlflow.log_metric('Test accuracy', scores[1])

    # Upload the model binary file(s) of the best model
    mlflow.tensorflow.log_model(model, model_name, input_example=x_train[:3,:])

    # Register the model if there is nothing to compare against yet or if it
    # is better than in any previous model training
    if best_test_acc is None or scores[1] > best_test_acc:
        model_uri = f"runs:/{run.info.run_id}/{model_name}"
        mlflow.register_model(model_uri, model_name)


Epoch 1/5


2023-08-14 14:42:16.405903: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 491520000 exceeds 10% of free system memory.


Epoch 2/5
Epoch 3/5
Epoch 4/5


INFO:tensorflow:Assets written to: /workspaces/miniconda/Mastering-Azure-Machine-Learning-Second-Edition/chapter03/outputs/keras_cifar10_trained_model.tf/assets


Epoch 5/5
Test loss of best model: 0.8309738039970398
Test accuracy of best model: 0.7125999927520752
INFO:tensorflow:Assets written to: /tmp/tmprbq1z_8b/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmprbq1z_8b/model/data/model/assets


TypeError: 'numpy.float64' object is not iterable

In [64]:
# Display the test accuracy (second entry of `scores`) of the evaluated model
scores[1]

0.7125999927520752

In [53]:
# Fetch the runs of the experiment whose status matches 'FINISHED' and show
# them as a table.
finished_status = mlflow.entities.RunStatus.from_string('FINISHED')
runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string=f"attributes.status = '{finished_status}'",
)
display(runs)



Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.val_accuracy,metrics.accuracy,metrics.val_loss,metrics.loss,...,params.workers,params.max_queue_size,params.shuffle,params.validation_freq,params.opt_beta_2,params.sample_weight,params.class_weight,tags.mlflow.runName,tags.mlflow.rootRunId,tags.mlflow.user
0,06b635ad-7512-4504-9d50-e47ce22b8339,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-07-26 07:05:20.419000+00:00,2023-07-26 07:20:05.204000+00:00,0.6418,0.58745,1.031453,1.167487,...,1.0,10.0,True,1.0,0.999,,,boring_hair_jj7d4kkg,06b635ad-7512-4504-9d50-e47ce22b8339,Martin Spengler
1,63910742-ac57-4536-91d4-9a03f699a603,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-07-26 13:25:34.917000+00:00,2023-07-26 13:25:34.917000+00:00,,,,,...,,,,,,,,serene_stomach_1lbv3g4n,63910742-ac57-4536-91d4-9a03f699a603,Martin Spengler
2,d774a3c9-8f08-47c7-8e2d-f655dd43bf62,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-07-26 13:25:44.352000+00:00,2023-07-26 13:25:44.352000+00:00,,,,,...,,,,,,,,sincere_peach_z3m1h3xw,d774a3c9-8f08-47c7-8e2d-f655dd43bf62,Martin Spengler
3,b40cd57f-b907-460e-a6eb-985cf8d721af,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-07-26 13:25:51.587000+00:00,2023-07-26 13:25:51.587000+00:00,,,,,...,,,,,,,,zen_stamp_gcyxk9dx,b40cd57f-b907-460e-a6eb-985cf8d721af,Martin Spengler
4,72b44cd3-2dab-494c-895b-27628b2d4a9e,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-07-26 13:49:45.253000+00:00,2023-07-26 14:05:29.158000+00:00,0.6499,0.58215,1.008217,1.179274,...,1.0,10.0,True,1.0,0.999,,,loyal_cheetah_j4535djh,72b44cd3-2dab-494c-895b-27628b2d4a9e,Martin Spengler
5,e5016c3f-1e94-4382-aec4-1b23b0dbbbaf,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-08-01 15:36:26.793000+00:00,2023-08-01 15:51:45.969000+00:00,0.6431,0.581875,1.045737,1.183045,...,1.0,10.0,True,1.0,0.999,,,blue_gold_4zzg57tw,e5016c3f-1e94-4382-aec4-1b23b0dbbbaf,Martin Spengler
6,3747967b-ff1d-4728-a6bc-e709c41639c2,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-08-14 09:00:16.967000+00:00,2023-08-14 09:02:14.197000+00:00,0.642,0.586475,1.045111,1.170261,...,1.0,10.0,True,1.0,0.999,,,wheat_chain_tvxhdr34,3747967b-ff1d-4728-a6bc-e709c41639c2,Martin Spengler
7,7e46516b-2864-4161-a578-2cd39d1f7a93,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-08-14 11:59:57.445000+00:00,2023-08-14 12:01:53.969000+00:00,0.6461,0.58455,1.035101,1.177739,...,1.0,10.0,True,1.0,0.999,,,happy_seed_t3ktz4wx,7e46516b-2864-4161-a578-2cd39d1f7a93,Martin Spengler
8,14155c7d-dc60-4fd0-8bfd-e544e405f9a8,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-08-14 12:19:00.041000+00:00,2023-08-14 12:20:58.741000+00:00,0.6458,0.58245,1.025846,1.182506,...,1.0,10.0,True,1.0,0.999,,,teal_spring_16k23z70,14155c7d-dc60-4fd0-8bfd-e544e405f9a8,Martin Spengler
9,d7896875-7ec3-4e7b-8761-fa51c3e6b536,d1cf2d6c-b32f-437c-9d6e-6f0cc7882bc9,FINISHED,,2023-08-14 13:41:09.309000+00:00,2023-08-14 13:43:02.094000+00:00,0.7087,0.67095,0.842959,0.927932,...,,,,,,,,dreamy_yogurt_0hx7ffr9,d7896875-7ec3-4e7b-8761-fa51c3e6b536,Martin Spengler


In [60]:
# Highest value of the chosen metric in the `runs` table. The column is
# selected as a Series and reduced directly, which avoids a positional `[0]`
# lookup on a Series that is indexed by column labels.
metric = 'Test accuracy'
runs[f"metrics.{metric}"].max()

0.7056999802589417