In [1]:
from azureml.core import Workspace, Experiment

# Load the workspace through Workspace.from_config() and bind the experiment
# named "cifar10_cnn_local" to it; printing the experiment shows its name and
# the workspace it is attached to.
ws = Workspace.from_config()
exp = Experiment(name="cifar10_cnn_local", workspace=ws)
print(exp)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Experiment(Name: cifar10_cnn_local,
Workspace: mldemows)


In [2]:
import tensorflow
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import RMSprop
import os
import ssl

# Keras settings
num_classes = 10        # width of the one-hot labels and of the final Dense layer
batch_size = 32         # forwarded to model.fit(batch_size=...)
epochs = 5              # forwarded to model.fit(epochs=...)
num_predictions = 20    # not referenced by any of the cells visible here

# Workaround for the CIFAR-10 download failure described at
# https://stackoverflow.com/questions/69687794/unable-to-manually-load-cifar10-dataset
#
# SECURITY: swapping in ssl._create_unverified_context turns off HTTPS certificate
# verification for every default-context HTTPS connection made by this process.
# The override is therefore limited to the dataset download and the previous
# context factory is restored afterwards (even if the download raises), so
# HTTPS calls made later in the notebook are verified again.
# NOTE(review): installing a proper CA bundle on the machine would remove the
# need for this workaround altogether.
_default_https_context = ssl._create_default_https_context
ssl._create_default_https_context = ssl._create_unverified_context
try:
    # the data split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
finally:
    ssl._create_default_https_context = _default_https_context

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode both label vectors into binary class matrices with
# `num_classes` columns.
y_train, y_test = (
    tensorflow.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Cast the image arrays to float32 and divide by 255
# (assumes 8-bit pixel values, which would give a [0, 1] range - TODO confirm).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# defining our model: two Conv2D -> ReLU -> MaxPooling -> Dropout stages,
# followed by a flattened dense head that ends in a softmax over `num_classes`.
# The input shape is taken from the training images (everything but the sample axis).
model = Sequential([
    # convolutional stage 1 (32 filters)
    Conv2D(32, (3, 3), input_shape=x_train.shape[1:]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # convolutional stage 2 (64 filters)
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # classifier head
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

# define model name and file locations; the model file lives in an "outputs"
# folder under the current working directory
model_name = 'keras_cifar10_trained_model.h5'
model_output_dir = os.path.join(os.getcwd(), 'outputs')
model_path = os.path.join(model_output_dir, model_name)

# initiate RMSprop optimizer (https://keras.io/api/optimizers/rmsprop/)
# NOTE(review): `decay` is a legacy optimizer argument; newer Keras optimizer
# implementations may reject it - confirm against the installed TensorFlow version.
opt = RMSprop(learning_rate=0.0001, decay=1e-6)

# Make sure the output directory exists. exist_ok=True replaces the previous
# "check with isdir, then create" sequence, which could fail if the directory
# appeared between the check and the creation, and keeps re-running this cell safe.
os.makedirs(model_output_dir, exist_ok=True)

# define checkpoint function to only save the model after each epoch if it is "better"
# (decided based on the validation loss function) in the output file path
checkpoint_cb = ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True)

# Configure training: RMSprop optimizer defined above, categorical cross-entropy loss
# (https://keras.io/api/losses/probabilistic_losses/#categoricalcrossentropy-class)
# and accuracy as the additionally tracked metric.
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

2022-11-09 21:08:31.453080: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


2022-11-09 21:08:41.367093: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Create the run BEFORE entering the try block. If start_logging() itself fails
# there is no run to cancel; with the call inside the try, the handler would hit
# an undefined `run` (NameError hiding the real error) or, when the cell is
# re-executed, cancel a stale run left over from an earlier execution.
#
# snapshot_directory="." snapshots the current directory; .venv was added to
# .amlignore because the snapshot was otherwise trying to include the (huge) .venv.
run = exp.start_logging(outputs=None, snapshot_directory=".")

try:
    # train the model for a certain number of epochs; 20% of the training data is
    # held out for validation, which provides the val_loss watched by checkpoint_cb
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              shuffle=True,
              callbacks=[checkpoint_cb])

    # TBD: Log metrics

    # Upload the best model (the file written by the checkpoint callback)
    run.upload_file(model_name, model_path)

    # Register the best model
    # Tfkeras will be deprecated soon. Use Model.Framework.TENSORFLOW instead.
    run.register_model(model_name, model_path=model_name, model_framework='TfKeras')
    run.complete()
except BaseException:
    # Deliberately broad (same reach as the former bare `except:`): a manual kernel
    # interrupt during training must also cancel the run. The exception is always
    # re-raised so the failure stays visible in the notebook.
    run.cancel()
    raise