# Step 1 - Get or create Workspace and create AML Compute cluster #

In [None]:
import azureml
from azureml.core import Run
from azureml.core import Workspace
from azureml.core.experiment import Experiment

# Show which azureml SDK version this notebook kernel is using
print("SDK Version: {}".format(azureml.core.VERSION))

In [None]:
# ID of your existing Azure subscription (replace the placeholder value)
subscription_id = "xxx-xxx-xxx"

# Names of the Resource Group and Workspace that will be created
resource_group, workspace_name = "service-labs", "service-labs-ws"

# Region for the workspace.
# Options listed for this lab: eastus, westcentralus, southeastasia, australiaeast, westeurope
workspace_region = "eastus"

In [None]:
# exist_ok=True: if the workspace already exists, we get a reference to the
# existing workspace back.
workspace_settings = dict(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
    exist_ok=True,
)
ws = Workspace.create(**workspace_settings)

print("Workspace Provisioning complete.")

In [None]:
### Create AML Compute Cluster (an existing cluster with the same name is reused)
# NOTE(review): this cell was labelled "CPU based", but Standard_NC6 is an
# NC-series (GPU) VM size -- confirm which one is intended.
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "amlcompute-ad"

try:
    # Look the cluster up by name; a ComputeTargetException is treated below
    # as "no such cluster yet".
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_NC6',
        min_nodes=1,
        max_nodes=1,
    )

    # Create the cluster and wait until provisioning has finished
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
else:
    print('Found existing compute target.')

# The 'status' property gives a detailed status for the current AmlCompute
print(compute_target.status.serialize())

# Step 2 - Create training script #

In [None]:
%%writefile train.py
import numpy as np
import timeit
import keras
from keras import backend as K
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model as KModel
from keras.utils.vis_utils import plot_model
import azureml
from azureml.core import Run
from azureml.core.model import Model
import pickle

# Log the azureml SDK version seen by the training script
print("SDK Version: {}".format(azureml.core.VERSION))

# The Fashion-MNIST dataset is used for training
from keras.datasets import fashion_mnist

# Download and load the data, then unpack the (train, test) pairs
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
print('Fashion MNIST dataset loaded!')

# ---- Encoder: three Conv2D + MaxPooling2D stages on 28x28x1 images ----
input_img = Input(shape=(28, 28, 1))

enc = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
enc = MaxPooling2D((2, 2), padding='same')(enc)
enc = Conv2D(8, (3, 3), activation='relu', padding='same')(enc)
enc = MaxPooling2D((2, 2), padding='same')(enc)
enc = Conv2D(8, (3, 3), activation='relu', padding='same')(enc)
# The layer is named so the encoder-only model can look it up later
encoded_feature_vector = MaxPooling2D((2, 2), padding='same', name='feature_vector')(enc)

# At this point the representation is (4, 4, 8), i.e. a 128-dimensional
# compressed feature vector.

# ---- Decoder: Conv2D + UpSampling2D stages back to an image ----
dec = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded_feature_vector)
dec = UpSampling2D((2, 2))(dec)
dec = Conv2D(8, (3, 3), activation='relu', padding='same')(dec)
dec = UpSampling2D((2, 2))(dec)
# NOTE: no padding='same' on this layer (Keras default is 'valid'), so the 3x3
# convolution trims 16x16 to 14x14 and the last upsampling gives 28x28 again.
dec = Conv2D(16, (3, 3), activation='relu')(dec)
dec = UpSampling2D((2, 2))(dec)
decoded_output = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(dec)

# Model 1: the full autoencoder -- takes the input image and produces the
# decoded (reconstructed) image.
autoencoder_model = KModel(input_img, decoded_output)
autoencoder_model.compile(optimizer='adadelta', loss='binary_crossentropy')

# Model 2: only the encoder half of the first model -- takes the input image
# and outputs the encoded vector taken from the 'feature_vector' layer.
encoder_model = KModel(inputs=autoencoder_model.input,
                       outputs=autoencoder_model.get_layer('feature_vector').output)
encoder_model.compile(optimizer='adadelta', loss='binary_crossentropy')

print('')
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the log.
autoencoder_model.summary()
print('')

# Scale pixel values from [0-255] to [0-1] (better for the activation
# functions) and reshape each split to (num_images, 28, 28, 1), because the
# network is trained on batches of single-channel images.
x_train = np.reshape(x_train / 255., (len(x_train), 28, 28, 1))
x_test = np.reshape(x_test / 255., (len(x_test), 28, 28, 1))

train_shape, test_shape = x_train.shape, x_test.shape
print("Train dataset size is {0}".format(train_shape))
print("Test dataset size is {0}".format(test_shape))

print("Model training starting...")
training_started_at = timeit.default_timer()

# Training takes several minutes, depending on the configuration of your cluster.
# The autoencoder is fitted to reproduce its own input, so x is also the target.
fit_options = dict(
    epochs=10,
    batch_size=128,
    shuffle=True,
    validation_data=(x_test, x_test),
    verbose=1,
)
history = autoencoder_model.fit(x=x_train, y=x_train, **fit_options)

elapsed_time = timeit.default_timer() - training_started_at
print("Model training completed.")
elapsed_minutes = round(elapsed_time / 60.0, 0)
print('Elapsed time (min): ', elapsed_minutes)

# train.py uses os.makedirs / os.path.join / os.chdir but never imported `os`,
# so the script failed with a NameError right after training finished.
import os

# Folder that receives the model files and the training history
output_dir = './outputs'
os.makedirs(output_dir, exist_ok=True)

# save the models
autoencoder_model.save(os.path.join(output_dir, 'autoencoder_model.h5'))
encoder_model.save(os.path.join(output_dir, 'encoder_model.h5'))

# save training history (string form of the per-epoch metrics dict)
with open(os.path.join(output_dir, 'history.txt'), 'w') as f:
    f.write(str(history.history))

print("Models saved in ./outputs folder")
print("Saving model files completed.")

# Register the autoencoder model in the workspace of the current run
run = Run.get_context()

model_path = 'autoencoder_model.h5'
model_name = 'fashion_autoencoder'
model_description = 'Autoencoder network for Fashion-MNIST dataset.'

# Registration is done from inside ./outputs so that model_path is just the
# file name.
os.chdir("./outputs")
try:
    model = Model.register(
        model_path=model_path,  # this points to a local file
        model_name=model_name,  # this is the name the model is registered as
        tags={"type": "autoencoder", "run_id": run.id},
        description=model_description,
        workspace=run.experiment.workspace
    )
finally:
    # Always go back to the original working directory, even if the
    # registration call raises.
    os.chdir("..")

print("Model registered: {} \nModel Description: {} \nModel Version: {}".format(model.name, 
                                                                                model.description, 
                                                                                model.version))


# Step 3 - Remotely Train the Autoencoder Network using the AML Compute #

In [None]:
from azureml.train.dnn import TensorFlow
from azureml.train.estimator import Estimator

# Estimator that runs train.py from the current directory on the AML Compute
# cluster, with pinned numpy (conda) and keras (pip) packages.
estimator_settings = dict(
    source_directory='.',
    compute_target=compute_target,
    entry_script='train.py',
    conda_packages=['numpy==1.16.4'],
    pip_packages=['keras==2.3.1'],
    framework_version='2.0',
)
keras_est = TensorFlow(**estimator_settings)

In [None]:
# Experiment in the workspace under which the training run is submitted
experiment_name = 'aml-service-lab05'
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Submit the estimator to the experiment, tagging the run as an autoencoder run
run_tags = {"type": "autoencoder"}
run = experiment.submit(keras_est, tags=run_tags)

## Monitor the run

Using the azureml Jupyter widget, you can monitor the training run. Training takes approximately 5-10 minutes to complete. Once the training is completed, you can download the trained models locally by running the **Download the trained models** cell.

In [None]:
from azureml.widgets import RunDetails

# Display the monitoring widget for the submitted run
run_widget = RunDetails(run)
run_widget.show()

Note: the RunDetails widget is currently not supported in **Visual Studio Code**. If you are using Visual Studio Code, uncomment the line below and run the cell to monitor the experiment run and wait for it to complete.

In [None]:
#run.wait_for_completion(show_output=True)

# Step 4 - Download the trained models #

In [None]:
# `os` is not imported anywhere else in this notebook, so import it here;
# without it this cell fails with a NameError.
import os

# create an output folder in the current directory
os.makedirs('./outputs', exist_ok=True)

# Download every file the run stored under 'outputs' into the local ./outputs
# folder, keeping only the last component of the remote name.
for remote_name in run.get_file_names():
    if not remote_name.startswith('outputs'):
        continue
    output_file_path = os.path.join('./outputs', remote_name.split('/')[-1])
    print('Downloading from {} to {} ...'.format(remote_name, output_file_path))
    run.download_file(name=remote_name, output_file_path=output_file_path)

In [None]:
!ls './outputs'

# Step 5 - Review registered model #

In [None]:
from azureml.core.model import Model

# All models registered in the workspace
model_list = Model.list(ws)
if not model_list:
    # Fail with a clear message instead of an IndexError on an empty list
    raise RuntimeError("No registered models were found in the workspace; "
                       "run the training step first.")

# Pick the most recently created model; max() avoids sorting the whole list
latest_model = max(model_list, key=lambda m: m.created_time)
latest_model_name = latest_model.name
latest_model_path = latest_model.get_model_path(latest_model_name, _workspace=ws)
latest_model_description = latest_model.description
latest_model_version = latest_model.version
# 'run_id' is the tag written by train.py when it registers the model
latest_model_run_id = latest_model.tags.get("run_id")

print('Model name: ', latest_model_name)
print('Model path: ', latest_model_path)
print('Model description: ', latest_model_description)
print('Model version: ', latest_model_version)
print('Training run id: ', latest_model_run_id)