# Create and run an experiment to train a classification model

Testing different building blocks to train and register an ML model.

1. Set up different alternative compute targets
2. Set up a defined training environment
3. Use registered datasets to have registered data versions
4. Create an Experiment that collects and documents all of the above, so that the model training can be repeated
5. Register the model, so that every trained model gets its own tracked version

In [None]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file.
# from_config() is called without arguments, so the SDK's default lookup is
# used -- presumably a config.json in or above the working directory; TODO confirm.
ws = Workspace.from_config()
# Optional sanity check of the SDK version and workspace name (left disabled):
# print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

In [None]:
# read config data about blob storage
import yaml
import os

# Parse config.yml into the nested dict `cfg` that the later cells index
# (e.g. cfg['datafiles'][...]).
# safe_load builds only plain YAML types (dicts, lists, strings, numbers),
# which is all a settings file needs, and it never constructs arbitrary
# Python objects from tags in the file. The explicit encoding keeps parsing
# independent of the platform's default locale.
with open("config.yml", "r", encoding="utf-8") as ymlfile:
    cfg = yaml.safe_load(ymlfile)

### Create a folder for the experiment files

In [None]:
import os

# Directory that holds the experiment files; it is created on first use and
# left untouched when it already exists.
experiment_folder = '../classification_scripts/'
os.makedirs(name=experiment_folder, exist_ok=True)
print('Experiment folder: ' + experiment_folder)

### Define and create two compute targets: one CPU cluster, one GPU cluster

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "laskukone11"
gpucluster_name = "laskukone-nc6"


def _create_cluster(workspace, name, vm_size, max_nodes):
    """Start provisioning an AmlCompute cluster and return its compute target.

    Args:
        workspace: Workspace in which the cluster is created.
        name: Name of the new compute target.
        vm_size: Azure VM size of the cluster nodes.
        max_nodes: Upper bound for the number of nodes.

    Returns:
        The compute target returned by ``ComputeTarget.create``. The caller is
        responsible for calling ``wait_for_completion`` on it.
    """
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=0,
        max_nodes=max_nodes)
    return ComputeTarget.create(workspace, name, provisioning_config)


# --- CPU cluster -----------------------------------------------------------
# Bind the name up front so it is always defined (and never a stale value from
# an earlier run of this cell), even when creating the cluster fails below.
training_cluster = None
try:
    # Check for an existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing CPU-cluster:', training_cluster.name)
except ComputeTargetException:
    # It does not exist yet: create it. Provisioning problems are reported but
    # do not abort the notebook; training_cluster stays None if the creation
    # itself failed.
    try:
        training_cluster = _create_cluster(
            ws, cluster_name, vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)


# --- GPU cluster -----------------------------------------------------------
# Unlike the CPU cluster above, a provisioning failure here is not caught and
# propagates to the caller.
try:
    compute_target = ComputeTarget(workspace=ws, name=gpucluster_name)
    print('Found existing GPU-cluster:', compute_target.name)
except ComputeTargetException:
    print('creating new.')
    compute_target = _create_cluster(
        ws, gpucluster_name, vm_size='Standard_NC6', max_nodes=1)
    compute_target.wait_for_completion(show_output=True)

### Define the environment

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Take an existing environment as the starting point and customise a clone of
# it, so the source environment itself is not modified.
defined_env_name = "AzureML-Tutorial"
preconf_env = Environment.get(workspace=ws, name=defined_env_name)
own_mod = preconf_env.clone(new_name='own_mod')

# Enable Docker and select a CUDA 10.1 / cuDNN 7 base image.
own_mod.docker.enabled = True
own_mod.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'

conda_dep = own_mod.python.conda_dependencies

# Conda packages to install (added in this order).
for conda_pkg in (
        'scikit-learn',
        'pandas',
        'matplotlib',
        'numpy',
        'tqdm',
):
    conda_dep.add_conda_package(conda_pkg)

# Pip packages to install (added in this order).
for pip_pkg in (
        "azureml-pipeline-core",
        "azureml-widgets",
        "azureml-pipeline-steps",
        "azureml-train",
        "azureml-sdk",
        "azureml-interpret",
        'azureml-defaults',
        'azureml-dataprep[pandas]',
):
    conda_dep.add_pip_package(pip_pkg)

# These pinned AutoML packages come with the source environment. They are not
# needed here and they break the build (the dependency solver cannot resolve
# them), so they are removed.
for unwanted_pkg in (
        "azureml-automl-runtime==1.19.0",
        "azureml-train-automl-client==1.19.0",
        "azureml-train-automl-runtime==1.19.0",
        "azureml-train-automl==1.19.0",
        "azureml-automl-core==1.19.0",
):
    conda_dep.remove_pip_package(unwanted_pkg)

# TensorFlow (GPU build) goes through its dedicated helper; adding it like the
# other conda packages did not work.
conda_dep.add_tensorflow_conda_package(core_type='gpu', version="2.2")

conda_dep.set_python_version(version="3.8")

# Register the customised environment in the workspace.
own_mod.register(ws)

#own_mod

### Get the training dataset

In [None]:
def _get_registered_dataset(workspace, dataset_name):
    """Return the dataset registered as *dataset_name*, failing fast if absent.

    ``workspace.datasets.get`` yields ``None`` for a name that is not
    registered; without this check the problem would only surface later as an
    ``AttributeError`` when the dataset is used.

    Args:
        workspace: Workspace whose registered datasets are searched.
        dataset_name: Name under which the dataset was registered.

    Returns:
        The registered dataset object.

    Raises:
        LookupError: If no dataset with that name is registered.
    """
    dataset = workspace.datasets.get(dataset_name)
    if dataset is None:
        raise LookupError(
            "Dataset '{}' is not registered in this workspace".format(dataset_name))
    return dataset


# The dataset names are read from the 'datafiles' section of the config.
concentration_ds = _get_registered_dataset(ws, cfg['datafiles']['conc_datasetname'])
classification_ds = _get_registered_dataset(ws, cfg['datafiles']['class_datasetname'])
# print(classification_ds)

### Create and run Experiment

In [None]:

from azureml.train.estimator import Estimator
from azureml.core import Experiment
from azureml.widgets import RunDetails

# Where the training runs. Other options that have been used here:
# 'local' and the CPU cluster 'laskukone11'.
compute_target = "laskukone-nc6"

# The two registered datasets are handed to the training script as named inputs.
training_inputs = [
    concentration_ds.as_named_input('hyde_data'),
    classification_ds.as_named_input('class_data'),
]

# Estimator: bundles script folder, inputs, compute target and environment.
estimator = Estimator(
    source_directory=experiment_folder,
    inputs=training_inputs,
    compute_target=compute_target,
    environment_definition=own_mod,
    entry_script='aerosol_training.py',
)

# Experiment under which the run is recorded.
experiment = Experiment(workspace=ws, name='aerosol-training')

# Submit the run, show its details widget, and block until it has finished.
run = experiment.submit(config=estimator)
RunDetails(run).show()
run.wait_for_completion()

# Register the model file written by the run and print name, id and version
# separated by tabs.
model = run.register_model(
    model_name='aerosol_classification',
    model_path='outputs/aerosol_classification_model.h5',
)
print('\t'.join(str(field) for field in (model.name, model.id, model.version)))

In [None]:
# Debugging aid: print the driver log of the run.
driver_log_key = 'azureml-logs/70_driver_log.txt'
log_files = run.get_details_with_logs()['logFiles']
print(log_files[driver_log_key])

