In [49]:
import sys
sys.path.append('..')

import yaml

from mlspeclib.mlobject import MLObject
from mlspeclib.mlschemaenums import MLSchemaTypes
from mlspeclib.mlschema import MLSchema
import os
import uuid
import datetime
from pathlib import Path 


In [50]:
# Identifier shared by every object in this run. Generated locally here;
# normally something external would supply it.
run_id = uuid.uuid4()

# Per-run scratch directory used for local testing: ./tmp_dir/<run_id>
working_dir = Path('.', 'tmp_dir', str(run_id))
working_dir.mkdir(parents=True, exist_ok=True)
print(working_dir)

tmp_dir/07950233-b311-4031-8630-47be2697ef95


In [51]:
# Create an empty MLObject and set its type to the DATAPATH schema,
# version string '0.0.1'.
datapath_object = MLObject()
datapath_object.set_type('0.0.1', MLSchemaTypes.DATAPATH)

In [52]:
# Identify this step: reuse the shared run ID, give the step its own UUID,
# and stamp it with the current local time.
datapath_object.run_id = run_id
datapath_object.step_id = uuid.uuid4()
datapath_object.run_date = datetime.datetime.now()

# Three invalid values are seeded on purpose so that the save() call at the
# end of this cell reports them (see the printed errors).
datapath_object.data_store = None  # This is an intentional bug

# This is an intentional bug (Should be AWS_BLOB)
datapath_object.storage_connection_type = 'AWS_BLOB_OBJECT'
datapath_object.connection.endpoint = None  # Another intentional bug

# NOTE(review): these look like placeholder example credentials (note the
# EXAMPLE suffix) - real keys should never be hardcoded in a notebook.
datapath_object.connection.access_key_id = 'AKIAIOSFODNN7EXAMPLE'
datapath_object.connection.secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'

# save() returns a (response, errors) pair; errors here names data_store,
# connection.endpoint and storage_connection_type.
response, errors = datapath_object.save(working_dir)
print(errors)

{'data_store': ['Field may not be null.'], 'connection': {'endpoint': ['Field may not be null.']}, 'storage_connection_type': ['Must be one of: CUSTOM, AWS_BLOB, GCP_BLOB, AZURE_BLOB, NFS_BLOB, SMB_BLOB.']}


In [53]:
# Replace the three deliberately invalid values with valid ones.
datapath_object.data_store = 'Titanic_Data_Store'
datapath_object.storage_connection_type = 'AWS_BLOB'
datapath_object.connection.endpoint = 'https://s3.us-west-2.amazonaws.com/titanic_data_store'

# Save again; this time the errors dict is empty (printed as {}).
response, errors = datapath_object.save(working_dir)
print(errors)

{}


In [54]:
# Show the path of the YAML file the object reports after the successful save.
print(datapath_object.get_file_path())

tmp_dir/07950233-b311-4031-8630-47be2697ef95/0_0_1_datapath-2020-04-29T18:19:05.426549.yaml


In [55]:
# Load the saved datapath file back into a new object. The call returns a
# pair; the second element is discarded here (presumably load errors - confirm).
input_datapath_object, _ = MLObject.create_object_from_file(datapath_object.get_file_path())

In [56]:
# Round-trip check: the reloaded object carries the endpoint set before saving.
print(input_datapath_object.connection.endpoint)

https://s3.us-west-2.amazonaws.com/titanic_data_store


In [57]:

# Build a TRAIN_EXECUTION object (version string '0.0.1') describing one
# training job, then try to save it.
training_execution = MLObject()
training_execution.set_type('0.0.1', MLSchemaTypes.TRAIN_EXECUTION)

# Same run as the datapath step, with its own step ID and timestamp.
training_execution.run_id = run_id
training_execution.step_id = uuid.uuid4()
training_execution.run_date = datetime.datetime.now()
training_execution.trainable = True

# INTENTIONAL BUG (misspelled 'container' as 'conatiner'); the save() at the
# end of this cell rejects it as an unknown field.
training_execution.training_container_image.conatiner_image_url = "https://docker.io/tensorflow/tensorflow:latest-gpu-py3"
training_execution.training_container_image.container_store = 'training-container-store'
training_execution.tested_platforms = ['wml', 'kubeflow']

# The initial model's data store is taken from the datapath object that was
# reloaded from disk.
training_execution.model_source.initial_model.data_store = input_datapath_object.data_store
training_execution.model_source.initial_model.path = '/tmp/initial_models'
training_execution.model_source.initial_model.bucket = 'titanic-model-bucket'
training_execution.model_source.initial_model.url = 'https://artifacts.contoso.com/models'

training_execution.model_source.initial_model_local.path = '/tmp/local_models'

# Training parameters; learning_rate and other_tags['trainingIters'] are
# reused in the command string below.
training_execution.training_params.epoch = 500
training_execution.training_params.learning_rate = 0.003
training_execution.training_params.loss = 5.3
training_execution.training_params.batch_size = 1000
training_execution.training_params.optimizer = ['Adam', 'SGD']
training_execution.training_params.train_op = 'tanh'
training_execution.training_params.other_tags = {
    'trainingIters': 20000,
    'contains_pii': False,
    'dataset_sha': '24a95219b3fce8402561d6b713bb435d6d5d51f2132d3c32703df8562db5b718'
}

training_execution.evaluation_metrics.metrics_type = 'tensorboard'
training_execution.evaluation_metrics.directory = 'job_states/logs/tb/test'

# The f-string reads values set on training_params above, so it must be
# built after they are assigned.
training_execution.execution.command = f"""
python3 convolutional_network.py 
    --trainImagesFile /model_training_data/train-images-idx3-ubyte.gz
    --trainLabelsFile /model_training_data/train-labels-idx1-ubyte.gz 
    --testImagesFile /model_training_data/t10k-images-idx3-ubyte.gz
    --testLabelsFile /model_training_data/t10k-labels-idx1-ubyte.gz 
    --learningRate {training_execution.training_params.learning_rate}
    --trainingIters {training_execution.training_params.other_tags['trainingIters']}
"""

training_execution.execution.compute_configuration.name = 'k80'
training_execution.execution.compute_configuration.nodes = 1

training_execution.mount_type = 'mount_cos'

# Where the trained model is recorded (remote store and local path).
training_execution.model_training_results.trained_model.data_store = 'age_datastore'
training_execution.model_training_results.trained_model.path = '1.0/assets/'
training_execution.model_training_results.trained_model.bucket = 'facial-age-estimator'
training_execution.model_training_results.trained_model.url = 'https://artifacts.contoso.com/model_results'

training_execution.model_training_results.trained_model_local.path = '/tmp/local_model_data/1.0/assets/'

# Where the training data is recorded (local path and remote store).
training_execution.data_source.training_data_local.path = '/tmp/local_model_data/1.0/assets/'

training_execution.data_source.training_data.data_store = 'age_datastore'
training_execution.data_source.training_data.path = '1.0/assets/'
training_execution.data_source.training_data.bucket = 'facial-age-estimator'
training_execution.data_source.training_data.training_data_url = 'https://artifacts.contoso.com/training_data' 

# Last expression of the cell, so the (success, errors) pair is displayed;
# it is (False, {...}) here because of the misspelled field above.
training_execution.save(working_dir)

(False,
 {'training_container_image': {'conatiner_image_url': ['Unknown field.']}})

In [58]:
# Fix the bug (need to figure out a more elegant way than popping stuff)
training_execution.training_container_image.pop('conatiner_image_url')
training_execution.training_container_image.container_image_url = "https://docker.io/tensorflow/tensorflow:latest-gpu-py3"

training_execution.save(working_dir)

(True, {})

In [59]:
execution_command = f"python train.py '{training_execution.to_yaml()}'" 
result = !{execution_command}

results_dict = {}
for item in result:
    k, v = item.split(':')
    results_dict[k] = v

print(results_dict)

{'accuracy': ' 0.91890001', 'global_step': ' 1000', 'loss': ' 0.28656715'}


In [60]:
# Record the outcome of the training run as a TRAIN_RESULTS object
# (version string '0.0.1').
training_result = MLObject()
training_result.set_type('0.0.1', MLSchemaTypes.TRAIN_RESULTS)

training_result.run_id = run_id
training_result.step_id = uuid.uuid4()
training_result.run_date = datetime.datetime.now()
# Link this result to the training-execution step that produced it.
training_result.training_execution_id = training_execution.step_id
# NOTE(review): these are the raw strings parsed from the training output
# (with leading space, as printed above); save() accepts them, presumably
# because the schema converts them - confirm.
training_result.accuracy = results_dict['accuracy']
training_result.global_step = results_dict['global_step']
training_result.loss = results_dict['loss']

# Displays the (success, errors) pair returned by save().
training_result.save(working_dir)

(True, {})

In [61]:
# List the files written so far. os.listdir() returns entries in arbitrary
# order, so sort them to keep the displayed output stable across re-runs.
sorted(os.listdir(working_dir))

['0_0_1_datapath-2020-04-29T18:19:05.426549.yaml',
 '0_0_1_train_execution-2020-04-29T18:19:07.586315.yaml',
 '0_0_1_train_results-2020-04-29T18:19:09.230867.yaml']

In [62]:
# Create an empty MLObject and set its type to the ENVIRONMENT schema,
# version string '0.1.0'.
environment_object = MLObject()
environment_object.set_type('0.1.0', MLSchemaTypes.ENVIRONMENT)

In [63]:
# Same run ID as the earlier steps, with a fresh step ID and timestamp.
environment_object.run_id = run_id
environment_object.step_id = uuid.uuid4()
environment_object.run_date = datetime.datetime.now()
# Docker base image and Python settings for the environment.
environment_object.docker.baseImage = 'https://mcr.microsoft.com/azureml/base:0.2.2'
environment_object.docker.python.interpreterPath = 'python'
# The dependency list mixes plain strings with a {'pip': [...]} mapping.
environment_object.docker.python.conda.dependencies = [
    "python=3.6.2",
    {'pip': ['torch', 'torchvision']}
]

In [64]:
# Save the environment object; the cell displays the (success, errors) pair.
environment_object.save(working_dir)

(True, {})

In [65]:
# Show the path of the YAML file the environment object reports after saving.
print(environment_object.get_file_path())

tmp_dir/07950233-b311-4031-8630-47be2697ef95/0_1_0_environment-2020-04-29T18:19:10.927671.yaml


In [66]:
# A second ENVIRONMENT object with no attributes set: validate() displays
# every field it reports as null (see the output).
environment_object1 = MLObject()
environment_object1.set_type('0.1.0', MLSchemaTypes.ENVIRONMENT)
environment_object1.validate()

{'docker': {'python': {'interpreterPath': ['Field may not be null.']},
  'baseImage': ['Field may not be null.']},
 'run_date': ['Field may not be null.'],
 'run_id': ['Field may not be null.'],
 'step_id': ['Field may not be null.']}

In [79]:
# Emit a starter snippet for the ENVIRONMENT '0.1.0' schema. The prefix is the
# variable name used in the emitted code (see the output).
# NOTE(review): the effect of type_hints=True is not evident from this output - confirm.
MLObject.code_gen('0.1.0', MLSchemaTypes.ENVIRONMENT, prefix='environment_var', type_hints=True)



environment_var = MLObject()
environment_var.set_type('0.1.0', 'environment')

#
# All required attributes
#

# environment_var.run_id expects -> UUID
environment_var.run_id =

# environment_var.step_id expects -> UUID
environment_var.step_id =

# environment_var.run_date expects -> DateTime
environment_var.run_date =

# environment_var.docker.baseImage expects -> String
environment_var.docker.baseImage =

# environment_var.docker.python.interpreterPath expects -> String
environment_var.docker.python.interpreterPath =

# environment_var.docker.python.conda.dependencies expects -> List
environment_var.docker.python.conda.dependencies =


#
# All optional attributes
#

# environment_var.custom expects -> Dict
environment_var.custom =



In [72]:
# Add the schemas under external_schema/mlperf to the registry - presumably
# what makes the 'dlrm' type used in the next cell resolvable (confirm).
MLSchema.append_schema_to_registry(Path('external_schema/mlperf'))

In [73]:
# Emit a starter snippet for the 'dlrm' schema, version string '0.5.0'; here
# the schema type is passed as a plain string rather than an MLSchemaTypes member.
MLObject.code_gen('0.5.0', 'dlrm', prefix="benchmark_code")



#
# All required attributes
#

# benchmark_code.global_batch_size expects -> Integer
benchmark_code.global_batch_size =

# benchmark_code.opt_base_learning_rate expects -> Float
benchmark_code.opt_base_learning_rate =


#
# All optional attributes
#

