In [5]:
import sys
sys.path.append('..')

import yaml

import mlspeclib
from mlspeclib.mlobject import MLObject
from mlspeclib.mlschemaenums import MLSchemaTypes
from mlspeclib.mlschema import MLSchema
import os
import uuid
import datetime
from pathlib import Path 
print(mlspeclib.__version__)

AttributeError: module 'mlspeclib' has no attribute '__version__'

In [2]:
# Run ID - we'd normally get this from something external
run_id = uuid.uuid4()

# Local directory for testing
working_dir = Path('.') / 'tmp_dir' / str(run_id)
working_dir.mkdir(parents=True, exist_ok=True)
print (working_dir)

tmp_dir/b04ef9a4-09b9-44f3-aab0-b4011755301d


In [None]:
datapath_object = MLObject()
datapath_object.set_type('0.0.1', MLSchemaTypes.DATAPATH)

In [None]:
datapath_object.run_id = run_id
datapath_object.step_id = uuid.uuid4()
datapath_object.run_date = datetime.datetime.now()

datapath_object.data_store = None  # This is an intentional bug

# This is an intentional bug (Should be AWS_BLOB)
datapath_object.storage_connection_type = 'AWS_BLOB_OBJECT'
datapath_object.connection.endpoint = None  # Another intentional bug

datapath_object.connection.access_key_id = 'AKIAIOSFODNN7EXAMPLE'
datapath_object.connection.secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'

response, errors = datapath_object.save(working_dir)
print(errors)

In [None]:
datapath_object.data_store = 'Titanic_Data_Store'
datapath_object.storage_connection_type = 'AWS_BLOB'
datapath_object.connection.endpoint = 'https://s3.us-west-2.amazonaws.com/titanic_data_store'

response, errors = datapath_object.save(working_dir)
print(errors)

In [None]:
print(datapath_object.get_file_path())

In [None]:
input_datapath_object, _ = MLObject.create_object_from_file(datapath_object.get_file_path())

In [None]:
print(input_datapath_object.connection.endpoint)

In [None]:
training_execution = MLObject()
training_execution.set_type('0.0.1', MLSchemaTypes.TRAIN_EXECUTION)

training_execution.run_id = run_id
training_execution.step_id = uuid.uuid4()
training_execution.run_date = datetime.datetime.now()
training_execution.trainable = True

# INTENTIONAL BUG (mispelled 'container' as 'container')
training_execution.training_container_image.conatiner_image_url = "https://docker.io/tensorflow/tensorflow:latest-gpu-py3"
training_execution.training_container_image.container_store = 'training-container-store'
training_execution.tested_platforms = ['wml', 'kubeflow']

training_execution.model_source.initial_model.data_store = input_datapath_object.data_store
training_execution.model_source.initial_model.path = '/tmp/initial_models'
training_execution.model_source.initial_model.bucket = 'titanic-model-bucket'
training_execution.model_source.initial_model.url = 'https://artifacts.contoso.com/models'

training_execution.model_source.initial_model_local.path = '/tmp/local_models'

training_execution.training_params.epoch = 500
training_execution.training_params.learning_rate = 0.003
training_execution.training_params.loss = 5.3
training_execution.training_params.batch_size = 1000
training_execution.training_params.optimizer = ['Adam', 'SGD']
training_execution.training_params.train_op = 'tanh'
training_execution.training_params.other_tags = {
    'trainingIters': 20000,
    'contains_pii': False,
    'dataset_sha': '24a95219b3fce8402561d6b713bb435d6d5d51f2132d3c32703df8562db5b718'
}

training_execution.evaluation_metrics.metrics_type = 'tensorboard'
training_execution.evaluation_metrics.directory = 'job_states/logs/tb/test'

training_execution.execution.command = f"""
python3 convolutional_network.py 
    --trainImagesFile /model_training_data/train-images-idx3-ubyte.gz
    --trainLabelsFile /model_training_data/train-labels-idx1-ubyte.gz 
    --testImagesFile /model_training_data/t10k-images-idx3-ubyte.gz
    --testLabelsFile /model_training_data/t10k-labels-idx1-ubyte.gz 
    --learningRate {training_execution.training_params.learning_rate}
    --trainingIters {training_execution.training_params.other_tags['trainingIters']}
"""

training_execution.execution.compute_configuration.name = 'k80'
training_execution.execution.compute_configuration.nodes = 1

training_execution.mount_type = 'mount_cos'

training_execution.model_training_results.trained_model.data_store = 'age_datastore'
training_execution.model_training_results.trained_model.path = '1.0/assets/'
training_execution.model_training_results.trained_model.bucket = 'facial-age-estimator'
training_execution.model_training_results.trained_model.url = 'https://artifacts.contoso.com/model_results'

training_execution.model_training_results.trained_model_local.path = '/tmp/local_model_data/1.0/assets/'

training_execution.data_source.training_data_local.path = '/tmp/local_model_data/1.0/assets/'

training_execution.data_source.training_data.data_store = 'age_datastore'
training_execution.data_source.training_data.path = '1.0/assets/'
training_execution.data_source.training_data.bucket = 'facial-age-estimator'
training_execution.data_source.training_data.training_data_url = 'https://artifacts.contoso.com/training_data' 

training_execution.save(working_dir)

In [None]:
# Fix the bug (need to figure out a more elegant way than popping stuff)
training_execution.training_container_image.pop('conatiner_image_url')
training_execution.training_container_image.container_image_url = "https://docker.io/tensorflow/tensorflow:latest-gpu-py3"

training_execution.save(working_dir)

In [None]:
execution_command = f"python3 train.py '{training_execution.to_yaml()}'" 
result = !{execution_command}

results_dict = {}
for item in result:
    k, v = item.split(':')
    results_dict[k] = v

print(results_dict)

In [None]:
training_result = MLObject()
training_result.set_type('0.0.1', MLSchemaTypes.TRAIN_RESULTS)

training_result.run_id = run_id
training_result.step_id = uuid.uuid4()
training_result.run_date = datetime.datetime.now()
training_result.training_execution_id = training_execution.step_id
training_result.accuracy = results_dict['accuracy']
training_result.global_step = results_dict['global_step']
training_result.loss = results_dict['loss']

training_result.save(working_dir)

In [None]:
os.listdir(working_dir)

In [None]:
environment_object = MLObject()
environment_object.set_type('0.1.0', MLSchemaTypes.ENVIRONMENT)

In [None]:
environment_object.run_id = run_id
environment_object.step_id = uuid.uuid4()
environment_object.run_date = datetime.datetime.now()
environment_object.docker.baseImage = 'https://mcr.microsoft.com/azureml/base:0.2.2'
environment_object.docker.python.interpreterPath = 'python'
environment_object.docker.python.conda.dependencies = [
    "python=3.6.2",
    {'pip': ['torch', 'torchvision']}
]

In [None]:
environment_object.save(working_dir)

In [None]:
print(environment_object.get_file_path())

In [None]:
environment_object1 = MLObject()
environment_object1.set_type('0.1.0', MLSchemaTypes.ENVIRONMENT)
environment_object1.validate()

In [None]:
MLObject.code_gen('0.0.1', MLSchemaTypes.RUNCONFIG, prefix='my_run_config', type_hints=False)

In [None]:
MLObject.code_gen('0.5.0', 'resnet', prefix="resnet_variables", type_hints=True)
# THIS IS AN ERROR - Registry error because not included


In [None]:
MLSchema.append_schema_to_registry(Path('external_schema/mlperf'))

In [None]:
MLObject.code_gen('0.5.0', 'resnet', prefix="resnet_variables", type_hints=True)


In [None]:
resnet_variables = MLObject()
resnet_variables.set_type('0.5.0', 'resnet')

#
# All required attributes
#

# resnet_variables.global_batch_size expects -> Integer
# THIS IS AN ERROR (should be >=4000)
resnet_variables.global_batch_size = 3000

# resnet_variables.lars_opt_base_learning_rate expects -> Float
resnet_variables.lars_opt_base_learning_rate = 0.05

# resnet_variables.lars_opt_end_learning_rate expects -> Float
resnet_variables.lars_opt_end_learning_rate = 0.03

# resnet_variables.lars_opt_learning_rate_decay_poly_power expects -> Integer
resnet_variables.lars_opt_learning_rate_decay_poly_power = 2

# resnet_variables.lars_opt_learning_rate_decay_steps expects -> Integer
resnet_variables.lars_opt_learning_rate_decay_steps = 1000

# resnet_variables.lars_epsilon expects -> Float
resnet_variables.lars_epsilon = 0.0005

# resnet_variables.lars_opt_learning_rate_warmup_epochs expects -> Integer
resnet_variables.lars_opt_learning_rate_warmup_epochs = 1000

# resnet_variables.lars_opt_momentum expects -> Integer
resnet_variables.lars_opt_momentum = 1591

# resnet_variables.lars_opt_weight_decay expects -> Float
resnet_variables.lars_opt_weight_decay = 195.31

resnet_variables.save(working_dir)

In [None]:
resnet_variables.global_batch_size = 4000
resnet_variables.save(working_dir)

In [None]:
os.listdir(working_dir)

In [None]:
print(working_dir)

In [None]:
b, errors = MLObject.create_object_from_file(resnet_variables.get_file_path())

In [None]:
print(errors)

In [None]:
print(b.lars_opt_end_learning_rate)

In [None]:
MLObject.code_gen('2.2.0', 'responsible_ai_result', prefix="rair", type_hints=True)

In [None]:
from mlspeclib.experimental.github import GitHubSchemas
load_url = "https://github.com/mlspec/mlspeclib-action-samples-schemas"
GitHubSchemas.add_schemas_from_github_url(load_url)

In [None]:
resnet_variables.save(store='metastoreservice.com')