# Keras workflow

## Imports

In [1]:
import sys
import os
sys.path.append(os.path.split(os.path.split(os.getcwd())[0])[0])
config_filepath = os.path.join(os.getcwd(),"config/fit_config_array.json")
notebook_filepath = os.path.join(os.getcwd(),"fit.ipynb")
import uuid
import json
import datetime
import getpass

from mercury_ml.common import tasks
from mercury_ml.common import utils
from mercury_ml.common import containers as common_containers
from mercury_ml.keras import containers as keras_containers

# ## Helpers
#
# These functions will help with the flow of this particular notebook

Using TensorFlow backend.


In [2]:
def print_data_bunch(data_bunch):

    for data_set_name, data_set in data_bunch.__dict__.items():
        print("{} <{}>".format(data_set_name, type(data_set).__name__))
        for data_wrapper_name, data_wrapper in data_set.__dict__.items():
            print("  {} <{}>".format(data_wrapper_name, type(data_wrapper).__name__))
        print()
        
def maybe_transform(data_bunch, pre_execution_parameters):
    if pre_execution_parameters:
        return data_bunch.transform(**pre_execution_parameters)
    else:
        return data_bunch
        
def print_dict(d):
    print(json.dumps(d, indent=2))

def get_installed_packages():
    import pip
    try:
        from pip._internal.operations import freeze
    except ImportError:  # pip < 10.0
        from pip.operations import freeze

    packages = []
    for p in freeze.freeze():
        packages.append(p)

    return packages

## Config

#### Load config

In [3]:
config = utils.load_referenced_json_config(config_filepath)

In [4]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 3,
    "batch_size": 2,
    "labels": [
      0,
      1,
      2
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_array",
    "session_id": "{session_id}",
    "model_object_name": "{model_purpose}__{session_id}",
    "data_bunch_name": "array_123",
    "notebook_filepath": "{notebook_filepath}",
    "config_filepath": "{config_filepath}"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_pandas"
    },
    "define_model": {
      "name": "define_mlp_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    },
    "fit": {
      "name": "fit"
    },
    "save_model": {
      "names": [
        "save_hdf5",
        "save_tensorflow_serving_predict_signature_def"
      ]
    },
    "copy_from_local_to_remote": {
      "name": "copy_

#### Set model_id

In [5]:
session_id = str(uuid.uuid4().hex)

In [6]:
print(session_id)

b02a1c74a09642639b46f7d0457d5e2e


#### Update config

The function `utils.recursively_update_config(config, string_formatting_dict)` allows us to use string formatting to replace placeholder strings with acctual values.

for example: 

```python
>>> config = {"some_value": "some_string_{some_placeholder}"}
>>> string_formatting_dict = {"some_placeholder": "ABC"}
>>> utils.recursively_update_config(config, string_formatting_dict)
>>> print(config)
{"some_value": "some_string_ABC}"}
```



First update `config["meta_info"]`

In [7]:
utils.recursively_update_config(config["meta_info"], {
    "session_id": session_id,
    "model_purpose": config["meta_info"]["model_purpose"],
    "config_filepath": config_filepath,
    "notebook_filepath": notebook_filepath
})

Then use `config["meta_info"]` to update the rest.

In [8]:
utils.recursively_update_config(config, config["meta_info"])

In [9]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 3,
    "batch_size": 2,
    "labels": [
      0,
      1,
      2
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_array",
    "session_id": "b02a1c74a09642639b46f7d0457d5e2e",
    "model_object_name": "test_array__b02a1c74a09642639b46f7d0457d5e2e",
    "data_bunch_name": "array_123",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_array.json"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_pandas"
    },
    "define_model": {
      "name": "define_mlp_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    },
    "fit": {
      "name": "fit"

## Session

Create a small dictionary with the session information. This will later be stored as a dictionary artifact with all the key run infomration

In [10]:
session = {
    "time_stamp": datetime.datetime.utcnow().isoformat()[:-3] + "Z",
    "run_by": getpass.getuser(),
    "meta_info": config["meta_info"],
    "installed_packages": get_installed_packages()
}

In [11]:
print("Session info")
print(json.dumps(session, indent=2))

Session info
{
  "time_stamp": "2019-03-07T14:39:22.427Z",
  "run_by": "karl.schriek",
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_array",
    "session_id": "b02a1c74a09642639b46f7d0457d5e2e",
    "model_object_name": "test_array__b02a1c74a09642639b46f7d0457d5e2e",
    "data_bunch_name": "array_123",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_array.json"
  },
  "installed_packages": [
    "absl-py==0.7.0",
    "astor==0.7.1",
    "attrs==19.1.0",
    "backcall==0.1.0",
    "bleach==3.1.0",
    "certifi==2018.11.29",
    "chardet==3.0.4",
    "colorama==0.4.1",
    "decorator==4.3.2",
    "defusedxml==0.5.0",
    "docutils==0.14",
    "entrypoints==0.3",
    "gast==0.2.2",
    "grpcio==1.19.0",
    "h5py==2.9.0",
    "idna==2.8",
    "ipykernel==5.1.0",
    "

## Initialization

Theseare the functions or classes we will be using in this workflow. We get / instatiate them all at the beginning using parameters under `config["initialization"]`.

Here we use mainly use `getattr` to fetch them via the `containers` module based on a string input in the config file. Providers could however also be fetched directly. The following three methods are all equivalent:

```python
# 1. (what we are using in this notebook)
from ml_workflow.common import containers as common_containers
source_reader=getattr(common_containers.SourceReaders, "read_pandas_data_set")

# 2. 
from ml_workflow.common import containers as common_containers
source_reader=common_containers.SourceReaders.read_pandas_data_set

# 3.
from ml_workflow.common.providers.source_reading import read_pandas_data_set
source_reader=read_pandas_data_set
```


### Helpers

These helper functions will create instantiate class providers (`create_and_log`) or fetch function providers (`get_and_log`) based on the parameters provided

In [12]:
def create_and_log(container, class_name, params):
    provider = getattr(container, class_name)(**params)
    print("{}.{}".format(container.__name__, class_name))
    print("params: ", json.dumps(params, indent=2))
    return provider

def get_and_log(container, function_name):
    provider = getattr(container, function_name)
    print("{}.{}".format(container.__name__, function_name))
    return provider

### Common

These are providers that are universally relevant, regardless of which Machine Learning engine is used.

In [13]:
# a function for storing dictionary artifacts to local disk
store_artifact_locally = get_and_log(common_containers.LocalArtifactStorers,
                                     config["init"]["store_artifact_locally"]["name"])

LocalArtifactStorers.store_dict_json


In [14]:
# a function for storing data-frame-like artifacts to local disk
store_prediction_artifact_locally = get_and_log(common_containers.LocalArtifactStorers,
                                                config["init"]["store_prediction_artifact_locally"]["name"])

LocalArtifactStorers.store_pandas_pickle


In [15]:
# a function for copy artifacts from local disk to a remote store
copy_from_local_to_remote = get_and_log(common_containers.ArtifactCopiers, config["init"]["copy_from_local_to_remote"]["name"])

ArtifactCopiers.copy_from_disk_to_disk


In [16]:
# a function for reading source data. When called it will return an instance of type DataBunch 
read_source_data_set = get_and_log(common_containers.SourceReaders, config["init"]["read_source_data"]["name"])

SourceReaders.read_disk_pandas


In [17]:
# a dictionary of functions that calculate custom metrics
custom_metrics_dict = {
    custom_metric_name: get_and_log(common_containers.CustomMetrics, custom_metric_name) for custom_metric_name in config["init"]["custom_metrics"]["names"]
}

CustomMetrics.evaluate_numpy_auc
CustomMetrics.evaluate_numpy_micro_auc


In [18]:
# a dictionary of functions that calculate custom label metrics
custom_label_metrics_dict = {
    custom_label_metric_name: get_and_log(common_containers.CustomLabelMetrics, custom_label_metric_name) for custom_label_metric_name in config["init"]["custom_label_metrics"]["names"]
}

CustomLabelMetrics.evaluate_numpy_accuracy
CustomLabelMetrics.evaluate_numpy_confusion_matrix


### Keras

In [19]:
# a function that returns an uncompiled keras model
define_model = get_and_log(keras_containers.ModelDefinitions, 
                           config["init"]["define_model"]["name"])

ModelDefinitions.define_mlp_simple


In [20]:
# a function that returns a keras loss function
get_loss_function = get_and_log(keras_containers.LossFunctionFetchers, 
                                config["init"]["get_loss_function"]["name"])

LossFunctionFetchers.get_keras_loss


In [21]:
# a function that returns a keras optimizer
get_optimizer = get_and_log(keras_containers.OptimizerFetchers, 
                           config["init"]["get_optimizer"]["name"])

OptimizerFetchers.get_keras_optimizer


In [22]:
# a function that returns a compiled keras model
compile_model = get_and_log(keras_containers.ModelCompilers, 
                            config["init"]["compile_model"]["name"])

ModelCompilers.compile_model


In [23]:
# a function that fits a compiled keras model
fit = get_and_log(keras_containers.ModelFitters, config["init"]["fit"]["name"])

ModelFitters.fit


In [24]:
# a list of functions that serve as callback when fitting a keras model
callbacks = []
for callback in config["init"]["callbacks"]:
    callbacks = callbacks + [get_and_log(keras_containers.CallBacks, callback["name"])(callback["params"])]

CallBacks.early_stopping
CallBacks.model_checkpoint


In [25]:
# a function for evaluating keras metrics
evaluate = get_and_log(keras_containers.ModelEvaluators, config["init"]["evaluate"]["name"])

ModelEvaluators.evaluate


In [26]:
# a dictionary of functions that save keras models in various formats
save_model_dict = {
    save_model_function_name: get_and_log(keras_containers.ModelSavers, save_model_function_name) for save_model_function_name in config["init"]["save_model"]["names"]
}

ModelSavers.save_hdf5
ModelSavers.save_tensorflow_serving_predict_signature_def


In [27]:
# a function that predictions using a keras model
predict = get_and_log(keras_containers.PredictionFunctions, config["init"]["predict"]["name"])

PredictionFunctions.predict


## Execution

Here we use the providers defined above to execute various tasks

### Get source data

In [28]:
data_bunch_source = tasks.read_train_valid_test_data_bunch(read_source_data_set,**config["exec"]["read_source_data"]["params"] )
print("Source data read using following parameters: \n")
print_dict(config["exec"]["read_source_data"]["params"])

Source data read using following parameters: 

{
  "train_params": {
    "path": "./example_data/array_123/train.csv",
    "input_format": ".csv",
    "full_data_columns": [
      "ID",
      "ID2",
      "field1_num",
      "field2_num",
      "field3_num",
      "field4_target",
      "field5_target",
      "field6_target"
    ],
    "index_columns": [
      "ID",
      "ID2"
    ],
    "features_columns": [
      "field1_num",
      "field2_num",
      "field3_num"
    ],
    "targets_columns": [
      "field4_target",
      "field5_target",
      "field6_target"
    ]
  },
  "valid_params": {
    "path": "./example_data/array_123/valid.csv",
    "input_format": ".csv",
    "full_data_columns": [
      "ID",
      "ID2",
      "field1_num",
      "field2_num",
      "field3_num",
      "field4_target",
      "field5_target",
      "field6_target"
    ],
    "index_columns": [
      "ID",
      "ID2"
    ],
    "features_columns": [
      "field1_num",
      "field2_num",
      "fiel

In [29]:
print("Read data_bunch consists of: \n")
print_data_bunch(data_bunch_source)

Read data_bunch consists of: 

train <DataSet>
  full_data <PandasDataWrapper>
  index <PandasDataWrapper>
  features <PandasDataWrapper>
  targets <PandasDataWrapper>

valid <DataSet>
  full_data <PandasDataWrapper>
  index <PandasDataWrapper>
  features <PandasDataWrapper>
  targets <PandasDataWrapper>

test <DataSet>
  full_data <PandasDataWrapper>
  index <PandasDataWrapper>
  features <PandasDataWrapper>
  targets <PandasDataWrapper>



### Define model

In [30]:
model = define_model(**config["exec"]["define_model"]["params"])

Instructions for updating:
Colocations handled automatically by placer.


In [31]:
print("Model defintion used:", config["init"]["define_model"]["name"], "\n")
print("Model parameters used: ")
print_dict(config["exec"]["define_model"]["params"])

Model defintion used: define_mlp_simple 

Model parameters used: 
{
  "nb_classes": 3,
  "nb_features": 3,
  "dense_activation": "relu",
  "final_activation": "softmax"
}


### Compile model

In [32]:
optimizer = get_optimizer(**config["exec"]["get_optimizer"]["params"])

In [33]:
print("Optimizer fetched with following parameters: ")
print_dict(config["exec"]["get_optimizer"]["params"])

Optimizer fetched with following parameters: 
{
  "optimizer_name": "adam",
  "optimizer_params": {
    "lr": 0.0001
  }
}


In [34]:
loss = get_loss_function(**config["exec"]["get_loss_function"]["params"])

In [35]:
print("Loss function fetched with following parameters: ")
print_dict(config["exec"]["get_optimizer"]["params"])

Loss function fetched with following parameters: 
{
  "optimizer_name": "adam",
  "optimizer_params": {
    "lr": 0.0001
  }
}


In [36]:
model = compile_model(model=model,
                      optimizer=optimizer,
                      loss=loss,
                      **config["exec"]["compile_model"]["params"])

### Fit model

##### Transform data

In [37]:
data_bunch_fit = maybe_transform(data_bunch_source, config["exec"]["fit"].get("pre_execution_transformation"))

print("Data transformed with following parameters: \n")
print_dict(config["exec"]["fit"].get("pre_execution_transformation"))

Data transformed with following parameters: 

{
  "data_set_names": [
    "train",
    "valid",
    "test"
  ],
  "transform_then_slice": true,
  "params": {
    "transform_to": "numpy",
    "full_data_wrapper_params": {},
    "data_wrapper_names": [
      "features",
      "targets",
      "index"
    ]
  }
}


In [38]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_fit)

Transformed data_bunch consists of: 

train <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

valid <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

test <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>



##### Perform fitting

In [39]:
model = fit(model = model,
            data_bunch = data_bunch_fit,
            callbacks = callbacks,
            **config["exec"]["fit"]["params"])

Instructions for updating:
Use tf.cast instead.
Train on 20 samples, validate on 20 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
return_best_model set to False. Returning model from last epoch


In [40]:
actual_epochs=len(model.history.history["acc"])
print("Actual trained epochs: {}".format(actual_epochs))

Actual trained epochs: 10


In [41]:
print("Training history: ")
print_dict(model.history.history)

Training history: 
{
  "val_loss": [
    1.9712057530879974,
    1.9557744354009627,
    1.9376160383224488,
    1.919517183303833,
    1.9021284461021424,
    1.8850532650947571,
    1.869289606809616,
    1.8533921867609024,
    1.8374166697263719,
    1.8228549987077713
  ],
  "val_acc": [
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5
  ],
  "loss": [
    1.9814257144927978,
    1.9620218321681022,
    1.9469922117888927,
    1.930476374924183,
    1.9130401119589806,
    1.8972187697887422,
    1.8790043979883193,
    1.8635313004255294,
    1.8478590726852417,
    1.830685842037201
  ],
  "acc": [
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5,
    0.5
  ]
}


In [42]:
session["actual_epochs"]=actual_epochs
session["history"]=model.history.history

### Save (formatted) config

In [43]:
tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, config,
                      **config["exec"]["save_formatted_config"]["params"])

In [44]:
print("Config stored with following parameters")
print_dict(config["exec"]["save_formatted_config"]["params"])

Config stored with following parameters
{
  "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/session",
  "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/session",
  "filename": "config_formatted"
}


### Save Session

##### Save session info

In [45]:
tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, session,
                      **config["exec"]["save_session"]["params"])

In [46]:
print("Session dictionary stored with following parameters")
print_dict(config["exec"]["save_session"]["params"])

Session dictionary stored with following parameters
{
  "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/session",
  "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/session",
  "filename": "session"
}


In [47]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 3,
    "batch_size": 2,
    "labels": [
      0,
      1,
      2
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_array",
    "session_id": "b02a1c74a09642639b46f7d0457d5e2e",
    "model_object_name": "test_array__b02a1c74a09642639b46f7d0457d5e2e",
    "data_bunch_name": "array_123",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_array.json"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_pandas"
    },
    "define_model": {
      "name": "define_mlp_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    },
    "fit": {
      "name": "fit"

}


##### Save session artifacts

In [48]:
for artifact_dict in config["exec"]["save_session_artifacts"]["artifacts"]:
    
    artifact_dir=os.path.dirname(artifact_dict["artifact_path"]) 
    artifact_filename=os.path.basename(artifact_dict["artifact_path"])
    
    # save to local artifact store
    common_containers.ArtifactCopiers.copy_from_disk_to_disk(
        source_dir=artifact_dir,
        target_dir=artifact_dict["local_dir"],
        filename=artifact_filename,
        overwrite=False,
        delete_source=False)

    # copy to remote artifact store
    copy_from_local_to_remote(source_dir=artifact_dict["local_dir"],
                              target_dir=artifact_dict["remote_dir"],
                              filename=artifact_filename,
                              overwrite=False,
                              delete_source=False)

In [49]:
print("Session artifacts stored with following parameters")
print_dict(config["exec"]["save_session_artifacts"])

Session artifacts stored with following parameters
{
  "artifacts": [
    {
      "artifact_path": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_array.json",
      "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/session",
      "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/session"
    },
    {
      "artifact_path": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
      "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/session",
      "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/session"
    }
  ]
}


### Save model

In [50]:
for model_format, save_model in save_model_dict.items():
    
    tasks.store_model(save_model=save_model,
                      model=model,
                      copy_from_local_to_remote = copy_from_local_to_remote,
                      **config["exec"]["save_model"][model_format]
                      )

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: c:/tf_serving/_tmp_model/test_array__b02a1c74a09642639b46f7d0457d5e2e__tf_serving_predict\1\saved_model.pb


In [51]:
print("Model saved with following paramters: \n")
print_dict(config["exec"]["save_model"])

Model saved with following paramters: 

{
  "save_hdf5": {
    "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/models",
    "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/models",
    "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__hdf5",
    "extension": ".h5",
    "overwrite_remote": true
  },
  "save_tensorflow_serving_predict_signature_def": {
    "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/models",
    "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/models",
    "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__tf_serving_predict",
    "temp_base_dir": "c:/tf_serving/_tmp_model/test_array__b02a1c74a09642639b46f7d0457d5e2e__tf_serving_predict",
    "extension": ".zip",
    "overwrite_remote": true,
    "do_save_labels_txt": true,
    "input_name": "input",
    "output_name": "output",
    "labels_list": [
      "cat",
      "dog"
    ]
  }
}


### Evaluate metrics

##### Transform data

In [52]:
data_bunch_metrics = maybe_transform(data_bunch_fit, config["exec"]["evaluate"].get("pre_execution_transformation"))

print("Data transformed with following parameters: \n")
print_dict(config["exec"]["evaluate"].get("pre_execution_transformation"))

Data transformed with following parameters: 

null


In [53]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_metrics)

Transformed data_bunch consists of: 

train <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

valid <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

test <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>



##### Calculate metrics

In [54]:
metrics = {}
for data_set_name in config["exec"]["evaluate"]["data_set_names"]:
    data_set = getattr(data_bunch_metrics, data_set_name)
    metrics[data_set_name] = evaluate(model, data_set, **config["exec"]["evaluate"]["params"])



In [55]:
print("Resulting metrics: \n")
print_dict(metrics)

Resulting metrics: 

{
  "test": {
    "loss": 1.822854995727539,
    "acc": 0.5
  }
}


### Save metrics

In [56]:
for data_set_name, params in config["exec"]["save_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, metrics[data_set_name], **params)

### Predict

##### Transform data

In [57]:
data_bunch_predict = maybe_transform(data_bunch_metrics, config["exec"]["predict"].get("pre_execution_transformation"))
    
print("Data transformed with following parameters: \n")
print_dict(config["exec"]["predict"].get("pre_execution_transformation"))

Data transformed with following parameters: 

null


In [58]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_predict)

Transformed data_bunch consists of: 

train <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

valid <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>

test <DataSet>
  full_data <NumpyDataWrapper>
  index <NumpyDataWrapper>
  features <NumpyDataWrapper>
  targets <NumpyDataWrapper>



##### Perform prediction

In [59]:
for data_set_name in config["exec"]["predict"]["data_set_names"]:
    data_set = getattr(data_bunch_predict, data_set_name)
    data_set.predictions = predict(model=model, data_set=data_set, **config["exec"]["predict"]["params"])

In [60]:
print("Data predicted with following parameters: \n")
print_dict(config["exec"]["predict"].get("params"))

Data predicted with following parameters: 

{}


### Evaluate custom metrics

##### Transform data

In [61]:
data_bunch_custom_metrics = maybe_transform(data_bunch_predict, 
                                            config["exec"]["evaluate_custom_metrics"].get("pre_execution_transformation"))

In [62]:
print("Data transformed with following parameters: \n")
print_dict(config["exec"]["evaluate_custom_metrics"].get("pre_execution_transformation"))

Data transformed with following parameters: 

{
  "data_set_names": [
    "test"
  ],
  "params": {
    "transform_to": "numpy",
    "data_wrapper_params": {
      "predictions": {},
      "index": {},
      "targets": {}
    }
  }
}


In [63]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_custom_metrics)

Transformed data_bunch consists of: 

test <DataSet>
  predictions <NumpyDataWrapper>
  index <NumpyDataWrapper>
  targets <NumpyDataWrapper>



##### Calculate custom metrics


In [64]:
custom_metrics = {}
for data_set_name in config["exec"]["evaluate_custom_metrics"]["data_set_names"]:
    data_set = getattr(data_bunch_custom_metrics, data_set_name)
    custom_metrics[data_set_name]  = tasks.evaluate_metrics(data_set, custom_metrics_dict)

In [65]:
print("Resulting custom metrics: \n")
print_dict(custom_metrics)

Resulting custom metrics: 

{
  "test": {
    "evaluate_numpy_auc": 0.26111111111111107,
    "evaluate_numpy_micro_auc": 0.53
  }
}


##### Calculate custom label metrics

In [66]:
custom_label_metrics = {}
for data_set_name in config["exec"]["evaluate_custom_label_metrics"]["data_set_names"]:
    data_set = getattr(data_bunch_custom_metrics, data_set_name)
    custom_label_metrics[data_set_name] = tasks.evaluate_label_metrics(data_set, custom_label_metrics_dict)

In [67]:
print("Resulting custom label metrics: \n")
print_dict(custom_label_metrics)

Resulting custom label metrics: 

{
  "test": {
    "Accuracy": {
      "field4_target": 0.6,
      "field5_target": 0.8,
      "field6_target": 0.7
    },
    "ConfMat_Count_field4_target": {
      "field4_target": 10,
      "field5_target": 4,
      "field6_target": 6
    },
    "ConfMat_Rate_field4_target": {
      "field4_target": 1.0,
      "field5_target": 1.0,
      "field6_target": 1.0
    },
    "ConfMat_Count_field5_target": {
      "field4_target": 0,
      "field5_target": 0,
      "field6_target": 0
    },
    "ConfMat_Rate_field5_target": {
      "field4_target": 0.0,
      "field5_target": 0.0,
      "field6_target": 0.0
    },
    "ConfMat_Count_field6_target": {
      "field4_target": 0,
      "field5_target": 0,
      "field6_target": 0
    },
    "ConfMat_Rate_field6_target": {
      "field4_target": 0.0,
      "field5_target": 0.0,
      "field6_target": 0.0
    }
  }
}


In [68]:
for data_set_name, params in config["exec"]["save_custom_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote,
                          custom_metrics[data_set_name], **params)

In [69]:
print("Custom metrics saved with following parameters: \n")
print_dict(config["exec"]["save_custom_metrics"])

Custom metrics saved with following parameters: 

{
  "data_sets": {
    "test": {
      "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/metrics/test",
      "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/metrics/test",
      "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__test__custom_metrics"
    }
  }
}


In [70]:
for data_set_name, params in config["exec"]["save_custom_label_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote,
                          custom_label_metrics[data_set_name], **params)

In [71]:
print("Custom label metrics saved with following parameters: \n")
print_dict(config["exec"]["save_custom_label_metrics"])

Custom label metrics saved with following parameters: 

{
  "data_sets": {
    "test": {
      "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/metrics/test",
      "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/metrics/test",
      "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__test__custom_label_metrics"
    }
  }
}


### Prepare predictions for storage

##### Transform data

In [72]:
data_bunch_prediction_preparation = maybe_transform(data_bunch_predict, 
                                                    config["exec"]["prepare_predictions_for_storage"].get("pre_execution_transformation"))

In [73]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_prediction_preparation)

Transformed data_bunch consists of: 

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>



##### Prepare predictions and targets

In [74]:
for data_set_name in config["exec"]["prepare_predictions_for_storage"]["data_set_names"]:
    data_set = getattr(data_bunch_prediction_preparation, data_set_name)
    data_set.add_data_wrapper_via_concatenate(**config["exec"]["prepare_predictions_for_storage"]["params"]["predictions"])
    data_set.add_data_wrapper_via_concatenate(**config["exec"]["prepare_predictions_for_storage"]["params"]["targets"])

In [75]:
print_data_bunch(data_bunch_prediction_preparation)

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>
  predictions_for_storage <PandasDataWrapper>
  targets_for_storage <PandasDataWrapper>



### Save predictions

##### Transform data

In [76]:
data_bunch_prediction_storage = maybe_transform(data_bunch_prediction_preparation, 
                                                config["exec"]["save_predictions"].get("pre_execution_transformation"))

In [77]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_prediction_storage)

Transformed data_bunch consists of: 

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>
  predictions_for_storage <PandasDataWrapper>
  targets_for_storage <PandasDataWrapper>



##### Save predictions

In [78]:
for data_set_name, data_set_params in config["exec"]["save_predictions"]["data_sets"].items():
    data_set = getattr(data_bunch_prediction_storage, data_set_name)
    data_wrapper = getattr(data_set, data_set_params["data_wrapper_name"])
    
    data_to_store = data_wrapper.underlying
   
    tasks.store_artifacts(store_prediction_artifact_locally, copy_from_local_to_remote,
                          data_to_store, **data_set_params["params"])

In [79]:
print("Predictions saved with following parameters: \n")
print_dict(config["exec"]["save_predictions"])

Predictions saved with following parameters: 

{
  "data_sets": {
    "test": {
      "data_wrapper_name": "predictions_for_storage",
      "params": {
        "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/predictions/test",
        "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/predictions/test",
        "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__test__predictions"
      }
    }
  }
}


##### Save targets

In [80]:
for data_set_name, data_set_params in config["exec"]["save_targets"]["data_sets"].items():
    data_set = getattr(data_bunch_prediction_storage, data_set_name)
    data_wrapper = getattr(data_set, data_set_params["data_wrapper_name"])
    
    data_to_store = data_wrapper.underlying
   
    tasks.store_artifacts(store_prediction_artifact_locally, copy_from_local_to_remote,
                          data_to_store, **data_set_params["params"])

In [81]:
print("Targets saved with following parameters: \n")
print_dict(config["exec"]["save_targets"])

Targets saved with following parameters: 

{
  "data_sets": {
    "test": {
      "data_wrapper_name": "targets_for_storage",
      "params": {
        "local_dir": "./example_results/local/b02a1c74a09642639b46f7d0457d5e2e/predictions/test",
        "remote_dir": "./example_results/remote/b02a1c74a09642639b46f7d0457d5e2e/predictions/test",
        "filename": "test_array__b02a1c74a09642639b46f7d0457d5e2e__test__targets"
      }
    }
  }
}
