# Keras workflow

## Imports

In [198]:
import sys
import os
sys.path.append(os.path.split(os.path.split(os.getcwd())[0])[0])
config_filepath = os.path.join(os.getcwd(),"config/fit_config_generator.json")
notebook_filepath = os.path.join(os.getcwd(),"fit.ipynb")
import uuid
import json
import datetime
import getpass

from mercury_ml.common import tasks
from mercury_ml.common import utils
from mercury_ml.common import containers as common_containers
from mercury_ml.keras import containers as keras_containers

In [199]:
#For testing purposes only!

if os.path.isdir("./example_results"):
    import shutil
    shutil.rmtree("./example_results")

## Helpers

These functions will help with the flow of this particular notebook

In [200]:
def print_data_bunch(data_bunch):

    for data_set_name, data_set in data_bunch.__dict__.items():
        print("{} <{}>".format(data_set_name, type(data_set).__name__))
        for data_wrapper_name, data_wrapper in data_set.__dict__.items():
            print("  {} <{}>".format(data_wrapper_name, type(data_wrapper).__name__))
        print()
        
def maybe_transform(data_bunch, pre_execution_parameters):
    if pre_execution_parameters:
        return data_bunch.transform(**pre_execution_parameters)
    else:
        return data_bunch
        
def print_dict(d):
    print(json.dumps(d, indent=2))

def get_installed_packages():
    import pip
    try:
        from pip._internal.operations import freeze
    except ImportError:  # pip < 10.0
        from pip.operations import freeze

    packages = []
    for p in freeze.freeze():
        packages.append(p)

    return packages

## Config

#### Load config

In [201]:
config = utils.load_referenced_json_config(config_filepath)

In [202]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 2,
    "batch_size": 2,
    "labels": [
      "cat",
      "dog"
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_generator",
    "session_id": "{session_id}",
    "model_object_name": "{model_purpose}__{session_id}",
    "data_bunch_name": "images_456",
    "notebook_filepath": "{notebook_filepath}",
    "config_filepath": "{config_filepath}"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_keras_single_input_iterator"
    },
    "define_model": {
      "name": "define_conv_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    },
    "fit": {
      "name": "fit_generator"
    },
    "save_model": {
      "names": [
        "save_hdf5",
        "save_tensorflow_serving_predict_signature_def"
      ]
    },
    "copy_from_loca

#### Set model_id

In [203]:
session_id = str(uuid.uuid4().hex)

In [204]:
print(session_id)

16c534ee22ec4a068e0ebe1dfaa2f0d9


#### Update config

The function `utils.recursively_update_config(config, string_formatting_dict)` allows us to use string formatting to replace placeholder strings with acctual values.

for example: 

```python
>>> config = {"some_value": "some_string_{some_placeholder}"}
>>> string_formatting_dict = {"some_placeholder": "ABC"}
>>> utils.recursively_update_config(config, string_formatting_dict)
>>> print(config)
{"some_value": "some_string_ABC}"}
```



First update `config["meta_info"]`

In [205]:
utils.recursively_update_config(config["meta_info"], {
    "session_id": session_id,
    "model_purpose": config["meta_info"]["model_purpose"],
    "config_filepath": config_filepath,
    "notebook_filepath": notebook_filepath
})

Then use `config["meta_info"]` to update the rest.

In [206]:
utils.recursively_update_config(config, config["meta_info"])

In [207]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 2,
    "batch_size": 2,
    "labels": [
      "cat",
      "dog"
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_generator",
    "session_id": "16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "model_object_name": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "data_bunch_name": "images_456",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_generator.json"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_keras_single_input_iterator"
    },
    "define_model": {
      "name": "define_conv_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    }

## Session

Create a small dictionary with the session information. This will later be stored as a dictionary artifact with all the key run infomration

In [208]:
session = {
    "time_stamp": datetime.datetime.utcnow().isoformat()[:-3] + "Z",
    "run_by": getpass.getuser(),
    "meta_info": config["meta_info"],
    "installed_packages": get_installed_packages()
}

In [209]:
print("Session info")
print(json.dumps(session, indent=2))

Session info
{
  "time_stamp": "2019-02-25T12:06:19.137Z",
  "run_by": "karl.schriek",
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_generator",
    "session_id": "16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "model_object_name": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "data_bunch_name": "images_456",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_generator.json"
  },
  "installed_packages": [
    "absl-py==0.7.0",
    "astor==0.7.1",
    "atomicwrites==1.3.0",
    "attrs==18.2.0",
    "backcall==0.1.0",
    "bleach==3.1.0",
    "boto3==1.9.86",
    "botocore==1.12.86",
    "cachetools==3.1.0",
    "certifi==2018.11.29",
    "chardet==3.0.4",
    "colorama==0.4.1",
    "decorator==4.3.2",
    "defusedxml==0.5.0",
    "docutils==0.14",
    "entrypoints==0

## Initialization

Theseare the functions or classes we will be using in this workflow. We get / instatiate them all at the beginning using parameters under `config["initialization"]`.

Here we use mainly use `getattr` to fetch them via the `containers` module based on a string input in the config file. Providers could however also be fetched directly. The following three methods are all equivalent:

```python
# 1. (what we are using in this notebook)
from ml_workflow.common import containers as common_containers
source_reader=getattr(common_containers.SourceReaders, "read_pandas_data_set")

# 2. 
from ml_workflow.common import containers as common_containers
source_reader=common_containers.SourceReaders.read_pandas_data_set

# 3.
from ml_workflow.common.providers.source_reading import read_pandas_data_set
source_reader=read_pandas_data_set
```


### Helpers

These helper functions will create instantiate class providers (`create_and_log`) or fetch function providers (`get_and_log`) based on the parameters provided

In [210]:
def create_and_log(container, class_name, params):
    provider = getattr(container, class_name)(**params)
    print("{}.{}".format(container.__name__, class_name))
    print("params: ", json.dumps(params, indent=2))
    return provider

def get_and_log(container, function_name):
    provider = getattr(container, function_name)
    print("{}.{}".format(container.__name__, function_name))
    return provider

### Common

These are providers that are universally relevant, regardless of which Machine Learning engine is used.

In [211]:
# a function for storing dictionary artifacts to local disk
store_artifact_locally = get_and_log(common_containers.LocalArtifactStorers,
                                     config["init"]["store_artifact_locally"]["name"])

LocalArtifactStorers.store_dict_json


In [212]:
# a function for storing data-frame-like artifacts to local disk
store_prediction_artifact_locally = get_and_log(common_containers.LocalArtifactStorers,
                                                config["init"]["store_prediction_artifact_locally"]["name"])

LocalArtifactStorers.store_pandas_pickle


In [213]:
# a function for copy artifacts from local disk to a remote store
copy_from_local_to_remote = get_and_log(common_containers.ArtifactCopiers, config["init"]["copy_from_local_to_remote"]["name"])

ArtifactCopiers.copy_from_disk_to_disk


In [214]:
# a function for reading source data. When called it will return an instance of type DataBunch 
read_source_data_set = get_and_log(common_containers.SourceReaders, config["init"]["read_source_data"]["name"])

SourceReaders.read_disk_keras_single_input_iterator


In [215]:
# a dictionary of functions that calculate custom metrics
custom_metrics_dict = {
    custom_metric_name: get_and_log(common_containers.CustomMetrics, custom_metric_name) for custom_metric_name in config["init"]["custom_metrics"]["names"]
}


CustomMetrics.evaluate_numpy_auc
CustomMetrics.evaluate_numpy_micro_auc


In [216]:
# a dictionary of functions that calculate custom label metrics
custom_label_metrics_dict = {
    custom_label_metric_name: get_and_log(common_containers.CustomLabelMetrics, custom_label_metric_name) for custom_label_metric_name in config["init"]["custom_label_metrics"]["names"]
}


CustomLabelMetrics.evaluate_numpy_accuracy
CustomLabelMetrics.evaluate_numpy_confusion_matrix


### Keras

In [217]:
# a function that returns an uncompiled keras model
define_model = get_and_log(keras_containers.ModelDefinitions, 
                           config["init"]["define_model"]["name"])

ModelDefinitions.define_conv_simple


In [218]:
# a function that returns a keras loss function
get_loss_function = get_and_log(keras_containers.LossFunctionFetchers, 
                                config["init"]["get_loss_function"]["name"])

LossFunctionFetchers.get_keras_loss


In [219]:
# a function that returns a keras optimizer
get_optimizer = get_and_log(keras_containers.OptimizerFetchers, 
                           config["init"]["get_optimizer"]["name"])


OptimizerFetchers.get_keras_optimizer


In [220]:
# a function that returns a compiled keras model
compile_model = get_and_log(keras_containers.ModelCompilers, 
                            config["init"]["compile_model"]["name"])

ModelCompilers.compile_model


In [221]:
# a function that fits a compiled keras model
fit = get_and_log(keras_containers.ModelFitters, config["init"]["fit"]["name"])

ModelFitters.fit_generator


In [222]:
# a list of functions that serve as callback when fitting a keras model
callbacks = []
for callback in config["init"]["callbacks"]:
    callbacks = callbacks + [get_and_log(keras_containers.CallBacks, callback["name"])(callback["params"])]

CallBacks.early_stopping
CallBacks.model_checkpoint


In [223]:
# a function for evaluating keras metrics
evaluate = get_and_log(keras_containers.ModelEvaluators, config["init"]["evaluate"]["name"])

ModelEvaluators.evaluate_generator


In [224]:
# a dictionary of functions that save keras models in various formats
save_model_dict = {
    save_model_function_name: get_and_log(keras_containers.ModelSavers, save_model_function_name) for save_model_function_name in config["init"]["save_model"]["names"]
}


ModelSavers.save_hdf5
ModelSavers.save_tensorflow_serving_predict_signature_def


In [225]:
# a function that predictions using a keras model
predict = get_and_log(keras_containers.PredictionFunctions, config["init"]["predict"]["name"])

PredictionFunctions.predict_generator


## Execution

Here we use the providers defined above to execute various tasks

### Get source data

In [226]:
data_bunch_source = tasks.read_train_valid_test_data_bunch(read_source_data_set,**config["exec"]["read_source_data"]["params"] )
print("Source data read using following parameters: \n")
print_dict(config["exec"]["read_source_data"]["params"])

Found 14 images belonging to 2 classes.
Found 6 images belonging to 2 classes.
Found 6 images belonging to 2 classes.
Source data read using following parameters: 

{
  "train_params": {
    "generator_params": {
      "channel_shift_range": 0.0,
      "data_format": "channels_last",
      "featurewise_center": false,
      "featurewise_std_normalization": false,
      "fill_mode": "nearest",
      "height_shift_range": 0.1,
      "horizontal_flip": true,
      "rescale": 0.00392156862745098,
      "rotation_range": 0.2,
      "samplewise_center": true,
      "samplewise_std_normalization": true,
      "shear_range": 0.1,
      "vertical_flip": true,
      "width_shift_range": 0.1,
      "zca_epsilon": 1e-06,
      "zca_whitening": false,
      "zoom_range": 0.1
    },
    "iterator_params": {
      "directory": "./example_data/images_456/train",
      "batch_size": 2,
      "class_mode": "categorical",
      "color_mode": "rgb",
      "seed": 12345,
      "shuffle": true,
      "targe

In [227]:
print("Read data_bunch consists of: \n")
print_data_bunch(data_bunch_source)

Read data_bunch consists of: 

train <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

valid <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

test <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>



### Define model

In [228]:
model = define_model(**config["exec"]["define_model"]["params"])

In [229]:
print("Model defintion used:", config["init"]["define_model"]["name"], "\n")
print("Model parameters used: ")
print_dict(config["exec"]["define_model"]["params"])

Model defintion used: define_conv_simple 

Model parameters used: 
{
  "nb_classes": 2,
  "input_size": [
    10,
    10
  ],
  "final_activation": "softmax",
  "dropout_rate": 0.1
}


### Compile model

In [230]:
optimizer = get_optimizer(**config["exec"]["get_optimizer"]["params"])

In [231]:
print("Optimizer fetched with following parameters: ")
print_dict(config["exec"]["get_optimizer"]["params"])

Optimizer fetched with following parameters: 
{
  "optimizer_name": "adam",
  "optimizer_params": {
    "lr": 0.0001
  }
}


In [232]:
loss = get_loss_function(**config["exec"]["get_loss_function"]["params"])

In [233]:
print("Loss function fetched with following parameters: ")
print_dict(config["exec"]["get_optimizer"]["params"])

Loss function fetched with following parameters: 
{
  "optimizer_name": "adam",
  "optimizer_params": {
    "lr": 0.0001
  }
}


In [234]:
model = compile_model(model=model,
                      optimizer=optimizer,
                      loss=loss,
                      **config["exec"]["compile_model"]["params"])

### Fit model

##### Transform data

In [235]:
data_bunch_fit = maybe_transform(data_bunch_source, config["exec"]["fit"].get("pre_execution_transformation"))

print("Data transformed with following parameters: \n")
print_dict(config["exec"]["fit"].get("pre_execution_transformation"))

Data transformed with following parameters: 

null


In [236]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_fit)

Transformed data_bunch consists of: 

train <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

valid <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

test <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>



##### Perform fitting

In [237]:
model = fit(model = model,
            data_bunch = data_bunch_fit,
            callbacks = callbacks,
            **config["exec"]["fit"]["params"])

Epoch 1/5
Epoch 2/5
Epoch 3/5
return_best_model set to False. Returning model from last epoch


In [238]:
actual_epochs=len(model.history.history["acc"])
print("Actual trained epochs: {}".format(actual_epochs))

Actual trained epochs: 3


In [239]:
print("Training history: ")
print_dict(model.history.history)

Training history: 
{
  "val_loss": [
    1.1369712601105373,
    1.3458143770694733,
    1.2361086110273998
  ],
  "val_acc": [
    0.5,
    0.5,
    0.5
  ],
  "loss": [
    0.7643173156040055,
    1.1812546838607108,
    1.1540226957627706
  ],
  "acc": [
    0.5,
    0.5,
    0.5
  ]
}


In [240]:
session["actual_epochs"]=actual_epochs
session["history"]=model.history.history

### Save (formatted) config

In [241]:
tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, config,
                      **config["exec"]["save_formatted_config"]["params"])

In [242]:
print("Config stored with following parameters")
print_dict(config["exec"]["save_formatted_config"]["params"])

Config stored with following parameters
{
  "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
  "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
  "filename": "config_formatted"
}


### Save Session

##### Save session info

In [243]:
tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, session,
                      **config["exec"]["save_session"]["params"])

In [244]:
print("Session dictionary stored with following parameters")
print_dict(config["exec"]["save_session"]["params"])

Session dictionary stored with following parameters
{
  "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
  "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
  "filename": "session"
}


In [245]:
print_dict(config)

{
  "global_references": {
    "number_of_classes": 2,
    "batch_size": 2,
    "labels": [
      "cat",
      "dog"
    ]
  },
  "meta_info": {
    "ml_engine": "keras (tensorflow)",
    "model_purpose": "test_generator",
    "session_id": "16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "model_object_name": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9",
    "data_bunch_name": "images_456",
    "notebook_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
    "config_filepath": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_generator.json"
  },
  "init": {
    "read_source_data": {
      "name": "read_disk_keras_single_input_iterator"
    },
    "define_model": {
      "name": "define_conv_simple"
    },
    "get_optimizer": {
      "name": "get_keras_optimizer"
    },
    "get_loss_function": {
      "name": "get_keras_loss"
    },
    "compile_model": {
      "name": "compile_model"
    }

##### Save session artifacts

In [246]:
for artifact_dict in config["exec"]["save_session_artifacts"]["artifacts"]:
    
    artifact_dir=os.path.dirname(artifact_dict["artifact_path"]) 
    artifact_filename=os.path.basename(artifact_dict["artifact_path"])
    
    # save to local artifact store
    common_containers.ArtifactCopiers.copy_from_disk_to_disk(
        source_dir=artifact_dir,
        target_dir=artifact_dict["local_dir"],
        filename=artifact_filename,
        overwrite=False,
        delete_source=False)

    # copy to remote artifact store
    copy_from_local_to_remote(source_dir=artifact_dict["local_dir"],
                              target_dir=artifact_dict["remote_dir"],
                              filename=artifact_filename,
                              overwrite=False,
                              delete_source=False)
    

In [247]:
print("Session artifacts stored with following parameters")
print_dict(config["exec"]["save_session_artifacts"])

Session artifacts stored with following parameters
{
  "artifacts": [
    {
      "artifact_path": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\config/fit_config_generator.json",
      "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
      "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/session"
    },
    {
      "artifact_path": "C:\\Users\\karl.schriek\\PycharmProjects\\mercury-ml-github\\examples\\keras\\fit.ipynb",
      "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/session",
      "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/session"
    }
  ]
}


### Save model

In [248]:
for model_format, save_model in save_model_dict.items():
    
    tasks.store_model(save_model=save_model,
                      model=model,
                      copy_from_local_to_remote = copy_from_local_to_remote,
                      **config["exec"]["save_model"][model_format]
                      )

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: c:/tf_serving/_tmp_model/test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__tf_serving_predict\1\saved_model.pb


In [249]:
print("Model saved with following paramters: \n")
print_dict(config["exec"]["save_model"])

Model saved with following paramters: 

{
  "save_hdf5": {
    "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/models",
    "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/models",
    "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__hdf5",
    "extension": ".h5",
    "overwrite_remote": true
  },
  "save_tensorflow_serving_predict_signature_def": {
    "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/models",
    "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/models",
    "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__tf_serving_predict",
    "temp_base_dir": "c:/tf_serving/_tmp_model/test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__tf_serving_predict",
    "extension": ".zip",
    "overwrite_remote": true,
    "do_save_labels_txt": true,
    "input_name": "input",
    "output_name": "output",
    "labels_list": [
      "cat",
      "dog"
    ]
  }
}


### Evaluate metrics

##### Transform data

In [250]:
data_bunch_metrics = maybe_transform(data_bunch_fit, config["exec"]["evaluate"].get("pre_execution_transformation"))

print("Data transformed with following parameters: \n")
print_dict(config["exec"]["evaluate"].get("pre_execution_transformation"))

Data transformed with following parameters: 

null


In [251]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_metrics)

Transformed data_bunch consists of: 

train <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

valid <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

test <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>



##### Calculate metrics

In [252]:
metrics = {}
for data_set_name in config["exec"]["evaluate"]["data_set_names"]:
    data_set = getattr(data_bunch_metrics, data_set_name)
    metrics[data_set_name] = evaluate(model, data_set, **config["exec"]["evaluate"]["params"])

In [253]:
print("Resulting metrics: \n")
print_dict(metrics)

Resulting metrics: 

{
  "test": {
    "loss": 1.2512305676937103,
    "acc": 0.5
  }
}


### Save metrics

In [254]:
for data_set_name, params in config["exec"]["save_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote, metrics[data_set_name], **params)

### Predict

##### Transform data

In [255]:
data_bunch_predict = maybe_transform(data_bunch_metrics, config["exec"]["predict"].get("pre_execution_transformation"))
    
print("Data transformed with following parameters: \n")
print_dict(config["exec"]["predict"].get("pre_execution_transformation"))

Data transformed with following parameters: 

null


In [256]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_predict)

Transformed data_bunch consists of: 

train <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

valid <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>

test <DataSet>
  features <KerasIteratorFeaturesDataWrapper>
  targets <KerasIteratorTargetsDataWrapper>
  index <KerasIteratorIndexDataWrapper>



##### Perform prediction

In [257]:
for data_set_name in config["exec"]["predict"]["data_set_names"]:
    data_set = getattr(data_bunch_predict, data_set_name)
    data_set.predictions = predict(model=model, data_set=data_set, **config["exec"]["predict"]["params"])

In [258]:
print("Data predicted with following parameters: \n")
print_dict(config["exec"]["predict"].get("params"))


Data predicted with following parameters: 

{}


### Evaluate custom metrics

##### Transform data

In [259]:
data_bunch_custom_metrics = maybe_transform(data_bunch_predict, 
                                            config["exec"]["evaluate_custom_metrics"].get("pre_execution_transformation"))

In [260]:
print("Data transformed with following parameters: \n")
print_dict(config["exec"]["evaluate_custom_metrics"].get("pre_execution_transformation"))

Data transformed with following parameters: 

{
  "data_set_names": [
    "test"
  ],
  "params": {
    "transform_to": "numpy",
    "data_wrapper_params": {
      "predictions": {},
      "index": {},
      "targets": {}
    }
  }
}


In [261]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_custom_metrics)


Transformed data_bunch consists of: 

test <DataSet>
  predictions <NumpyDataWrapper>
  index <NumpyDataWrapper>
  targets <NumpyDataWrapper>



##### Calculate custom metrics


In [262]:
custom_metrics = {}
for data_set_name in config["exec"]["evaluate_custom_metrics"]["data_set_names"]:
    data_set = getattr(data_bunch_custom_metrics, data_set_name)
    custom_metrics[data_set_name]  = tasks.evaluate_metrics(data_set, custom_metrics_dict)


In [263]:
print("Resulting custom metrics: \n")
print_dict(custom_metrics)


Resulting custom metrics: 

{
  "test": {
    "evaluate_numpy_auc": 0.5555555555555556,
    "evaluate_numpy_micro_auc": 0.5277777777777777
  }
}


##### Calculate custom label metrics

In [264]:
custom_label_metrics = {}
for data_set_name in config["exec"]["evaluate_custom_label_metrics"]["data_set_names"]:
    data_set = getattr(data_bunch_custom_metrics, data_set_name)
    custom_label_metrics[data_set_name] = tasks.evaluate_label_metrics(data_set, custom_label_metrics_dict)

In [265]:
print("Resulting custom label metrics: \n")
print_dict(custom_label_metrics)

Resulting custom label metrics: 

{
  "test": {
    "Accuracy": {
      "cat": 0.5,
      "dog": 0.5
    },
    "ConfMat_Count_cat": {
      "cat": 3,
      "dog": 3
    },
    "ConfMat_Rate_cat": {
      "cat": 1.0,
      "dog": 1.0
    },
    "ConfMat_Count_dog": {
      "cat": 0,
      "dog": 0
    },
    "ConfMat_Rate_dog": {
      "cat": 0.0,
      "dog": 0.0
    }
  }
}


In [266]:
for data_set_name, params in config["exec"]["save_custom_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote,
                          custom_metrics[data_set_name], **params)

In [267]:
print("Custom metrics saved with following parameters: \n")
print_dict(config["exec"]["save_custom_metrics"])

Custom metrics saved with following parameters: 

{
  "data_sets": {
    "test": {
      "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/metrics/test",
      "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/metrics/test",
      "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__test__custom_metrics"
    }
  }
}


In [268]:
for data_set_name, params in config["exec"]["save_custom_label_metrics"]["data_sets"].items():
    tasks.store_artifacts(store_artifact_locally, copy_from_local_to_remote,
                          custom_label_metrics[data_set_name], **params)

In [269]:
print("Custom label metrics saved with following parameters: \n")
print_dict(config["exec"]["save_custom_label_metrics"])

Custom label metrics saved with following parameters: 

{
  "data_sets": {
    "test": {
      "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/metrics/test",
      "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/metrics/test",
      "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__test__custom_label_metrics"
    }
  }
}


### Prepare predictions for storage

##### Transform data

In [270]:
data_bunch_prediction_preparation = maybe_transform(data_bunch_predict, 
                                                    config["exec"]["prepare_predictions_for_storage"].get("pre_execution_transformation"))

In [271]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_prediction_preparation)

Transformed data_bunch consists of: 

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>



##### Prepare predictions and targets

In [272]:
for data_set_name in config["exec"]["prepare_predictions_for_storage"]["data_set_names"]:
    data_set = getattr(data_bunch_prediction_preparation, data_set_name)
    data_set.add_data_wrapper_via_concatenate(**config["exec"]["prepare_predictions_for_storage"]["params"]["predictions"])
    data_set.add_data_wrapper_via_concatenate(**config["exec"]["prepare_predictions_for_storage"]["params"]["targets"])

In [273]:
print_data_bunch(data_bunch_prediction_preparation)

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>
  predictions_for_storage <PandasDataWrapper>
  targets_for_storage <PandasDataWrapper>



### Save predictions

##### Transform data

In [274]:
data_bunch_prediction_storage = maybe_transform(data_bunch_prediction_preparation, 
                                                config["exec"]["save_predictions"].get("pre_execution_transformation"))

In [275]:
print("Transformed data_bunch consists of: \n")
print_data_bunch(data_bunch_prediction_storage)

Transformed data_bunch consists of: 

test <DataSet>
  predictions <PandasDataWrapper>
  index <PandasDataWrapper>
  targets <PandasDataWrapper>
  predictions_for_storage <PandasDataWrapper>
  targets_for_storage <PandasDataWrapper>



##### Save predictions

In [276]:
for data_set_name, data_set_params in config["exec"]["save_predictions"]["data_sets"].items():
    data_set = getattr(data_bunch_prediction_storage, data_set_name)
    data_wrapper = getattr(data_set, data_set_params["data_wrapper_name"])
    
    data_to_store = data_wrapper.underlying
   
    tasks.store_artifacts(store_prediction_artifact_locally, copy_from_local_to_remote,
                          data_to_store, **data_set_params["params"])

In [277]:
print("Predictions saved with following parameters: \n")
print_dict(config["exec"]["save_predictions"])

Predictions saved with following parameters: 

{
  "data_sets": {
    "test": {
      "data_wrapper_name": "predictions_for_storage",
      "params": {
        "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/predictions/test",
        "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/predictions/test",
        "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__test__predictions"
      }
    }
  }
}


##### Save targets

In [278]:
for data_set_name, data_set_params in config["exec"]["save_targets"]["data_sets"].items():
    data_set = getattr(data_bunch_prediction_storage, data_set_name)
    data_wrapper = getattr(data_set, data_set_params["data_wrapper_name"])
    
    data_to_store = data_wrapper.underlying
   
    tasks.store_artifacts(store_prediction_artifact_locally, copy_from_local_to_remote,
                          data_to_store, **data_set_params["params"])

In [279]:
print("Targets saved with following parameters: \n")
print_dict(config["exec"]["save_targets"])

Targets saved with following parameters: 

{
  "data_sets": {
    "test": {
      "data_wrapper_name": "targets_for_storage",
      "params": {
        "local_dir": "./example_results/local/16c534ee22ec4a068e0ebe1dfaa2f0d9/predictions/test",
        "remote_dir": "./example_results/remote/16c534ee22ec4a068e0ebe1dfaa2f0d9/predictions/test",
        "filename": "test_generator__16c534ee22ec4a068e0ebe1dfaa2f0d9__test__targets"
      }
    }
  }
}
