In [1]:
!conda run -n base pip install -q --upgrade  git+https://github.com/landerlini/scikinC.git@validation_helpers --force-reinstall --no-deps
import scikinC
import scikinC.layers
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os, os.path
from os import environ

## Remove annoying warnings
## The log level is assigned before the `import tensorflow` line below; keep
## this order (presumably the variable is read when tensorflow is imported).
environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf




In [None]:
from deploy_utils import LamarrModel

# Location of each exported model: the environment variable, when set,
# takes precedence over the default path.
acceptance_pb = environ.get(
    "ACCEPTANCE_MODEL",
    "/workarea/cloud-storage/anderlinil/models/acceptance/saved_model.pb",
)
efficiency_pb = environ.get(
    "EFFICIENCY_MODEL",
    "/workarea/cloud-storage/anderlinil/models/efficiency/saved_model.pb",
)

# Load the two models in the same order as they are used in the rest of the
# notebook (acceptance first, then efficiency).
acceptance = LamarrModel.from_saved_model_pb(acceptance_pb)
efficiency = LamarrModel.from_saved_model_pb(efficiency_pb)

n_weights original 166913
n_weights_collapsed 166913


In [None]:
# Rebuild the fitted-transformer entries of each preprocessing step so that
# the third element (the column selection) is a plain Python list.
# NOTE(review): presumably required by the scikinC converter -- confirm.
for model in (acceptance, efficiency):
    listified = []
    for step in model.tX.transformers_:
        # Keep the first two entries untouched, copy the columns into a list.
        listified.append((*step[:2], list(step[2])))
    model.tX.transformers_ = listified

In [None]:
from deploy_utils import hacks

# Monkeypatch: replace scikinC's DenseWithSkipConnection with the
# project-provided implementation from `deploy_utils.hacks`.
patched_layer = hacks.scikinC_DenseWithSkipConnection
scikinC.layers.DenseWithSkipConnection = patched_layer

In [None]:
# Destination of the generated C source; override with GENERATED_C_FILE.
generated_c = environ.get("GENERATED_C_FILE", "exported/generated.C")

# Pipelines to export, keyed by name ("acceptance" / "efficiency" are the
# names later requested from the compiled library).
models = dict(
    acceptance=acceptance.pipeline,
    efficiency=efficiency.pipeline
)

# Convert first, so that a failing conversion does not truncate an existing file.
c_source = scikinC.convert(models)

# Create the output directory if needed (the default path lives in `exported/`).
output_dir = os.path.dirname(generated_c)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# The context manager guarantees the file is flushed and closed before the
# compiler reads it in the next cell.
with open(generated_c, 'w') as c_file:
    print(c_source, file=c_file)

In [None]:
lib_path = generated_c.replace('.C', str(np.random.randint(0xFFFFFF)) + '.so')
print (lib_path)
!gcc {generated_c} -o {lib_path} -O3 -lm --shared -fPIC

In [None]:
from scikinC.validation import MLFunction

# Wrap the compiled functions so they can be called like the Python models.
# `n_inputs` is the number of input features, so it is read from the model's
# *input* shape; it was previously read from `output_shape`, which is the
# number of outputs (1 and 4 respectively).
# NOTE(review): assumes single-input Keras models whose `input_shape` is
# (None, n_features) and a one-to-one column transformer -- confirm.
acceptance.deployed = MLFunction(
    lib_path, "acceptance",
    n_inputs=acceptance.model.input_shape[1],
    n_outputs=1,
)
efficiency.deployed = MLFunction(
    lib_path, "efficiency",
    n_inputs=efficiency.model.input_shape[1],
    n_outputs=4,
)

In [None]:
from validation_utils import invert_column_transformer
from feather_io import FeatherReader


def make_comparison_plot(title, y_py, y_py_collapsed, y_c):
    """Compare the responses of three versions of the same model.

    A two-panel figure is drawn and shown:

    * left: overlaid histograms (log scale) of the three responses, all
      sharing the binning computed for ``y_py``;
    * right: histograms (log scale) of the differences ``y_c - y_py``
      ("converting") and ``y_py_collapsed - y_py`` ("collapsing").

    Args:
        title: title of the left panel.
        y_py: response of the original Python model.
        y_py_collapsed: response of the collapsed model.
        y_c: response of the deployed (compiled) model.

    The three responses must provide ``.flatten()`` (e.g. numpy arrays).
    """
    fig, (ax_response, ax_error) = plt.subplots(1, 2, figsize=(10, 3))

    # Left panel: the first histogram defines the bin edges used by the others.
    _, response_edges, _ = ax_response.hist(y_py, bins=50, label="Original python model")
    ax_response.hist(
        y_py_collapsed, bins=response_edges, label="Collapsed model",
        histtype='step', linewidth=3,
    )
    ax_response.hist(
        y_c, bins=response_edges, label="Deployed model",
        histtype='step', linewidth=1,
    )
    ax_response.set_xlabel("Model response")
    ax_response.set_title(title)
    ax_response.legend()
    ax_response.set_yscale('log')

    # Right panel: per-sample differences with respect to the original model.
    reference = y_py.flatten()
    conversion_error = y_c.flatten() - reference
    collapsing_error = y_py_collapsed.flatten() - reference

    _, error_edges, _ = ax_error.hist(conversion_error, bins=50, label="converting")
    ax_error.hist(
        collapsing_error, bins=error_edges, histtype='step',
        label="collapsing", linewidth=2,
    )
    ax_error.set_xlabel("Conversion error")
    ax_error.set_yscale('log')
    ax_error.legend(title="Error introduced:")

    plt.show()

In [None]:
# Read the first 10k rows of the acceptance validation sample
# (npartitions=-1 lets dask look into every partition to collect them).
data_reader = FeatherReader(
    environ.get("ACCEPTANCE-TEST-DATA", "acceptance-validation")
)
dask_frame = data_reader.as_dask_dataframe()
test_dataset = dask_frame.head(10_000, npartitions=-1)

# pX feeds the Python models directly; X is obtained by inverting the
# acceptance column transformer and feeds the deployed function.
pX = test_dataset[data_reader.features].values
X = invert_column_transformer(acceptance.tX, pX)

make_comparison_plot(
    title="Acceptance",
    y_py=acceptance.model.predict(pX),
    y_py_collapsed=acceptance.collapsed_model.predict(pX),
    y_c=acceptance.deployed(X),
)

In [None]:
# Read the first 10k rows of the efficiency validation sample
# (npartitions=-1 lets dask look into every partition to collect them).
data_reader = FeatherReader(environ.get("EFFICIENCY-TEST-DATA", "efficiency-validation"))
test_dataset = data_reader.as_dask_dataframe().head(10_000, npartitions=-1)
pX = test_dataset[data_reader.features].values

# Invert the preprocessing with the *efficiency* transformer: the features
# belong to the efficiency model. (This cell previously used `acceptance.tX`,
# a copy-paste slip from the acceptance validation cell.)
X = invert_column_transformer(efficiency.tX, pX)

# Evaluate every model once and reuse the result for all the plots instead of
# recomputing the predictions for each output column.
efficiency_py = efficiency.model.predict(pX)
efficiency_py_collapsed = efficiency.collapsed_model.predict(pX)
efficiency_c = efficiency.deployed(X)

# Output columns 1-3 are compared; column 0 is not plotted.
for column, title in (
    (1, "Efficiency as long tracks"),
    (2, "Efficiency as upstream tracks"),
    (3, "Efficiency as downstream tracks"),
):
    make_comparison_plot(
        title,
        efficiency_py[:, column],
        efficiency_py_collapsed[:, column],
        efficiency_c[:, column],
    )