In [1]:
# Real life data

import logging
import threading
import json
import itertools
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import ibm_db
import shap

from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import seaborn as seabornInstance

from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
from iotfunctions import base
from iotfunctions import bif
from iotfunctions.db import Database
from iotfunctions import entity
from iotfunctions import metadata
from iotfunctions.metadata import EntityType
from iotfunctions.enginelog import EngineLogging
from iotfunctions.dbtables import FileModelStore
from iotfunctions import estimator
from iotfunctions.ui import (UISingle, UIMultiItem, UIFunctionOutSingle,
                 UISingleItem, UIFunctionOutMulti, UIMulti, UIExpression,
                 UIText, UIStatusFlag, UIParameters)
from iotfunctions.dbtables import FileModelStore, DBModelStore
from mmfunctions.anomaly import (SaliencybasedGeneralizedAnomalyScore, SpectralAnomalyScore,
                 FFTbasedGeneralizedAnomalyScore, KMeansAnomalyScore, SimpleRegressor,
                                 RANSACRegressor, BayesRidgeRegressor)
from mmfunctions.bif import CumulativeCount
from mmfunctions.customer import ONNXRegressor
from iotfunctions.anomaly import GBMRegressor
import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import MinCovDet
from sklearn import metrics
import scipy as sp
import scipy.fftpack
#import skimage as ski  
#from skimage import util as skiutil # for nifty windowing
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

EngineLogging.configure_console_logging(logging.INFO)



### Preparation

**First:** Set up the database object from the pipeline v1 functions SDK

In [18]:
# Dummy database handler used in place of a real database connection.
db_schema = None


class DatabaseDummy:
    """Placeholder database object exposing only class-level attributes.

    Attributes:
        tenant_id: fixed placeholder tenant identifier.
        db_type: database flavour string, set to 'db2'.
        model_store: file-based model store created with path './'.
    """

    tenant_id = '###_IBM_###'
    db_type = 'db2'
    model_store = FileModelStore('./')

    def _init(self):
        # NOTE(review): this is `_init`, not `__init__`, so Python does not
        # invoke it when the object is constructed; it is a no-op either way.
        return None


db = DatabaseDummy()

print(db.tenant_id)

2023-04-02T11:19:49.440 INFO iotfunctions.dbtables.__init__ Init FileModelStore with path: ./
###_IBM_###


-

**Second:** Load training data

-

In [3]:
# Read the training data; the 'timestamp' column is parsed as datetimes.
df_input_raw = pd.read_csv(
    './data/ONNXRegressorData.csv',
    parse_dates=['timestamp'],
)

# Index by (id, timestamp); the raw frame is kept untouched in df_input_raw.
df_input = df_input_raw.set_index(['id', 'timestamp'])

In [22]:
#df_input.loc['N867158'][0:1]

### Set up training 

We make use of a pipeline v1 custom function.

The Pipeline v1 SDK supports training regressors with an automatic split into training and test data,
data cleaning and optional scaling. Regressors trained this way are compatible with the way pipeline v1 handles pandas data frames.

In [24]:
from mmfunctions.anomaly import RANSACRegressor

# --- build a mini Monitor pipeline around a single regressor function ---

# verbose logging while the function and its entity type are constructed
EngineLogging.configure_console_logging(logging.DEBUG)

jobsettings = {
    'db': db,
    '_db_schema': 'public',
    'save_trace_to_file': True,
}

# the regressor as a pipeline v1 function, predicting target 'IY'
# (presumably from the features 'IR' and 'IB' - confirm against the
# RANSACRegressor signature)
test3 = RANSACRegressor(['IR', 'IB'], targets=['IY'], outliers='outliers')

# entity type object for the function
# NOTE(review): the declared column 'GunPressure_Kmeans' is not among the
# inputs/targets passed above - confirm that this is intentional.
et3 = test3._build_entity_type(
    columns=[Column('GunPressure_Kmeans', Float())],
    **jobsettings
)
et3._timestamp = 'timestamp'

# naming convention: 'IOT_' + entity name + '_' + entity_id
et3.name = 'IOT_ZROBOT_M3810_626542'

# attach the entity type to the regressor function
test3._entity_type = et3

# train automatically and ditch old models if there are any
test3.auto_train = True
test3.delete_existing_models = True

# back to INFO verbosity for the rest of the notebook
EngineLogging.configure_console_logging(logging.INFO)

2023-04-02T11:27:33.359 DEBUG iotfunctions.enginelog.configure_console_logging Console logging has been configured. Level = 10
2023-04-02T11:27:33.364 DEBUG iotfunctions.metadata.__init__ Initializing new entity type using iotfunctions 8.9.0
2023-04-02T11:27:33.366 DEBUG iotfunctions.util.__init__ Starting trace
2023-04-02T11:27:33.368 DEBUG iotfunctions.util.__init__ Trace name: auto_trace_test_entity_for_RANSACRegressor_20230402092733
2023-04-02T11:27:33.370 DEBUG iotfunctions.util.__init__ auto_save None
2023-04-02T11:27:33.372 DEBUG iotfunctions.util.categorize_args categorizing arguments
2023-04-02T11:27:33.373 INFO iotfunctions.metadata.__init__ The entity type is not connected to a metric input table.
2023-04-02T11:27:33.374 DEBUG iotfunctions.metadata.__init__ Initialized entity type 
LocalEntityType:TEST_ENTITY_FOR_RANSACREGRESSOR
Functions:
Granularities:
No schedules metadata


In [25]:
# start training
# The input frame is rebuilt from df_input_raw on every run, so re-executing
# this cell does not feed the previous run's output back into the regressor.
# The result is still bound to df_input, as before.
df_input = test3.execute(df=df_input_raw.set_index(['id', 'timestamp']))

2023-04-02T11:27:47.635 ERROR iotfunctions.base.get_bucket_name Could not find credentials for entity type. COS read/write is disabled 
2023-04-02T11:27:47.635 INFO iotfunctions.base.delete_models Model names to delete: ['model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446']
2023-04-02T11:27:47.636 INFO iotfunctions.dbtables.delete_model Model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446 has been deleted from filesystem
2023-04-02T11:27:47.637 INFO iotfunctions.base.get_models_for_training predicting target IY
2023-04-02T11:27:47.637 INFO iotfunctions.dbtables.retrieve_model Model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446 does not exist in filesystem
2023-04-02T11:27:47.638 INFO iotfunctions.base.get_models_for_training load model None
2023-04-02T11:27:47.638 INF

The total space of parameters 1 is smaller than n_iter=3. Running 1 iterations. For exhaustive searches, use GridSearchCV.


2023-04-02T11:27:47.933 INFO iotfunctions.base.find_best_model Trained model no: 0
2023-04-02T11:27:47.935 INFO iotfunctions.base.find_best_model Trained model no: 0 score:-0.02195064907506361
2023-04-02T11:27:47.937 INFO iotfunctions.metadata.test evaluated model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446 with evaluation metric value -0.02395792906262506
2023-04-02T11:27:47.940 INFO iotfunctions.base._execute Finished training model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446
2023-04-02T11:27:47.941 INFO iotfunctions.dbtables.retrieve_model Model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446 of size 2415 bytes has been retrieved from filesystem
2023-04-02T11:27:47.943 INFO iotfunctions.metadata.predict predicted using model model.IOT_ZROBOT_M3810_626542.RA

X has feature names, but BayesianRidge was fitted without feature names
The total space of parameters 1 is smaller than n_iter=3. Running 1 iterations. For exhaustive searches, use GridSearchCV.


2023-04-02T11:27:48.248 INFO iotfunctions.base.find_best_model Trained model no: 0
2023-04-02T11:27:48.250 INFO iotfunctions.base.find_best_model Trained model no: 0 score:-0.03166314226642619
2023-04-02T11:27:48.253 INFO iotfunctions.metadata.test evaluated model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N440208 with evaluation metric value -0.05278452105520315
2023-04-02T11:27:48.255 INFO iotfunctions.base._execute Finished training model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N440208
2023-04-02T11:27:48.256 INFO iotfunctions.dbtables.retrieve_model Model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N440208 of size 2465 bytes has been retrieved from filesystem
2023-04-02T11:27:48.258 INFO iotfunctions.metadata.predict predicted using model model.IOT_ZROBOT_M3810_626542.RA

X has feature names, but BayesianRidge was fitted without feature names
The total space of parameters 1 is smaller than n_iter=3. Running 1 iterations. For exhaustive searches, use GridSearchCV.


2023-04-02T11:27:48.576 INFO iotfunctions.base.find_best_model Trained model no: 0
2023-04-02T11:27:48.578 INFO iotfunctions.base.find_best_model Trained model no: 0 score:-0.0755638486778143
2023-04-02T11:27:48.581 INFO iotfunctions.metadata.test evaluated model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N867158 with evaluation metric value -0.2704368555197356
2023-04-02T11:27:48.583 INFO iotfunctions.base._execute Finished training model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N867158
2023-04-02T11:27:48.584 INFO iotfunctions.dbtables.retrieve_model Model model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N867158 of size 2505 bytes has been retrieved from filesystem
2023-04-02T11:27:48.586 INFO iotfunctions.metadata.predict predicted using model model.IOT_ZROBOT_M3810_626542.RANS

X has feature names, but BayesianRidge was fitted without feature names


### Introspect training results

Training results, a trained regressor for each entity (device), are collected in a python dict. The unique key will be 

model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446

reflecting the entity type name and id, the name of the regressor function, a UUID-encoded list of features, the target, and the device/entity name.

Pipeline v1 makes use of that schema to retrieve model binaries from DB2.

In [31]:
# Fetch the trained estimator for the first device, 'N438446':
# look up its entry in active_models by the full model key, take the
# first element of that entry and read its `estimator` attribute.
model = test3.active_models[
    'model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446'
][0].estimator

In [32]:
# display the retrieved estimator (the cell's last expression is rendered as its output)
model

### Convert the model into ONNX

In [33]:
# turn it into an ONNX model
import onnxmltools
import skl2onnx
import onnx
import sklearn
import os
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnxruntime as rt
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes  # noqa
import onnxmltools.convert.common.data_types
from skl2onnx.common.data_types import FloatTensorType
import numpy
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from onnxruntime import InferenceSession
from skl2onnx import to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes  # noqa
from onnxmltools import __version__ as oml_version
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType

In [34]:
# Convert the scikit-learn estimator to an ONNX model named 'pipeline_ransac'.
# The single input 'input' is a float tensor with 2 columns (presumably the
# two features the regressor was trained on - confirm); the first dimension
# is left as None, i.e. not fixed.
model_onnx = convert_sklearn(
    model,
    name='pipeline_ransac',
    initial_types=[('input', FloatTensorType([None, 2]))],
)

In [35]:
# Persist the serialized ONNX model to the working directory (binary mode).
with open("pipeline_ransac.onnx", "wb") as f:
    onnx_bytes = model_onnx.SerializeToString()
    f.write(onnx_bytes)

In [36]:
from jyquickhelper import RenderJsDot
from mlprodict.onnxrt import OnnxInference

# Visualize the converted model: OnnxInference yields a DOT description of
# the ONNX graph, which RenderJsDot displays as the cell output.
sess = OnnxInference(model_onnx)
dot = sess.to_dot()
RenderJsDot(dot)

### Prepare to save

Set up a metadata object for the entity type to "convince" the pipeline v1 SDK to store the ONNX object as model binary for entity type ZRobot_M380 with the db2 schema name 'MASDEV_MAM' (tenant name + acronym for Maximo Asset Monitor)

In [37]:
from collections import OrderedDict

# Entity-type metadata: name first, then the db2 schema name.
# NOTE(review): binding `metadata` here shadows the `iotfunctions.metadata`
# module imported under the same name in the first cell.
metadata = OrderedDict([
    ('name', 'ZRobot_M380'),
    ('schemaName', 'MASDEV_MAM'),
])

In [38]:
# open a connection to the tenant's db2
import os

# Location of the credentials JSON file. It can be overridden with the
# MONITOR_CREDENTIALS_FILE environment variable so the notebook is not tied
# to one developer's home directory; the default is the original path.
credentials_path = os.environ.get(
    'MONITOR_CREDENTIALS_FILE',
    '/home/markus/src/mmfunctions/credentials_as_monitordev3.json')

with open(credentials_path, encoding='utf-8') as credentials_file:
    credentials = json.load(credentials_file)

# Database handle for tenant 'MASDEV', using the entity metadata defined in
# the previous cell and the numeric entity type id.
realdb = Database(credentials=credentials, tenant_id='MASDEV', entity_metadata=metadata, entity_type_id=626542)

HERE
2023-04-02T11:37:52.241 INFO iotfunctions.db.__init__ Data Dictionary is not available.


In [39]:
# check native connection to tenant database (for model table)
# Evaluates to a tuple of realdb's native connection and model store, so both
# are shown as the cell output.
realdb.native_connection, realdb.model_store

(<ibm_db.IBM_DBConnection at 0x7f7779e462e0>,
 <iotfunctions.dbtables.DBModelStore at 0x7f7778b5bd30>)

In [41]:
# Keys of all models trained above; iterating the active_models dict
# yields its keys.
list(test3.active_models)

['model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N438446',
 'model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N440208',
 'model.IOT_ZROBOT_M3810_626542.RANSACRegressor.67b537e9ea5557ae8ececb36588ec84baa1e54b5b24c9d2ce7c349d55aae5ea3:IY.N867158']

### 'Rename' models

This is the confusing part:

Please keep in mind that we've trained our models on synthetic data and that we now apply them to predict **random data**. Focus is not on prediction accuracy but the *mechanics* of training, deployment and usage of an ONNX model. 

Furthermore the models carry a different name (key):  

We have trained a RANSACRegressor so the key contains the string 'RANSACRegressor', but the ONNXRegressor function is expecting a name 'SupervisedLearningTransformer'.

Let's find the name pipeline v1 **would** expect


In [44]:
# Instantiate an ONNXRegressor function.
from mmfunctions.customer import ONNXRegressor

# NOTE(review): the positional arguments are presumably features, targets,
# prediction output names and a confidence-related name/prefix - confirm
# against the ONNXRegressor signature.
onnxregressor = ONNXRegressor(
    ['acc', 'torque'],
    ['speed'],
    ['prediction_speed'],
    'conf_',
)

# Reuse et3: this object has the correct entity type name and id.
onnxregressor._entity_type = et3


In [46]:
# Ask the ONNXRegressor for the model key that pipeline v1 would expect.
# Judging by the resulting key, 'speed' ends up as the target part, 'model'
# as the leading prefix and '73004.onnx' as the trailing suffix.
model_name = onnxregressor.generate_model_name(
    [],
    'speed',
    'model',
    '73004.onnx',
)
model_name

'model.IOT_ZROBOT_M3810_626542.SupervisedLearningTransformer.bbccb9c002d5ed5994789fd8d38a8af3825f7691e10c44cac9b07dc54b15c2ee:speed.73004.onnx'

### Save it !

Now we have both the ONNX binary and the correct name under which pipeline v1 will retrieve it from db2.

In [47]:


# Store the serialized ONNX model in the tenant's model store under the key
# computed above.
# NOTE(review): serialize=False presumably stops the store from serializing
# the payload again, since it is already bytes; the meaning of the third
# positional argument (None) is not visible here - confirm both against
# DBModelStore.store_model.
realdb.model_store.store_model(
    model_name,
    model_onnx.SerializeToString(),
    None,
    serialize=False,
)

2023-04-02T11:47:03.237 INFO iotfunctions.dbtables.store_model Model model.IOT_ZROBOT_M3810_626542.SupervisedLearningTransformer.bbccb9c002d5ed5994789fd8d38a8af3825f7691e10c44cac9b07dc54b15c2ee:speed.73004.onnx of size 276 bytes has been stored in table "MASDEV_MAM"."KPI_MODEL_STORE".
