In [1]:
# Real life data

import logging
import threading
import itertools
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import seaborn as seabornInstance
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
from iotfunctions import base
from iotfunctions import bif
from iotfunctions import entity
from iotfunctions import metadata
from iotfunctions.metadata import EntityType
from iotfunctions.db import Database
from iotfunctions.enginelog import EngineLogging
from iotfunctions import estimator
from iotfunctions.ui import (UISingle, UIMultiItem, UIFunctionOutSingle,
                 UISingleItem, UIFunctionOutMulti, UIMulti, UIExpression,
                 UIText, UIStatusFlag, UIParameters)
from mmfunctions.anomaly import (SaliencybasedGeneralizedAnomalyScore, SpectralAnomalyScore,
                 FFTbasedGeneralizedAnomalyScore, KMeansAnomalyScore, GBMRegressor)
import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import scipy as sp
import scipy.fftpack
import skimage as ski  
from skimage import util as skiutil # for nifty windowing
import pyod as pyod
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

EngineLogging.configure_console_logging(logging.INFO)



In [2]:
credentials={}
db_schema=None
db = Database(credentials=credentials)
print (db)

2020-03-03T08:44:11.264 INFO iotfunctions.db.__init__ Connection string for SqlAlchemy => db2): db2+ibm_db://bluadmin:MjZkZTEwN2FjMWY1@dashdb-enterprise-yp-dal12-125.services.dal.bluemix.net:50000/BLUDB;
<iotfunctions.db.Database object at 0x7ff6265bfb90>


In [3]:
# need a helper function to convert array columns to something easier
from scipy import linalg
def l2norm(df, tcol, col1, col2 = None, col3 = None):
    l2vib = []
    for index, row in df.iterrows():
        l2vib_element = linalg.norm(np.fromstring(row[col1].replace('[',' ').replace(']',''), sep = ','))**2
        if col2 is not None:
            l2vib_element = l2vib_element + \
                            linalg.norm(np.fromstring(row[col2].replace('[',' ').replace(']',''), sep = ','))**2
        if col3 is not None:
            l2vib_element = l2vib_element + \
                            linalg.norm(np.fromstring(row[col3].replace('[',' ').replace(']',''), sep = ','))**2
        l2vib.append(l2vib_element**(1/2))
    df[tcol] = np.asarray(l2vib)
    

def unrollAccel(df):
    l0,l1,l2,l3,l4=[],[],[],[],[]
    for i in df['ACCEL_POWER'].values:
        l0.append(eval(eval(i)[0]))
        l1.append(eval(eval(i)[1]))
        l2.append(eval(eval(i)[2]))
        l3.append(eval(eval(i)[3]))
        l4.append(eval(eval(i)[4]))
    df['accel_power_0'] = np.asarray(l0)
    df['accel_power_1'] = np.asarray(l1)
    df['accel_power_2'] = np.asarray(l2)
    df['accel_power_3'] = np.asarray(l3)
    df['accel_power_4'] = np.asarray(l4)
    
listAttr = ['timestamp','entity','vibrations','rms','accel_speed','accel_power_0','accel_power_1',
            'accel_power_2','accel_power_3','accel_power_4']

In [4]:
# Now we proceed to customer data - GOOD CASE

# Get stuff in
df_input_raw = pd.read_csv('./Armstark04714B6046D5.csv', index_col=False, parse_dates=['RCV_TIMESTAMP_UTC'])
df_input_raw['entity']=df_input_raw['DEVICE_ID']
df_input_raw['timestamp']=df_input_raw['RCV_TIMESTAMP_UTC']

# and sort it by timestamp
df_input_raw = df_input_raw.sort_values(by='timestamp')
df_input_raw = df_input_raw.set_index(['entity','timestamp']).dropna()

l2norm(df_input_raw, 'vibrations', 'VIBRATIONS_XAXIS', 'VIBRATIONS_YAXIS', 'VIBRATIONS_ZAXIS')
l2norm(df_input_raw, 'rms', 'RMS_X', 'RMS_Y', 'RMS_Z')
l2norm(df_input_raw, 'accel_speed', 'ACCEL_SPEED')
unrollAccel(df_input_raw)
#l2norm(df_input_raw, 'accel_power', 'ACCEL_POWER')

df_input = df_input_raw.filter(listAttr, axis=1)
df_input_raw.describe()

  
  
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,vibrations,rms,accel_speed,accel_power_0,accel_power_1,accel_power_2,accel_power_3,accel_power_4
count,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0
mean,3.759047,1.732051,1.0,2.453523,2.453312,2.452808,2.452053,2.452439
std,0.033272,4.44115e-16,0.0,0.856247,0.857594,0.857678,0.8572,0.85634
min,3.616515,1.732051,1.0,0.0,0.0,0.0,0.0,0.0
25%,3.737696,1.732051,1.0,2.2518,2.2518,2.252,2.252,2.2518
50%,3.757949,1.732051,1.0,2.3138,2.3134,2.3118,2.3122,2.3126
75%,3.780154,1.732051,1.0,2.69145,2.6921,2.6851,2.68185,2.6823
max,3.898555,1.732051,1.0,5.325799,5.2932,5.334001,5.2786,5.282


#### Pandas Profiling

Try Pandas Profiling to get an overview about the data, mostly its distributions and correlations
<br>


In [5]:
from pandas_profiling import ProfileReport
profile = ProfileReport(df_input, title='Pandas Profiling Report', html={'style':{'full_width':True}})
profile.to_widgets()

HBox(children=(FloatProgress(value=0.0, description='variables', max=10.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='correlations', max=6.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='interactions [continuous]', max=36.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='table', max=1.0, style=ProgressStyle(description_width='i…




HBox(children=(FloatProgress(value=0.0, description='missing', max=2.0, style=ProgressStyle(description_width=…









HBox(children=(FloatProgress(value=0.0, description='package', max=1.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='build report structure', max=1.0, style=ProgressStyle(des…




Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(value='Number of va…

#### Customer suggested a correlation between vibration and acceleration

so let's try to predict (although correlation tests do not really indicate it)

In [6]:
# Run Monitoring's anomaly detector functions
EngineLogging.configure_console_logging(logging.DEBUG)

gbmii = GBMRegressor(features=['accel_power_0'], max_depth=20, num_leaves=40, threshold=2,
                     n_estimators=4000, learning_rate=0.00001, targets=['vibrations'],
                     predictions=['vibration_pred'])

jobsettings = { 'db': db, 
               '_db_schema': 'public', 'save_trace_to_file' : True}
et = gbmii._build_entity_type(columns = [Column('accel_power0',Float())], **jobsettings)
gbmii._entity_type = et

# allow training and delete existing models
gbmii.auto_train = True
gbmii.delete_existing_models = True
df_input = gbmii.execute(df=df_input)

2020-03-03T08:50:29.851 DEBUG iotfunctions.enginelog.configure_console_logging Console logging has been configured. Level = 10
2020-03-03T08:50:29.852 DEBUG iotfunctions.metadata.__init__ Initializing new entity type using iotfunctions 2.0.3
2020-03-03T08:50:29.853 DEBUG iotfunctions.util.__init__ Starting trace
2020-03-03T08:50:29.853 DEBUG iotfunctions.util.__init__ Trace name: auto_trace_test_entity_for_GBMRegressor_20200303075029
2020-03-03T08:50:29.854 DEBUG iotfunctions.util.__init__ auto_save None
2020-03-03T08:50:29.855 DEBUG iotfunctions.util.categorize_args categorizing arguments
2020-03-03T08:50:29.855 DEBUG iotfunctions.metadata.__init__ Initialized entity type 
LocalEntityType:TEST_ENTITY_FOR_GBMREGRESSOR
Functions:
Granularities:
No schedules metadata
2020-03-03T08:50:29.860 DEBUG urllib3.connectionpool._new_conn Starting new HTTPS connection (1): s3-api.us-geo.objectstorage.softlayer.net
2020-03-03T08:50:32.214 DEBUG urllib3.connectionpool._make_request https://s3-api.us

  "columns for table '%s'" % (flavor, c, table_name)


2020-03-03T08:50:36.624 DEBUG iotfunctions.base.execute_preprocessing Completed preprocessing
2020-03-03T08:50:36.633 DEBUG iotfunctions.util.log_df_info training set df count: 6896  ; index: entity,timestamp  ; columns: vibrations,rms,accel_speed,accel_power_0,accel_power_1,accel_power_2,accel_power_3,accel_power_4
2020-03-03T08:50:36.634 DEBUG iotfunctions.util.log_df_info test set df count: 1724  ; index: entity,timestamp  ; columns: vibrations,rms,accel_speed,accel_power_0,accel_power_1,accel_power_2,accel_power_3,accel_power_4
2020-03-03T08:50:36.636 INFO iotfunctions.base.execute Prepare to train model {
 "name": "model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations",
 "target": "vibrations",
 "features": [
  "accel_power_0"
 ],
 "estimator_name": null,
 "eval_metric_name": "r2_score",
 "eval_metric_train": null,
 "eval_metric_test": null,
 "trained_date": null,
 "expiry_date": null
}
2020-03-03T08:50:36.638 INFO mmfunctions.anomaly.set_estimators GBMRegressor start search 



2020-03-03T08:51:15.536 DEBUG iotfunctions.base.fit_with_search_cv Used randomize search cross validation to find best hyper parameters for estimator RandomizedSearchCV
2020-03-03T08:51:15.537 INFO iotfunctions.base.find_best_model Trained model: 0
2020-03-03T08:51:15.922 INFO iotfunctions.base.find_best_model Trained model: 0 score:0.002047056433677863
2020-03-03T08:51:16.055 INFO iotfunctions.metadata.test evaluated model model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations with evaluation metric value 0.0005335272716041617




2020-03-03T08:51:53.083 DEBUG iotfunctions.base.fit_with_search_cv Used randomize search cross validation to find best hyper parameters for estimator RandomizedSearchCV
2020-03-03T08:51:53.084 INFO iotfunctions.base.find_best_model Trained model: 1
2020-03-03T08:51:53.498 INFO iotfunctions.base.find_best_model Trained model: 1 score:0.002047056433677863
2020-03-03T08:51:53.627 INFO iotfunctions.metadata.test evaluated model model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations with evaluation metric value 0.0005335272716041617




2020-03-03T08:52:43.125 DEBUG iotfunctions.base.fit_with_search_cv Used randomize search cross validation to find best hyper parameters for estimator RandomizedSearchCV
2020-03-03T08:52:43.125 INFO iotfunctions.base.find_best_model Trained model: 2
2020-03-03T08:52:43.536 INFO iotfunctions.base.find_best_model Trained model: 2 score:0.002047056433677863
2020-03-03T08:52:43.634 INFO iotfunctions.metadata.test evaluated model model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations with evaluation metric value 0.0005335272716041617




2020-03-03T08:53:27.548 DEBUG iotfunctions.base.fit_with_search_cv Used randomize search cross validation to find best hyper parameters for estimator RandomizedSearchCV
2020-03-03T08:53:27.548 INFO iotfunctions.base.find_best_model Trained model: 3
2020-03-03T08:53:27.888 INFO iotfunctions.base.find_best_model Trained model: 3 score:0.002047056433677863
2020-03-03T08:53:27.977 INFO iotfunctions.metadata.test evaluated model model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations with evaluation metric value 0.0005335272716041617




2020-03-03T08:54:13.136 DEBUG iotfunctions.base.fit_with_search_cv Used randomize search cross validation to find best hyper parameters for estimator RandomizedSearchCV
2020-03-03T08:54:13.136 INFO iotfunctions.base.find_best_model Trained model: 4
2020-03-03T08:54:13.448 INFO iotfunctions.base.find_best_model Trained model: 4 score:0.002047056433677863
2020-03-03T08:54:13.543 INFO iotfunctions.metadata.test evaluated model model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations with evaluation metric value 0.0005335272716041617
2020-03-03T08:54:13.564 DEBUG iotfunctions.base.execute Trained model: {
 "name": "model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.vibrations",
 "target": "vibrations",
 "features": [
  "accel_power_0"
 ],
 "estimator_name": "light_gradient_boosted_regressor",
 "eval_metric_name": "r2_score",
 "eval_metric_train": 0.002047056433677863,
 "eval_metric_test": 0.0005335272716041617,
 "trained_date": "2020-03-03T07:51:15.924770",
 "expiry_date": null
}
2020-03-03

In [7]:
# now predict 
EngineLogging.configure_console_logging(logging.DEBUG)
gbmii = GBMRegressor(features=['accel_power_0'], max_depth=100000, num_leaves=40, threshold=2,
                     n_estimators=1000, learning_rate=0.001, targets=['vibrations'],
                     predictions=['vibration_pred'])

jobsettings = { 'db': db, 
               '_db_schema': 'public', 'save_trace_to_file' : True}
et = gbmii._build_entity_type(columns = [Column('accel_power0',Float())], **jobsettings)
gbmii._entity_type = et

df_input = gbmii.execute(df=df_input)

2020-03-03T09:08:05.430 DEBUG iotfunctions.enginelog.configure_console_logging Console logging has been configured. Level = 10
2020-03-03T09:08:05.431 DEBUG iotfunctions.metadata.__init__ Initializing new entity type using iotfunctions 2.0.3
2020-03-03T09:08:05.432 DEBUG iotfunctions.util.__init__ Starting trace
2020-03-03T09:08:05.432 DEBUG iotfunctions.util.__init__ Trace name: auto_trace_test_entity_for_GBMRegressor_20200303080805
2020-03-03T09:08:05.433 DEBUG iotfunctions.util.__init__ auto_save None
2020-03-03T09:08:05.433 DEBUG iotfunctions.util.categorize_args categorizing arguments
2020-03-03T09:08:05.434 DEBUG iotfunctions.metadata.__init__ Initialized entity type 
LocalEntityType:TEST_ENTITY_FOR_GBMREGRESSOR
Functions:
Granularities:
No schedules metadata
2020-03-03T09:08:05.435 INFO iotfunctions.base.get_models_for_training predicting target vibrations
2020-03-03T09:08:05.438 DEBUG urllib3.connectionpool._new_conn Starting new HTTPS connection (1): s3-api.us-geo.objectstorag