#### Preparations

* Checkout code with `git clone -b extreme_anomaly_package ssh://git@github.com/ankit-jha/addCustomIotFn`
* cd into the directory, adapt setup.py: turn all `==` operators into `>=` to avoid accidentially uninstalling stuff
* Run `pip install .`

In [1]:
# Real life data

import logging
import threading
import itertools
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import seaborn as seabornInstance
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
from iotfunctions import base
from iotfunctions import bif
from iotfunctions import entity
from iotfunctions import metadata
from iotfunctions.metadata import EntityType
from iotfunctions.db import Database
from iotfunctions.enginelog import EngineLogging
from iotfunctions import estimator
from iotfunctions.ui import (UISingle, UIMultiItem, UIFunctionOutSingle,
                 UISingleItem, UIFunctionOutMulti, UIMulti, UIExpression,
                 UIText, UIStatusFlag, UIParameters)
from mmfunctions.anomaly import (SaliencybasedGeneralizedAnomalyScore, SpectralAnomalyScore,
                 FFTbasedGeneralizedAnomalyScore, KMeansAnomalyScore, GBMRegressor)
from extremeanomaly.extremeanomalygenerator import ExtremeAnomalyGenerator

import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import scipy as sp
import scipy.fftpack
import skimage as ski  
from skimage import util as skiutil # for nifty windowing
import pyod as pyod
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

EngineLogging.configure_console_logging(logging.INFO)



In [3]:
# need a helper function to convert array columns to something easier
from scipy import linalg
def l2norm(df, tcol, col1, col2 = None, col3 = None):
    def str_norm(cols_str):
        '''norm for one string element (encodes an array of value)
           in one column of a data point'''
        return linalg.norm(np.fromstring(cols_str.replace('[',' ').replace(']','').replace('\"', ''), sep = ','))**2

    def column_norm(df, tcol, col1, col2=None, col3=None):
        '''norm of all columns specified in parameters for all datapoints'''
        df_temp = pd.DataFrame()
        df_temp['col1_np'] = df[col1].apply(str_norm)
        df_temp['col2_np'] = 0 
        df_temp['col3_np'] = 0
        if col2 is not None:
            df_temp['col2_np'] = df[col2].apply(str_norm)
        if col3 is not None:
            df_temp['col3_np'] = df[col3].apply(str_norm)

        return (df_temp['col1_np'] + df_temp['col2_np'] + df_temp['col3_np'])**(1/2)
    
    df[tcol] = column_norm(df, tcol, col1, col2, col3)


def unrollAccel(df):
    l0,l1,l2,l3,l4=[],[],[],[],[]
    for i in df['ACCEL_POWER'].values:
        l0.append(eval(eval(i)[0]))
        l1.append(eval(eval(i)[1]))
        l2.append(eval(eval(i)[2]))
        l3.append(eval(eval(i)[3]))
        l4.append(eval(eval(i)[4]))
    df['accel_power_0'] = np.asarray(l0)
    df['accel_power_1'] = np.asarray(l1)
    df['accel_power_2'] = np.asarray(l2)
    df['accel_power_3'] = np.asarray(l3)
    df['accel_power_4'] = np.asarray(l4)
    
listAttr = ['timestamp','entity','vibrations','rms','accel_speed','accel_power_0','accel_power_1',
            'accel_power_2','accel_power_3','accel_power_4']

In [4]:
# Now we proceed to customer data - GOOD CASE

# Get stuff in
df_input_raw = pd.read_csv('./Armstark04714B6046D5.csv', index_col=False, parse_dates=['RCV_TIMESTAMP_UTC'])
df_input_raw['entity']=df_input_raw['DEVICE_ID']
df_input_raw['timestamp']=df_input_raw['RCV_TIMESTAMP_UTC']

# and sort it by timestamp
df_input_raw = df_input_raw.sort_values(by='timestamp')
df_input_raw = df_input_raw.set_index(['entity','timestamp']).dropna()

l2norm(df_input_raw, 'vibrations', 'VIBRATIONS_XAXIS', 'VIBRATIONS_YAXIS', 'VIBRATIONS_ZAXIS')
l2norm(df_input_raw, 'rms', 'RMS_X', 'RMS_Y', 'RMS_Z')
l2norm(df_input_raw, 'accel_speed', 'ACCEL_SPEED')
unrollAccel(df_input_raw)
#l2norm(df_input_raw, 'accel_power', 'ACCEL_POWER')

df_input = df_input_raw.filter(listAttr, axis=1)
df_input_raw.describe()

Unnamed: 0,vibrations,rms,accel_speed,accel_power_0,accel_power_1,accel_power_2,accel_power_3,accel_power_4
count,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0,8620.0
mean,3.759047,2.17046,2208.854495,2.453523,2.453312,2.452808,2.452053,2.452439
std,0.033272,0.002922,589.92554,0.856247,0.857594,0.857678,0.8572,0.85634
min,3.616515,2.155864,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.737696,2.169444,2316.566425,2.2518,2.2518,2.252,2.252,2.2518
50%,3.757949,2.1708,2318.802493,2.3138,2.3134,2.3118,2.3122,2.3126
75%,3.780154,2.171949,2379.176328,2.69145,2.6921,2.6851,2.68185,2.6823
max,3.898555,2.184292,2672.101233,5.325799,5.2932,5.334001,5.2786,5.282


#### Pandas Profiling

Try [Pandas Profiling](https://github.com/pandas-profiling/pandas-profiling) first, since srom's `DataQualityAdvisor` is currently not usable. 


In [5]:
from pandas_profiling import ProfileReport
profile = ProfileReport(df_input, title='Pandas Profiling Report', html={'style':{'full_width':True}})
profile.to_widgets()

HBox(children=(FloatProgress(value=0.0, description='variables', max=10.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='correlations', max=6.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='interactions [continuous]', max=64.0, style=ProgressStyle…




HBox(children=(FloatProgress(value=0.0, description='table', max=1.0, style=ProgressStyle(description_width='i…




HBox(children=(FloatProgress(value=0.0, description='missing', max=2.0, style=ProgressStyle(description_width=…









HBox(children=(FloatProgress(value=0.0, description='package', max=1.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='build report structure', max=1.0, style=ProgressStyle(des…




Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(value='Number of va…

#### DataQualityAdvisor

Import after pip-installing it from [IBM enterprise github](https://github.ibm.com/srom/DataQualityAdvisor/)



In [6]:
from dqlearn.ui.interactive.data_loader import data_loader_configuration_ui, data_loader_configuration, load_data
from dqlearn.ui.interactive.data_loader import load_data
from dqlearn.ui.interactive.display_data import display_data_ui
from dqlearn.ui.interactive.metadata_gen import column_metadata_dict, user_metadata_input_ui
from dqlearn.ui.interactive.data_insight import valid_list_ui
from dqlearn.dask.base_functions import value_properness_checks
from dqlearn.dask.base_functions import duplicate_value_checks
from dqlearn.dask.base_functions import uniqueness_checks
from dqlearn.tabular.transactional import check_low_variance_variables
from dqlearn.dask.base_functions import distribution_analyzer
from dqlearn.ai.ensemble_anomaly import anomaly_detection_isolation_forest
from dqlearn.ui.interactive.data_insight import valid_output_and_plot_ui
from dqlearn.ai.modelling_fit import check_data_modelling_fit
from dqlearn.ui.interactive.data_insight import valid_output_only_ui
from dqlearn.tabular.correlation import correlation_information
# from autodive.autoimpute import check_data_imputation

In [16]:
display_data_ui(df_input)

Tab(children=(HBox(children=(VBox(children=(HBox(children=(FloatRangeSlider(value=(3.6165146163122306, 3.89855…

Output(layout=Layout(border='solid', height='350px', overflow_x='scroll', overflow_y='scroll', width='auto'))

In [7]:
user_metadata_input_ui(df_input)
column_metadata_dict


VBox(children=(HBox(children=(Button(button_style='success', description='vibrations', layout=Layout(height='a…

HBox(children=(VBox(children=(HTML(value="<h3><font color='blue'>'accel_power_2'</h3>"), HTML(value='<b>Column…

In [21]:
for i in column_metadata_dict:
    i = ['float64', 'data', 'numeric']
column_metadata_dict

{'vibrations': ['float64', 'data', 'numeric'],
 'rms': ['float64'],
 'accel_speed': ['float64'],
 'accel_power_0': ['float64'],
 'accel_power_1': ['float64'],
 'accel_power_3': ['float64'],
 'accel_power_4': ['float64'],
 'accel_power_2': ['float64']}

**Unfortunately the package is currently not in a usable state**

Last slack exchange:
```
@markus_mueller due to some changes in Ipywidgets and plotly version, this issue comes up. I'm looking into it and will let you know```

<br>

In [24]:
value_properness_analysis_result = value_properness_checks(df_input, column_metadata_dict)
valid_list_ui(value_properness_analysis_result)

Tab(children=(VBox(children=(HTML(value='<h1>Summary Report</h1>'), HBox(children=(HTML(value='<b>Check Passed…

In [25]:
validation_check_low_variance_variables = check_low_variance_variables(df_input)
valid_output_and_plot_ui(validation_check_low_variance_variables)[0]


VBox(children=(HTML(value='<h1>Low Variance Check</h1>'), HBox(children=(VBox(children=(HBox(children=(HTML(va…

In [26]:
distribution_result = distribution_analyzer(df_input, column_metadata_dict)
valid_list_ui(distribution_result)

Tab(children=(VBox(children=(HTML(value='<h1>Summary Report</h1>'), HBox(children=(HTML(value='<b>Check Passed…

In [22]:
valid_correlation = correlation_information(df_input, column_metadata_dict)
valid_output_and_plot_ui(valid_correlation)[0]

VBox(children=(HTML(value='<h1>Correlation Information</h1>'), HBox(children=(VBox(children=(HBox(children=(HT…

In [9]:
distribution_result = distribution_analyzer(df_input, column_metadata_dict)
valid_list_ui(distribution_result)

Tab(children=(VBox(children=(HTML(value='<h1>Summary Report</h1>'), HBox(children=(HTML(value='<b>Check Passed…

In [8]:
# now run anomaly 
EngineLogging.configure_console_logging(logging.DEBUG)
blrub
extanomii = ExtremeAnomalyGenerator(input_item='vibrations',  output_item='vibrations_anom', factor=3, size=8)
jobsettings = { 'db': db, 
               '_db_schema': 'public', 'save_trace_to_file' : True}
et = extanomii._build_entity_type(columns = [Column('vibrations_anom',Float())], **jobsettings)
extanomii._entity_type = et

df_input = extanomii.execute(df=df_input)

2020-02-24T19:14:15.527 DEBUG iotfunctions.enginelog.configure_console_logging Console logging has been configured. Level = 10


NameError: name 'blrub' is not defined

In [1]:
df_input2 = df_input.loc[['04714B6046D5']]
df_input2.reset_index(level=[0], inplace=True)
Temperature='vibrations'
TempAnom='vibrations_anom'

NameError: name 'df_input' is not defined

In [2]:
# Side plot - digging a bit deeper to look for seasonalities

plots = 2
df_sub1 = df_input2['2020-01-05 00':'2020-01-05 02']
df_sub2 = df_input2['2020-01-05 02':'2020-01-05 04']


fig, ax = plt.subplots(plots, 1, figsize=(20,10))
cnt = 0
ax[cnt].plot(df_sub1.index, df_sub1[Temperature],lw=2,color='blue',label=Temperature)
ax[cnt].plot(df_sub1.index, df_sub2[TempAnom],lw=2,color='red',label=TempAnom)
ax[cnt].legend(bbox_to_anchor=(1.1, 1.05))
ax[cnt].set_ylabel('Input data - 2 hours', fontsize=14)
cnt = 1
ax[cnt].plot(df_sub2.index, df_sub1[Temperature],lw=2,color='blue',label=Temperature)
ax[cnt].plot(df_sub2.index, df_sub2[TempAnom],lw=2,color='red',label=TempAnom)
ax[cnt].legend(bbox_to_anchor=(1.1, 1.05))
ax[cnt].set_ylabel('Input data - next 2 hours', fontsize=14)

NameError: name 'df_input2' is not defined