# ------------ TIM Python Client - KPI Driven Anomaly Detection ------------

# 0. Setup

In [2]:
import pandas as pd
import json
import datetime as dt
import numpy as np
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as splt

In [3]:
import tim
# Create the TIM client object that the helper functions and cells of this
# notebook use for every API call. The empty strings are placeholders;
# presumably real credentials and a server address have to be filled in
# before running — TODO confirm against the TIM client documentation.
client = tim.Tim(email='',password='',server='')

In [4]:
def call_tim_function(action,object_type,arguments=None):
    """Dispatch a 'list' or 'create' request to the matching TIM client method.

    Args:
        action: Either ``'list'`` or ``'create'``.
        object_type: Kind of object to act on. ``'list'`` supports
            ``'user_group'``, ``'workspace'``, ``'dataset'``,
            ``'dataset_version'``, ``'use_case'`` and ``'experiment'``;
            ``'create'`` supports ``'user_group'``, ``'workspace'``,
            ``'use_case'`` and ``'experiment'``.
        arguments: Dict of keyword arguments forwarded unchanged to the
            selected client method. ``None`` (the default) means "no
            arguments".

    Returns:
        Whatever the selected client method returns.

    Raises:
        KeyError: If ``action`` or the ``action``/``object_type`` combination
            is not in the dispatch table.
    """
    functions = {
        'list':{
            'user_group':client.user_groups.list_user_group,
            'workspace':client.workspaces.list_workspace,
            'dataset':client.datasets.list_dataset,
            'dataset_version':client.datasets.list_dataset_versions,
            'use_case':client.use_cases.list_use_case,
            'experiment':client.experiments.list_experiment
        },
        'create':{
            'user_group':client.user_groups.create_user_group,
            'workspace':client.workspaces.create_workspace,
            'use_case':client.use_cases.create_use_case,
            'experiment':client.experiments.create_experiment
        }
    }
    # ``**None`` raises TypeError, so the documented default must be mapped
    # to an empty keyword set before unpacking.
    keyword_arguments = {} if arguments is None else arguments
    return functions[action][object_type](**keyword_arguments)

def create_tim_object_configuration(pipeline,object_type,parameters,name):
    """Build the minimal request body used to create a TIM object.

    Args:
        pipeline: Pipeline description dict. Read for ``'dataset'`` (optional
            ``pipeline['dataset_version']['name']``) and for ``'experiment'``
            (required ``pipeline['experiment']['create']['type']``).
        object_type: One of ``'user_group'``, ``'workspace'``, ``'dataset'``,
            ``'dataset_version'``, ``'use_case'`` or ``'experiment'``.
        parameters: Dict with the ids of the parent objects the new object
            refers to (``'user_group_id'``, ``'workspace_id'``,
            ``'dataset_id'``, ``'use_case_id'``, depending on the type).
        name: Name of the new object (the version name for
            ``'dataset_version'``).

    Returns:
        dict: The configuration for the requested object type.

    Raises:
        KeyError: If a required entry is missing from ``parameters`` or
            ``pipeline``.
        ValueError: If ``object_type`` is not one of the supported types.
    """
    if object_type == 'user_group':
        # The user the client is logged in as becomes the owner of the group.
        owner_id = client.users.details_user()['id']
        return {"name": name,"users": [{"id": owner_id,"isOwner": True}]}
    if object_type == 'workspace':
        return {"name": name,"userGroup": {"id": parameters['user_group_id']}}
    if object_type == 'dataset':
        # The version name is optional in the pipeline description.
        try:
            version_name = pipeline['dataset_version']['name']
        except (KeyError, TypeError):
            version_name = 'initial upload'
        return {"name": name,"workspace": {"id": parameters['workspace_id']},"versionName": version_name}
    if object_type == 'dataset_version':
        return {"versionName": name}
    if object_type == 'use_case':
        return {"name": name,"workspace": {"id": parameters['workspace_id']},"dataset": {"id": parameters['dataset_id']}}
    if object_type == 'experiment':
        return {"name": name,"useCase": {"id": parameters['use_case_id']},"type": pipeline['experiment']['create']['type']}
    # Previously an unknown type fell through every branch and failed with an
    # UnboundLocalError on the return statement.
    raise ValueError('unsupported object_type: {!r}'.format(object_type))

def check_tim_object(pipeline,object_type,parameters):
    """Return the id of a TIM object, creating the object if it cannot be found.

    The id is resolved in this order:

    1. ``pipeline[object_type]['id']`` when present;
    2. the first object returned by the 'list' call whose ``'name'`` equals
       ``pipeline[object_type]['name']``;
    3. a newly created object. Its configuration is
       ``pipeline[object_type]['create']['configuration']`` merged with the
       generated minimal configuration (generated values win), or only the
       generated configuration named ``pipeline[object_type]['name']`` when
       no create configuration is usable.

    Args:
        pipeline: Pipeline description dict keyed by object type.
        object_type: ``'user_group'``, ``'workspace'``, ``'dataset'``,
            ``'dataset_version'``, ``'use_case'`` or ``'experiment'``.
        parameters: Dict forwarded as keyword arguments to the 'list' call
            and used to build the creation configuration. For
            ``'dataset_version'`` it must hold the dataset id under ``'id'``.

    Returns:
        The id of the existing or newly created object.

    Raises:
        KeyError: If the object has to be created but the pipeline provides
            neither a usable create configuration nor a name.
        Exception: Anything raised by the client while creating the object.
    """
    # ``except Exception`` (not a bare ``except``) is used throughout so that
    # KeyboardInterrupt / SystemExit abort the run instead of being mistaken
    # for "not found" and triggering the creation of a new object.

    # 1. An explicit id wins; the server is not contacted.
    try:
        object_id = pipeline[object_type]['id']
    except Exception:
        pass
    else:
        print(object_type,'id available.')
        return object_id

    # 2. Best-effort lookup by name. Any failure (no name given, no match,
    #    no list endpoint for this type, a rejected request) deliberately
    #    falls through to creation.
    try:
        object_name = pipeline[object_type]['name']
        listed = call_tim_function('list',object_type,parameters)
        matches = [item for item in listed if item['name']==object_name]
        object_id = matches[0]['id']
    except Exception:
        pass
    else:
        print(object_type,'found by name.')
        return object_id

    # 3. Create the object.
    try:
        add_to_configuration = pipeline[object_type]['create']['configuration']
        name_key = 'versionName' if object_type == 'dataset_version' else 'name'
        object_name = add_to_configuration[name_key]
        create_configuration = create_tim_object_configuration(pipeline,object_type,parameters,object_name)
        # Generated keys override user-supplied ones with the same name.
        object_configuration = {**add_to_configuration, **create_configuration}
    except Exception:
        object_name = pipeline[object_type]['name']
        object_configuration = create_tim_object_configuration(pipeline,object_type,parameters,object_name)

    if object_type == 'dataset':
        tim_upload = client.upload_dataset(
            dataset = pipeline[object_type]['create']['file'],
            configuration = object_configuration,
            outputs = ['response'],
            status_poll = print,
            tries_left = 300
        )
        tim_object = tim_upload.response
    elif object_type == 'dataset_version':
        tim_update = client.update_dataset(
            dataset_id = parameters['id'],
            dataset_version = pipeline[object_type]['create']['file'],
            configuration = object_configuration,
            outputs = ['response'],
            status_poll = print,
            tries_left = 300
        )
        tim_object = tim_update.response['version']
    else:
        tim_object = call_tim_function('create',object_type,{'configuration':object_configuration})
    object_id = tim_object['id']
    print(object_type,'created.')
    return object_id

def tim_pipeline_setup(pipeline):
    """Resolve (or create) every TIM object of a pipeline and collect the ids.

    Objects are processed in dependency order: user group, workspace,
    dataset, dataset version, use case, experiment. The dataset is
    mandatory; every other step is best-effort and is left out of the result
    when it fails or when an object it depends on could not be resolved.

    Args:
        pipeline: Dict with an optional ``'name'`` and one description dict
            per object type, as accepted by ``check_tim_object``.

    Returns:
        dict: ``'name'`` (if given) plus one ``object_type -> id`` entry for
        every object that was resolved.

    Raises:
        RuntimeError: If the workspace could not be resolved, so the dataset
            step cannot run. The error that made the workspace or user group
            step fail is attached as the cause.
        Exception: Anything raised while resolving the dataset.
    """
    # Always start from a dict: a pipeline without a name used to leave
    # ``response`` undefined and crash later with a NameError.
    response = {}
    try:
        response['name'] = pipeline['name']
    except (KeyError, TypeError):
        pass  # the pipeline name is optional

    failures = {}  # object_type -> exception that made an optional step fail

    def resolve_optional(object_type, parameters):
        # A step is skipped when one of the ids it depends on is unresolved.
        if any(value is None for value in parameters.values()):
            return None
        try:
            object_id = check_tim_object(pipeline=pipeline,object_type=object_type,parameters=parameters)
        except Exception as error:
            # Deliberately best-effort; the error is kept so that it can be
            # reported if a mandatory step depends on this one.
            failures[object_type] = error
            return None
        response[object_type] = object_id
        return object_id

    user_group_id = resolve_optional('user_group', {})
    workspace_id = resolve_optional('workspace', {'user_group_id':user_group_id})

    if workspace_id is None:
        cause = failures.get('workspace', failures.get('user_group'))
        raise RuntimeError(
            'the workspace could not be resolved, so the dataset step cannot run'
        ) from cause
    dataset_id = check_tim_object(pipeline=pipeline,object_type='dataset',parameters={'workspace_id':workspace_id})
    response['dataset'] = dataset_id

    resolve_optional('dataset_version', {'id':dataset_id})
    use_case_id = resolve_optional('use_case', {'workspace_id':workspace_id,'dataset_id':dataset_id})
    resolve_optional('experiment', {'use_case_id':use_case_id})
    return response

# 1. Data Preparation

In [5]:
# Load the raw data from a CSV file in the working directory and show which
# columns it contains.
csv_df = pd.read_csv('production_line.csv')
print(csv_df.columns)

Index(['Datetime', 'Output', 'AmbientHumidity', 'AmbientTemperature',
       'M1_RawMaterial_1', 'M1_RawMaterial_2', 'M1_RawMaterial_3',
       'M1_RawMaterial_4', 'M1_RawMaterialFeeder', 'M1_Zone1_Temperature',
       'M1_Zone2_Temperature', 'M1_MotorAmperage', 'M1_MotorRPM',
       'M1_MaterialPressure', 'M1_MaterialTemperature',
       'M1_ExitZoneTemperature', 'M2_RawMaterial_1', 'M2_RawMaterial_2',
       'M2_RawMaterial_3', 'M2_RawMaterial_4', 'M2_RawMaterialFeeder',
       'M2_Zone1_Temperature', 'M2_Zone2_Temperature', 'M2_MotorAmperage',
       'M2_MotorRPM', 'M2_MaterialPressure', 'M2_MaterialTemperature',
       'M2_ExitZoneTemperature', 'M3_RawMaterial_1', 'M3_RawMaterial_2',
       'M3_RawMaterial_3', 'M3_RawMaterial_4', 'M3_RawMaterialFeeder',
       'M3_Zone1_Temperature', 'M3_Zone2_Temperature', 'M3_MotorAmperage',
       'M3_MotorRPM', 'M3_MaterialPressure', 'M3_MaterialTemperature',
       'M3_ExitZoneTemperature', 'Stage_1_Temperature1',
       'Stage_1_Temperature2'

In [6]:
# Arrange the data as: timestamp column, KPI (target) column, then every
# remaining column as a predictor. The loaded frame itself is left untouched.
timestamp, target = 'Datetime', 'Output'
predictors = [column for column in csv_df.columns if column not in (timestamp, target)]
tim_dataset = csv_df[[timestamp, target, *predictors]].reset_index(drop=True)

In [None]:
# Plot the target in the upper panel and all predictors in the lower panel,
# both against the shared timestamp axis.
v_data = tim_dataset.copy()
fig = splt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
fig.add_trace(go.Scatter(x=v_data[timestamp], y=v_data[target], name=target, connectgaps=True), row=1, col=1)
# The enumerate index was never used, so iterate over the names directly.
for p in predictors:
    fig.add_trace(go.Scatter(x=v_data[timestamp], y=v_data[p], name=p, connectgaps=True), row=2, col=1)
fig.update_layout(height=600, width=1200, title_text="Data visualization")
fig.show()

# 2. TIM

## 2.1 Workflow

In [7]:
# Description of the TIM objects this notebook works with. For each object,
# check_tim_object uses 'id' if it is given, otherwise looks the object up by
# 'name', and otherwise creates it (using 'create' when present).
# The commented-out entries are templates; the *_configuration names they
# mention are not defined in this notebook and must be defined before the
# corresponding line is enabled.
upload_dataset_configuration = {
    # "timestampColumn": timestamp,
    # "groupKeys": group_keys,
    "name": "production_line"
}
# -----------------------------------------------------------------------------------------------------------------------
user_group = {
    # 'id':'135c50cd-e6ea-423e-ae2b-581564cb9cbc',
    'name':'POV',
    # 'create':{'configuration':create_user_group_configuration}
}
workspace = {
    # 'id':'e3e34c8f-3864-4199-af4b-70366c6a79db',
    'name':'Templates',
    # 'create':{'configuration':create_workspace_configuration}
}
# No 'id' and no 'name' here, so the dataset is uploaded on every run.
dataset = {
    # 'id':'0c217702-6d7c-4349-9345-a5d8c9120881',
    # 'name':upload_dataset_configuration['name'],
    'create':{'configuration':upload_dataset_configuration,'file':tim_dataset}
}
# Left empty: the dataset_version step fails and is skipped by tim_pipeline_setup.
dataset_version = {
    # 'id':'03ea3953-3956-4155-8ad8-f3c95daadd5f',
    # 'name':'panel_data_demo',
    # 'create':{'configuration':update_dataset_configuration,'file':tim_dataset.tail(28)}
}
use_case = {
    # 'id':'3b8ad8ce-8516-4ed4-a9e7-e891ed5e176c',
    'name':'production_line',
    # 'create':{'configuration':create_use_case_configuration}
}
experiment = {
    # 'id':'bc82c706-be70-4726-8976-dbadf75e7385',
    'name':'KPI Driven Anomaly Detection',
    'create':{
        # 'configuration':create_experiment_configuration,
        # 'type' is read by create_tim_object_configuration when the experiment is created
        'type':'AnomalyDetection'
    }
}
# -----------------------------------------------------------------------------------------------------------------------
pipeline_input = {
    'name':'pipeline_1',
    'user_group':user_group,
    'workspace':workspace,
    'dataset':dataset,
    'dataset_version':dataset_version,
    'use_case':use_case,
    'experiment':experiment
}
# -----------------------------------------------------------------------------------------------------------------------
# Resolve or create all objects; the result maps each resolved object type to its id.
pipeline_response = tim_pipeline_setup(pipeline=pipeline_input)
pipeline_response

user_group found by name.
workspace created.
{'status': 'Running', 'progress': 0.0, 'createdAt': '2023-10-18T07:59:55.111Z'}
{'status': 'Running', 'progress': 0.0, 'createdAt': '2023-10-18T07:59:55.111Z'}
{'status': 'Running', 'progress': 0.0, 'createdAt': '2023-10-18T07:59:55.111Z'}
{'status': 'Running', 'progress': 28.42, 'createdAt': '2023-10-18T07:59:55.111Z'}
{'status': 'Running', 'progress': 86.98, 'createdAt': '2023-10-18T07:59:55.111Z'}
{'status': 'Running', 'progress': 95.0, 'createdAt': '2023-10-18T07:59:55.111Z'}
dataset created.
use_case created.
experiment created.


{'name': 'pipeline_1',
 'user_group': '1dfc6ff1-06e2-4dcc-a58b-c09978ad79d1',
 'workspace': 'dbca5351-4614-4109-84b9-a90b22311a26',
 'dataset': 'ae35b57c-6e6f-4060-ba2b-b89da2c96043',
 'use_case': '27450439-d6f2-4292-a7ee-e020ed9d903c',
 'experiment': '8eb9eefd-aa77-4140-8dca-f17429f64e90'}

In [12]:
# Keep the ids used by the following cells under short names.
dataset_id, use_case_id, experiment_id = (
    pipeline_response[key] for key in ('dataset', 'use_case', 'experiment')
)

## 2.2 Model Building

### 2.2.1 Configuration

In [13]:
# Two thirds of the rows (rounded down) are used for model building.
in_sample_rows = len(tim_dataset) * 2 // 3

In [14]:
# Configuration of the model-building job. Only the use case, the experiment
# and the in-sample rows (the first `in_sample_rows` samples) are set; the
# commented-out entries are a template of further options and are not sent.
detection_build_kpi_model_configuration = {
#     "name": "My first anomaly build-model job",
    "useCase": {"id": use_case_id},
    "experiment": {"id": experiment_id},
#     "configuration": {
#         "domainSpecifics": [
#             {
#                 "perspective": "Residual",
#                 "sensitivity": 0,
#                 "minSensitivity": 0,
#                 "maxSensitivity": 0
#             }
#         ],
#         "normalBehaviorModel": {
#             "useNormalBehaviorModel": True,
#             "normalization": True,
#             "maxModelComplexity": 50,
#             "features": [
#                 "ExponentialMovingAverage",
#                 "TimeOffsets",
#                 "Identity",
#                 "Intercept"
#             ],
#             "dailyCycle": True,
#             "useKPIoffsets": True,
#             "allowOffsets": True,
#             "offsetLimit": {"type": "Explicit","value": 0}
#         },
#         "anomalousBehaviorModel": {
#             "maxModelComplexity": 15,
#             "detectionIntervals": [
#                 {"type": "Hour","value": "8-16"}
#             ]
#         }
#     },
    "data": {
#         "version": {"id": "a74ae716-a86e-47f0-8a50-d8b21d6d7dd6"},
        "rows": {"type":"First","baseUnit": "Sample","value": in_sample_rows}, #{"type":"Last","baseUnit": "Sample","value": 1} or [{"from": "yyyy-mm-dd HH:MM:SS","to": "yyyy-mm-dd HH:MM:SS"}]
#         "columns": [
#             1,
#             3,
#             "wind_speed"
#         ],
#         "KPIColumn": "rotor_speed",
#         "holidayColumn": "PH",
#         "labelColumn": "LABEL",
#         "imputation": {"type": "LOCF","maxGapLength": 6},
#         "timeScale": {"baseUnit": "Hour","value": 1},
#         "aggregation": "Mean",
#         "updates": [
#             {
#                 "column": "wind_speed",
#                 "updateTime": [
#                     {"type": "Hour","value": "1,12,23"}
#                 ],
#                 "updateUntil": {"baseUnit": "Hour","offset": -2}
#             }
#         ]
    }
}
detection_build_kpi_model_configuration

{'useCase': {'id': '27450439-d6f2-4292-a7ee-e020ed9d903c'},
 'experiment': {'id': '8eb9eefd-aa77-4140-8dca-f17429f64e90'},
 'data': {'rows': {'type': 'First', 'baseUnit': 'Sample', 'value': 9392}}}

### 2.2.2 API Call

In [15]:
# Run the model-building job with the configuration defined above.
# `outputs` names the results that are read from the returned object in the
# next cell; `status_poll = print` prints every status update of the job.
# The commented-out keyword arguments are left at the client's defaults.
detection_build_kpi_model = client.detection_build_kpi_model(
    configuration = detection_build_kpi_model_configuration,
    # dataset_id = dataset_id,
    # execute = True,
    # wait_to_finish = True,
    outputs = [
        'id',
        'details',
        'logs',
        'status',
        'table',
        'model',
        'accuracies'
    ],
    status_poll = print,
    # tries_left = 300
 )

{'status': 'Running', 'createdAt': '2023-10-18T08:01:26.010Z'}
{'status': 'Running', 'progress': 6.5, 'CPU': 0.13, 'memory': 2413.0, 'createdAt': '2023-10-18T08:01:30.013Z'}
{'status': 'Running', 'progress': 20.0, 'CPU': 0.13, 'memory': 2443.0, 'createdAt': '2023-10-18T08:01:32.325Z'}
{'status': 'Running', 'progress': 60.0, 'CPU': 0.13, 'memory': 2493.0, 'createdAt': '2023-10-18T08:01:34.434Z'}
{'status': 'Running', 'progress': 60.0, 'CPU': 0.13, 'memory': 2493.0, 'createdAt': '2023-10-18T08:01:34.434Z'}
{'status': 'Running', 'progress': 77.5, 'CPU': 0.5, 'memory': 2523.0, 'createdAt': '2023-10-18T08:01:40.347Z'}
{'status': 'Running', 'progress': 77.5, 'CPU': 0.5, 'memory': 2523.0, 'createdAt': '2023-10-18T08:01:40.347Z'}
{'status': 'Finished', 'progress': 100.0, 'CPU': 0.5, 'memory': 2524.0, 'createdAt': '2023-10-18T08:01:43.805Z'}


In [16]:
# Store every requested output of the model-building job in its own variable.
(
    detection_build_kpi_model_id,
    detection_build_kpi_model_details,
    detection_build_kpi_model_logs,
    detection_build_kpi_model_status,
    detection_build_kpi_model_table,
    detection_build_kpi_model_model,
    detection_build_kpi_model_accuracies,
) = (
    getattr(detection_build_kpi_model, output)
    for output in ('id', 'details', 'logs', 'status', 'table', 'model', 'accuracies')
)

## 2.3 Detection Inference

### 2.3.1 Configuration

In [17]:
# All rows that were not used for model building are used for detection.
out_sample_rows = len(tim_dataset) - in_sample_rows

In [18]:
# Configuration of the detection job: run on the last `out_sample_rows`
# samples, i.e. the rows that were not used for model building.
# The commented-out entries are optional settings that are not sent.
detection_detect_configuration = {
#     "name": "My first anomaly detect job",
    "experiment": {"id":experiment_id},
    "data": {
#         "version": {"id": "a74ae716-a86e-47f0-8a50-d8b21d6d7dd6"},
        "rows": {"type":"Last","baseUnit": "Sample","value": out_sample_rows}, #{"type":"Last","baseUnit": "Sample","value": 1} or [{"from": "yyyy-mm-dd HH:MM:SS","to": "yyyy-mm-dd HH:MM:SS"}]
#         "imputation": {"type": "LOCF","maxGapLength": 6}
    }
}

### 2.3.2 API Call

In [19]:
# Run the detection job. `parent_job_id` is the id of the model-building job
# from section 2.2 (presumably the job whose model is applied — TODO confirm
# against the TIM client documentation).
# `outputs` names the results that are read from the returned object in the
# next cell; `status_poll = print` prints every status update of the job.
detection_detect = client.detection_detect(
    parent_job_id = detection_build_kpi_model_id,
    configuration = detection_detect_configuration,
    # execute = True,
    # wait_to_finish = True,
    outputs = [
        'id',
        'details',
        'logs',
        'status',
        'table',
        'model',
        'accuracies',
        'production_table',  
    ],
    status_poll = print,
    # tries_left = 300
 )

{'status': 'Running', 'createdAt': '2023-10-18T08:01:54.465Z'}
{'status': 'Running', 'progress': 16.0, 'CPU': 0.46, 'memory': 2520.0, 'createdAt': '2023-10-18T08:01:58.518Z'}
{'status': 'Finished', 'progress': 100.0, 'CPU': 0.5, 'memory': 2522.0, 'createdAt': '2023-10-18T08:01:59.938Z'}


In [20]:
# Store every requested output of the detection job in its own variable.
(
    detection_detect_id,
    detection_detect_details,
    detection_detect_logs,
    detection_detect_status,
    detection_detect_table,
    detection_detect_model,
    detection_detect_accuracies,
    detection_detect_production_table,
) = (
    getattr(detection_detect, output)
    for output in (
        'id', 'details', 'logs', 'status', 'table', 'model', 'accuracies', 'production_table',
    )
)

# 3. Results

In [21]:
# Importance tables derived from the built model.
properties_df = client.post_process.properties(detection_build_kpi_model_model)
features_df = client.post_process.features(detection_build_kpi_model_model)
# Job logs as data frames, ordered by creation time and re-indexed from 0.
model_logs_df, detect_logs_df = (
    pd.DataFrame(job_logs).sort_values(by='createdAt').reset_index(drop=True)
    for job_logs in (detection_build_kpi_model_logs, detection_detect_logs)
)

## 3.1 Visual

In [None]:
# Upper panel: KPI, normal behavior of both jobs and the detected anomalies.
# Lower panel: anomaly indicators with the threshold line at 1.
fig = splt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
fig.add_trace(
    go.Scatter(x=tim_dataset[timestamp], y=tim_dataset[target], name=target, line=dict(color='black')),
    row=1, col=1,
)
for table, trace_name, color in (
    (detection_build_kpi_model_table, 'InSample Normal Behavior', 'goldenrod'),
    (detection_detect_table, 'OutOfSample Normal Behavior', 'darkgoldenrod'),
):
    fig.add_trace(
        go.Scatter(x=table['timestamp'], y=table['normal_behavior'], name=trace_name, line=dict(color=color)),
        row=1, col=1,
    )

# In-sample traces are added first, then out-of-sample traces.
for table, indicator_suffix, anomaly_suffix in (
    (detection_build_kpi_model_table, ' InSample', ' anomaly inSample'),
    (detection_detect_table, ' OutOfSample', ' anomaly outOfSample'),
):
    for column in table.columns:
        if 'anomaly_indicator' not in column:
            continue
        short_name = column.replace('anomaly_indicator_', '')
        fig.add_trace(
            go.Scatter(x=table['timestamp'], y=table[column], name=short_name + indicator_suffix),
            row=2, col=1,
        )
        # Rows whose indicator reaches the threshold are marked on the KPI curve.
        flagged = table[table[column] >= 1]
        fig.add_trace(
            go.Scatter(
                x=flagged['timestamp'], y=flagged['kpi'], name=short_name + anomaly_suffix,
                mode='markers', line={'color': 'red'},
            ),
            row=1, col=1,
        )

fig.add_hline(y=1, line_color="orange", row=2, col=1)
fig.update_layout(height=700, width=1400, title_text="Results")
fig.show()

## 3.2 Anomaly List

In [23]:
# Stack the in-sample and out-of-sample result tables, keep the rows whose
# anomaly code is 1 or 3, and parse the ISO timestamps.
detection_table = pd.concat([detection_build_kpi_model_table, detection_detect_table])
anomaly_list_df = (
    detection_table[detection_table['anomaly_code'].isin([1, 3])]
    .rename(columns={'anomaly_code': 'anomaly'})
    .assign(
        timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='%Y-%m-%dT%H:%M:%S.%fZ')
    )
)
anomaly_list_df

Unnamed: 0,timestamp,model_index,kpi,normal_behavior,anomaly,anomaly_indicator_residual,anomaly_indicator_imbalance
638,2019-03-06 11:03:11,1.0,3.45,0.855126,1.0,1.212612,0.376316
1107,2019-03-06 11:11:01,1.0,0.00,3.555792,1.0,1.664308,0.212900
1109,2019-03-06 11:11:03,1.0,3.72,0.581634,1.0,1.222966,0.134751
1137,2019-03-06 11:11:31,1.0,3.18,1.353532,1.0,1.070858,0.584259
1756,2019-03-06 11:21:50,1.0,3.42,1.425940,1.0,1.069886,0.597299
...,...,...,...,...,...,...,...
4638,2019-03-06 14:46:30,1.0,3.42,3.127911,1.0,0.580257,1.022099
4639,2019-03-06 14:46:31,1.0,3.42,3.150170,1.0,0.524101,1.037345
4640,2019-03-06 14:46:32,1.0,3.38,3.146576,1.0,0.470794,1.043201
4641,2019-03-06 14:46:33,1.0,3.38,3.129912,1.0,0.488658,1.019313


## 3.3 Insights

In [None]:
# Bar chart of the relative importance of every column used by the model.
fig1 = go.Figure(
    go.Bar(
        x=properties_df['name'],
        y=properties_df['rel_importance'],
        text=properties_df['rel_importance'].round(2),
        textposition='auto',
    )
)
fig1.update_layout(
    height=500,
    width=1200,
    title_text='Predictor Importances',
    xaxis_title='name',
    yaxis_title='rel_importance',
)
# Columns of the dataset that do not appear in the importance table.
print('Predictors not used:' + str(list(set(predictors + [target]) - set(properties_df['name']))))
fig1.show()

In [None]:
# Sunburst of feature importances, grouped by model and then by feature.
fig = px.sunburst(
    features_df, path=['Model', 'Feature'], values='importance', color='Feature'
).update_layout(height=700, width=700, title_text='Feature Importances')
fig.show()

In [26]:
# Messages of all log entries of the model-building job that are warnings.
warnings = list(model_logs_df.loc[model_logs_df['messageType'] == "Warning", 'message'])
warnings

[]

In [None]:
# Display the log of the model-building job, ordered by creation time.
model_logs_df

In [None]:
# Display the log of the detection job, ordered by creation time.
detect_logs_df