# TIM Python Client - KPI Driven Anomaly Detection

# 0. Setup

Import the libraries necessary to run this notebook and set up the Python client for TIM.

In [1]:
import pandas as pd
import json
import os
import datetime as dt
import numpy as np
import plotly as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as splt

In [2]:
# Root folder of the tutorials: two directory levels above the notebook's working directory.
working_dir = os.getcwd()
tutorial_path = os.path.dirname(os.path.dirname(working_dir))

In [3]:
import tim

# Read the TIM account credentials from `tim_credentials.json` under `tutorial_path`.
# The context manager closes the file as soon as it has been parsed; the previous
# `json.load(open(...))` form left the handle open until garbage collection.
with open(tutorial_path+'/tim_credentials.json') as credentials_file:
    tim_credentials = json.load(credentials_file)

# The JSON document must provide the 'email' and 'password' keys used to build the client.
client = tim.Tim(email=tim_credentials['email'],password=tim_credentials['password'])

# 1. Data Preparation

In [4]:
# Load the raw production-line measurements and list the available columns.
dataset_csv_path = tutorial_path+'/datasets/production_line.csv'
csv_df = pd.read_csv(dataset_csv_path)
print(csv_df.columns)

Index(['Datetime', 'Output', 'AmbientHumidity', 'AmbientTemperature',
       'M1_RawMaterial_1', 'M1_RawMaterial_2', 'M1_RawMaterial_3',
       'M1_RawMaterial_4', 'M1_RawMaterialFeeder', 'M1_Zone1_Temperature',
       'M1_Zone2_Temperature', 'M1_MotorAmperage', 'M1_MotorRPM',
       'M1_MaterialPressure', 'M1_MaterialTemperature',
       'M1_ExitZoneTemperature', 'M2_RawMaterial_1', 'M2_RawMaterial_2',
       'M2_RawMaterial_3', 'M2_RawMaterial_4', 'M2_RawMaterialFeeder',
       'M2_Zone1_Temperature', 'M2_Zone2_Temperature', 'M2_MotorAmperage',
       'M2_MotorRPM', 'M2_MaterialPressure', 'M2_MaterialTemperature',
       'M2_ExitZoneTemperature', 'M3_RawMaterial_1', 'M3_RawMaterial_2',
       'M3_RawMaterial_3', 'M3_RawMaterial_4', 'M3_RawMaterialFeeder',
       'M3_Zone1_Temperature', 'M3_Zone2_Temperature', 'M3_MotorAmperage',
       'M3_MotorRPM', 'M3_MaterialPressure', 'M3_MaterialTemperature',
       'M3_ExitZoneTemperature', 'Stage_1_Temperature1',
       'Stage_1_Temperature2'

In [6]:
# Column roles: the timestamp column, the KPI (target) column, and every other column as predictor.
timestamp = 'Datetime'
target = 'Output'
key_columns = [timestamp, target]
predictors = [column for column in csv_df.columns if column not in key_columns]

# Work on a copy of the raw frame, reordered to timestamp, target, predictors, with a fresh index.
tim_dataset = csv_df.copy()[key_columns + predictors].reset_index(drop=True)

In [None]:
# Quick visual check: the target in the top panel, every predictor in the bottom panel,
# both sharing the same time axis.
v_data = tim_dataset.copy()
time_axis = v_data[timestamp]
fig = splt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
fig.add_trace(
    go.Scatter(x=time_axis, y=v_data[target], name=target, connectgaps=True),
    row=1, col=1,
)
for predictor in predictors:
    fig.add_trace(
        go.Scatter(x=time_axis, y=v_data[predictor], name=predictor, connectgaps=True),
        row=2, col=1,
    )
fig.update_layout(height=600, width=1200, title_text="Data visualization")
fig.show()

# 2. TIM

## 2.1 Workflow

In [10]:
# Resolve (or create) the dataset -> use case -> experiment chain that the jobs below attach to.
# Each step first looks for an existing object with the expected name and only creates a new one
# when none is found. Explicit emptiness checks replace the former bare `except:` clauses, so
# API/network errors (or a keyboard interrupt) surface instead of silently creating duplicates.
with open(tutorial_path+'/tim_workflow/tim_workspace.json') as workspace_file:
    tim_workspace = json.load(workspace_file)
workspace_id = tim_workspace['id']
# -----------------------------------------------------------------------------------------------------------------------
# Names used to look up / create each object of the pipeline.
pipeline = {
    'dataset':'production_line',
    'use_case':'KPI driven anomaly detection',
    'experiment':'KPI driven anomaly detection'
}
# -----------------------------------------------------------------------------------------------------------------------
# Dataset: reuse the first dataset of the workspace with a matching name, otherwise upload it.
dataset_list = [f for f in client.datasets.list_dataset(workspace_id=workspace_id) if f['name']==pipeline['dataset']]
if dataset_list:
    tim_upload = dataset_list[0]
    dataset_id = tim_upload['id']
    print("Collect dataset")
else:
    now_dt = dt.datetime.now()
    upload_dataset_configuration = {
        "name": pipeline['dataset'],
        "workspace": {
            "id": workspace_id
        },
        # The upload time doubles as the version name.
        "versionName": now_dt.strftime('%Y-%m-%d %H:%M:%S')
    }
    tim_upload = client.upload_dataset(
        dataset = tim_dataset,
        configuration = upload_dataset_configuration,
        wait_to_finish = True,
        outputs = ['response'],
        status_poll = print,
        tries_left = 300
    )
    print("dataset uploaded")
    # Unlike a listed dataset (a dict), the upload result exposes the id through `.response`.
    dataset_id = tim_upload.response['id']
# -----------------------------------------------------------------------------------------------------------------------
# Use case: reuse the first use case of the dataset with a matching name, otherwise create it.
use_case_list = [f for f in client.use_cases.list_use_case(dataset_id = dataset_id) if f['name']==pipeline['use_case']]
if use_case_list:
    tim_use_case = use_case_list[0]
    print("Collect Use Case")
else:
    create_use_case_configuration = {
        "name": pipeline['use_case'],
        "workspace": {
            "id": workspace_id
        },
        "dataset": {
            "id": dataset_id
        },
    }
    tim_use_case = client.use_cases.create_use_case(configuration=create_use_case_configuration)
    print("Use Case Created")
use_case_id = tim_use_case['id']
# -----------------------------------------------------------------------------------------------------------------------
# Experiment: reuse the first experiment of the use case with a matching name, otherwise create it.
experiment_list = [f for f in client.experiments.list_experiment(use_case_id = use_case_id) if f['name']==pipeline['experiment']]
if experiment_list:
    tim_experiment = experiment_list[0]
    print("Collect experiment")
else:
    create_experiment_configuration = {
      "name": pipeline['experiment'],
      "useCase": {
        "id": use_case_id
      },
      "type": "AnomalyDetection"
    }
    tim_experiment = client.experiments.create_experiment(configuration=create_experiment_configuration)
    print("Experiment created")
experiment_id = tim_experiment['id']

Collect dataset
Use Case Created
Experiment created


## 2.2 Model Building

### 2.2.1 Configuration

In [11]:
# The first two thirds of the rows (rounded down) are used to build the model.
in_sample_rows = len(tim_dataset) * 2 // 3
# -----------------------------------------------------------------------------------------------------------------------
# Only the name, the use case / experiment ids and the row selection are set explicitly.
# The commented entries are a template of further options that can be enabled when needed;
# the column names in it ("wind_speed", "rotor_speed", ...) are placeholders, not columns of this dataset.
detection_build_kpi_model_configuration = {
    "name": "KPI driven anomaly detection",
    "useCase": {"id": use_case_id},
    "experiment": {"id": experiment_id},
    # --- optional model settings ---
    # "configuration": {
    #     "domainSpecifics": [
    #         {
    #             "perspective": "Residual",
    #             "sensitivity": 0,
    #             "minSensitivity": 0,
    #             "maxSensitivity": 0
    #         }
    #     ],
    #     "normalBehaviorModel": {
    #         "useNormalBehaviorModel": True,
    #         "normalization": True,
    #         "maxModelComplexity": 50,
    #         "features": [
    #             "ExponentialMovingAverage",
    #             "TimeOffsets",
    #             "Identity",
    #             "Intercept"
    #         ],
    #         "dailyCycle": True,
    #         "useKPIoffsets": True,
    #         "allowOffsets": True,
    #         "offsetLimit": {"type": "Explicit", "value": 0}
    #     },
    #     "anomalousBehaviorModel": {
    #         "maxModelComplexity": 15,
    #         "detectionIntervals": [
    #             {"type": "Hour", "value": "8-16"}
    #         ]
    #     }
    # },
    "data": {
        # "version": {"id": "a74ae716-a86e-47f0-8a50-d8b21d6d7dd6"},
        # Row selection alternatives:
        #   {"type": "Last", "baseUnit": "Sample", "value": 1}
        #   [{"from": "yyyy-mm-dd HH:MM:SS", "to": "yyyy-mm-dd HH:MM:SS"}]
        "rows": {
            "type": "First",
            "baseUnit": "Sample",
            "value": in_sample_rows
        },
        # --- optional data settings ---
        # "columns": [
        #     1,
        #     3,
        #     "wind_speed"
        # ],
        # "KPIColumn": "rotor_speed",
        # "holidayColumn": "PH",
        # "labelColumn": "LABEL",
        # "imputation": {"type": "LOCF", "maxGapLength": 6},
        # "timeScale": {"baseUnit": "Hour", "value": 1},
        # "aggregation": "Mean",
        # "updates": [
        #     {
        #         "column": "wind_speed",
        #         "updateTime": [
        #             {"type": "Hour", "value": "1,12,23"}
        #         ],
        #         "updateUntil": {"baseUnit": "Hour", "offset": -2}
        #     }
        # ]
    }
}
detection_build_kpi_model_configuration

{'name': 'KPI driven anomaly detection',
 'useCase': {'id': '25dfe2fa-b459-4e89-be4c-aa74f4fa0f83'},
 'experiment': {'id': '94f45879-ae97-46e2-8c2d-0f51e7e93187'},
 'data': {'rows': {'type': 'First', 'baseUnit': 'Sample', 'value': 9392}}}

### 2.2.2 API Call

In [12]:
# Result parts requested from the model-building job; each one is read back as an
# attribute of the returned object in the next cell.
build_kpi_model_outputs = [
    'id',
    'details',
    'logs',
    'status',
    'table',
    'model',
    'accuracies'
]
# Submit the KPI-driven model-building job; status updates are echoed through `print`.
# Further keyword arguments that are not passed here: dataset_id, execute, wait_to_finish, tries_left.
detection_build_kpi_model = client.extended_detection.build_kpi_model(
    configuration=detection_build_kpi_model_configuration,
    outputs=build_kpi_model_outputs,
    status_poll=print,
)

{'status': 'Running', 'createdAt': '2023-04-20T15:26:48.746Z'}
{'status': 'Running', 'progress': 13.0, 'CPU': 0.06, 'memory': 3571.0, 'createdAt': '2023-04-20T15:26:51.469Z'}
{'status': 'Running', 'progress': 60.0, 'CPU': 0.06, 'memory': 3568.0, 'createdAt': '2023-04-20T15:26:53.477Z'}
{'status': 'Running', 'progress': 60.0, 'CPU': 0.06, 'memory': 3568.0, 'createdAt': '2023-04-20T15:26:53.477Z'}
{'status': 'Running', 'progress': 77.5, 'CPU': 0.14, 'memory': 3736.0, 'createdAt': '2023-04-20T15:26:56.724Z'}
{'status': 'Finished', 'progress': 100.0, 'CPU': 0.45, 'memory': 3742.0, 'createdAt': '2023-04-20T15:26:59.197Z'}


In [13]:
# Expose every requested part of the model-building result under its own notebook-level name.
(
    detection_build_kpi_model_id,
    detection_build_kpi_model_details,
    detection_build_kpi_model_logs,
    detection_build_kpi_model_status,
    detection_build_kpi_model_table,
    detection_build_kpi_model_model,
    detection_build_kpi_model_accuracies,
) = (
    detection_build_kpi_model.id,
    detection_build_kpi_model.details,
    detection_build_kpi_model.logs,
    detection_build_kpi_model.status,
    detection_build_kpi_model.table,
    detection_build_kpi_model.model,
    detection_build_kpi_model.accuracies,
)

## 2.3 Detection Inference

### 2.3.1 Configuration

In [16]:
# Detection runs on the rows that were not used for model building (the tail of the dataset).
out_sample_rows = len(tim_dataset) - in_sample_rows
# -----------------------------------------------------------------------------------------------------------------------
detection_detect_configuration = {
    # "name": "My first anomaly detect job",
    "experiment": {"id": experiment_id},
    "data": {
        # "version": {"id": "a74ae716-a86e-47f0-8a50-d8b21d6d7dd6"},
        # Row selection alternatives:
        #   {"type": "Last", "baseUnit": "Sample", "value": 1}
        #   [{"from": "yyyy-mm-dd HH:MM:SS", "to": "yyyy-mm-dd HH:MM:SS"}]
        "rows": {
            "type": "Last",
            "baseUnit": "Sample",
            "value": out_sample_rows
        },
        # "imputation": {"type": "LOCF", "maxGapLength": 6}
    }
}

### 2.3.2 API Call

In [17]:
# Result parts requested from the detection job; each one is read back as an
# attribute of the returned object in the next cell.
detection_detect_outputs = [
    'id',
    'details',
    'logs',
    'status',
    'table',
    'model',
    'accuracies',
    'production_table',
]
# Run detection as a child of the model-building job; status updates are echoed through `print`.
# Further keyword arguments that are not passed here: execute, wait_to_finish, tries_left.
detection_detect = client.detection_detect(
    parent_job_id=detection_build_kpi_model_id,
    configuration=detection_detect_configuration,
    outputs=detection_detect_outputs,
    status_poll=print,
)

{'status': 'Running', 'createdAt': '2023-04-20T15:27:52.206Z'}
{'status': 'Running', 'progress': 53.19, 'CPU': 0.29, 'memory': 3561.0, 'createdAt': '2023-04-20T15:27:55.407Z'}
{'status': 'Finished', 'progress': 100.0, 'CPU': 0.29, 'memory': 3575.0, 'createdAt': '2023-04-20T15:27:56.452Z'}


In [18]:
# Expose every requested part of the detection result under its own notebook-level name.
(
    detection_detect_id,
    detection_detect_details,
    detection_detect_logs,
    detection_detect_status,
    detection_detect_table,
    detection_detect_model,
    detection_detect_accuracies,
    detection_detect_production_table,
) = (
    detection_detect.id,
    detection_detect.details,
    detection_detect.logs,
    detection_detect.status,
    detection_detect.table,
    detection_detect.model,
    detection_detect.accuracies,
    detection_detect.production_table,
)

# 3. Results

In [20]:
# Post-process the built model into predictor-level and feature-level tables.
properties_df = client.post_process.properties(detection_build_kpi_model_model)
features_df = client.post_process.features(detection_build_kpi_model_model)

# Job logs as data frames, ordered by creation time and re-indexed from zero.
model_logs_df = (
    pd.DataFrame(detection_build_kpi_model_logs)
    .sort_values(by='createdAt')
    .reset_index(drop=True)
)
detect_logs_df = (
    pd.DataFrame(detection_detect_logs)
    .sort_values(by='createdAt')
    .reset_index(drop=True)
)

## 3.1 Visual

In [None]:
# Top panel: KPI, modelled normal behavior and flagged points.
# Bottom panel: one anomaly-indicator curve per indicator column.
fig = splt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
fig.add_trace(
    go.Scatter(x=tim_dataset[timestamp], y=tim_dataset[target], name=target, line=dict(color='black')),
    row=1, col=1,
)
fig.add_trace(
    go.Scatter(
        x=detection_build_kpi_model_table['timestamp'],
        y=detection_build_kpi_model_table['normal_behavior'],
        name='InSample Normal Behavior',
        line=dict(color='goldenrod'),
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Scatter(
        x=detection_detect_table['timestamp'],
        y=detection_detect_table['normal_behavior'],
        name='OutOfSample Normal Behavior',
        line=dict(color='darkgoldenrod'),
    ),
    row=1, col=1,
)

# The in-sample and out-of-sample tables are drawn the same way; only the legend suffixes differ.
result_tables = [
    (detection_build_kpi_model_table, ' InSample', ' anomaly inSample'),
    (detection_detect_table, ' OutOfSample', ' anomaly outOfSample'),
]
for result_table, indicator_suffix, anomaly_suffix in result_tables:
    indicator_columns = [column for column in result_table.columns if 'anomaly_indicator' in column]
    for indicator_column in indicator_columns:
        indicator_label = indicator_column.replace('anomaly_indicator_','')
        fig.add_trace(
            go.Scatter(x=result_table['timestamp'], y=result_table[indicator_column], name=indicator_label+indicator_suffix),
            row=2, col=1,
        )
        # Rows whose indicator reaches 1 are marked on the KPI curve.
        flagged_rows = result_table[result_table[indicator_column]>=1]
        fig.add_trace(
            go.Scatter(
                x=flagged_rows['timestamp'],
                y=flagged_rows['kpi'],
                name=indicator_label+anomaly_suffix,
                mode='markers',
                line={'color': 'red'},
            ),
            row=1, col=1,
        )

# Reference line at the indicator value used above to select the marked rows.
fig.add_hline(y=1, line_color="orange", row=2, col=1)
fig.update_layout(height=700, width=1400, title_text="Results")
fig.show()

## 3.2 Anomaly List

In [22]:
# Stack the in-sample and out-of-sample result tables (each keeps its own row index).
detection_table = pd.concat([detection_build_kpi_model_table, detection_detect_table])

# Keep the rows whose anomaly code is 1 or 3 and expose that column as 'anomaly'.
# NOTE(review): the meaning of codes 1 and 3 is not visible in this notebook — confirm against the TIM docs.
has_selected_code = detection_table['anomaly_code'].isin([1,3])
anomaly_list_df = (
    detection_table[has_selected_code]
    .copy()
    .rename(columns={'anomaly_code':'anomaly'})
)

# Parse the ISO-style timestamp strings into datetimes.
anomaly_list_df['timestamp'] = pd.to_datetime(
    anomaly_list_df['timestamp'],
    format='%Y-%m-%dT%H:%M:%S.%fZ',
)
anomaly_list_df

Unnamed: 0,timestamp,model_index,kpi,normal_behavior,anomaly,anomaly_indicator_residual,anomaly_indicator_imbalance
638,2019-03-06 11:03:11,1.0,3.45,0.855126,1.0,1.212612,0.376316
1107,2019-03-06 11:11:01,1.0,0.00,3.555792,1.0,1.664308,0.212900
1109,2019-03-06 11:11:03,1.0,3.72,0.581634,1.0,1.222966,0.134751
1137,2019-03-06 11:11:31,1.0,3.18,1.353532,1.0,1.070858,0.584259
1756,2019-03-06 11:21:50,1.0,3.42,1.425940,1.0,1.069886,0.597299
...,...,...,...,...,...,...,...
4638,2019-03-06 14:46:30,1.0,3.42,3.127911,1.0,0.580257,1.022099
4639,2019-03-06 14:46:31,1.0,3.42,3.150170,1.0,0.524101,1.037345
4640,2019-03-06 14:46:32,1.0,3.38,3.146576,1.0,0.470794,1.043201
4641,2019-03-06 14:46:33,1.0,3.38,3.129912,1.0,0.488658,1.019313


## 3.3 Insights

In [None]:
# Bar chart of the relative importance of each column reported by the model properties.
importance_bars = go.Bar(
    x=properties_df['name'],
    y=properties_df['rel_importance'],
    text=round(properties_df['rel_importance'],2),
    textposition='auto',
)
fig1 = go.Figure(importance_bars)
fig1.update_layout(
    height=500,
    width=1200,
    title_text='Predictor Importances',
    xaxis_title='name',
    yaxis_title='rel_importance',
)

# Columns of the dataset (predictors and target) that do not appear among the model properties.
candidate_columns = set(predictors+[target])
reported_columns = set(properties_df['name'])
print('Predictors not used:'+str(list(candidate_columns-reported_columns)))
fig1.show()

In [None]:
# Sunburst of feature importances: inner ring per model, outer ring per feature.
fig = px.sunburst(
    features_df,
    path=['Model','Feature'],
    values='importance',
    color='Feature',
)
fig.update_layout(
    height=700,
    width=700,
    title_text='Feature Importances',
)
fig.show()

In [25]:
# Collect the text of every model-building log entry whose type is "Warning".
is_warning = model_logs_df['messageType'] == "Warning"
warnings = list(model_logs_df.loc[is_warning, 'message'])
warnings

[]

In [26]:
# Display the log entries of the model-building job (sorted by 'createdAt' when the frame was built).
model_logs_df

Unnamed: 0,message,messageType,createdAt,origin
0,The job is categorized as light.,Info,2023-04-20T15:26:48.768Z,Registration
1,Expected result table size is 0.87 MiB.,Info,2023-04-20T15:26:48.768Z,Registration
2,Job waiting in queue with priority 4.,Info,2023-04-20T15:26:49.065Z,Execution
3,Executing job.,Info,2023-04-20T15:26:49.126Z,Execution
4,"Detection job, type: model building, approach:...",Info,2023-04-20T15:26:49.296Z,Execution
5,"Getting data from dataset version ""0d01f203-47...",Info,2023-04-20T15:26:49.431Z,Execution
6,Used sampling period 1 second.,Info,2023-04-20T15:26:50.689Z,Execution
7,Validation successful.,Info,2023-04-20T15:26:50.758Z,Execution
8,Building the normal behavior model.,Info,2023-04-20T15:26:50.771Z,Execution
9,Parameter useKPIoffsets is set to true. Reason...,Info,2023-04-20T15:26:50.803Z,Execution


In [97]:
# Display the log entries of the detection job (sorted by 'createdAt' when the frame was built).
detect_logs_df

Unnamed: 0,message,messageType,createdAt,origin
0,The job is categorized as light.,Info,2023-02-06T13:52:44.674Z,Registration
1,Expected result table size is 0.43 MiB.,Info,2023-02-06T13:52:44.674Z,Registration
2,Queued by fa4b9fce-f6f6-4f07-bab1-5814a537c413.,Info,2023-02-06T13:52:44.991Z,Execution
3,Job execution posted to queue with priority 5.,Info,2023-02-06T13:52:47.280Z,Execution
4,Handling request ...,Info,2023-02-06T13:52:47.310Z,Execution
5,"Executing job ""663816be-9a87-410a-b39b-f6b32b2...",Info,2023-02-06T13:52:47.338Z,Execution
6,"Anomaly detection job, type: detection., appro...",Info,2023-02-06T13:52:47.596Z,Execution
7,"Getting resources from the parent job ""aa100d3...",Info,2023-02-06T13:52:47.609Z,Execution
8,Used sampling period 1 second.,Info,2023-02-06T13:52:49.113Z,Execution
9,Validation successful.,Info,2023-02-06T13:52:49.162Z,Execution
