In [14]:
import warnings
warnings.filterwarnings('ignore')

In [15]:
# This function is to remove periods when the sensor is considered turned off.
# This can happen, for example, when the sensor is out-of-battery or turned off by humans to avoid polluted water (see Section 2.2.2).
# As agreed with all of our domain experts, such periods should not be measured in terms of precision.

def remove_long_shutdown(numbers, num_consecutive, missing_label):
    chunks = []
    current_chunk = []

    i = 0
    while i < len(numbers)-1:
        num = numbers[i]
        if num != missing_label:
            current_chunk.append(i)
        else:
            j = i+1
            while j < len(numbers):
                if numbers[j] == missing_label:
                    j += 1
                else:
                    break

            if j-i < num_consecutive:
                current_chunk += range(i,min(j+1, len(numbers)))# numbers[i:j+1]
            else:
                chunks.append(current_chunk)
                current_chunk = []

            i=j         

        i+= 1

    # Append the last chunk
    if current_chunk:
        chunks.append(current_chunk)

    to_ret = []
    # Print the chunks
    for i, chunk in enumerate(chunks, 1):
        to_ret += chunk
        
    return to_ret

In [34]:
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
import traceback

def calculate_f1(path_to_result, label_file, feasibility=None):
    for result_file in os.listdir(path_to_result):
        try:
            print('----------', result_file, '----------')
            result_adapad = pd.read_csv(path_to_result + '/' + result_file)
            result_adapad = result_adapad.dropna()
            result_adapad = result_adapad.reset_index(drop=True)

            label = pd.read_csv(label_file)
            label = label[-len(result_adapad):]
            label = label.reset_index(drop=True)
            
            if feasibility:
                label_feasibility = pd.read_csv(feasibility)
                cutoff_label_feasibility = label.timestamp[0]
                cutoff_label_feasibility = label_feasibility[label_feasibility.timestamp == cutoff_label_feasibility].index.values[0]
                label_feasibility = label_feasibility[cutoff_label_feasibility:]
            
            total = pd.concat([result_adapad, label], axis=1, join='inner')
            if feasibility:
                total = total[:len(label_feasibility)]
            observed_values = total.observed.values.tolist()
            observed_values = [int(x) for x in observed_values]
            to_keep_comparision = remove_long_shutdown(observed_values, THRESHOLD_INDICATING_SHUTDOWN, MISSING_VALUE)
            total=total[total.index.isin(to_keep_comparision)]

            preds = result_adapad.anomalous
            preds = preds.dropna()
            preds = preds.astype(int)

            Precision, Recall, F, Support =metrics.precision_recall_fscore_support(total.is_anomaly.values.tolist(), 
                                                                                   total.anomalous.values.tolist(), 
                                                                                   zero_division=0)
            precision = Precision[1]
            recall = Recall[1]
            f = F[1]
            print(precision, recall, f)
        except:
            traceback.print_exc()
        


In [35]:
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
import traceback
import math

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def calculate_roc(path_to_result, label_file, feasibility=None):
    for result_file in os.listdir(path_to_result):
        try:
            print('----------', result_file, '----------')
            result_adapad = pd.read_csv(path_to_result + '/' + result_file)
            result_adapad = result_adapad.dropna()
            result_adapad = result_adapad.reset_index(drop=True)

            label = pd.read_csv(label_file)
            label = label[-len(result_adapad):]
            label = label.reset_index(drop=True)
            
            if feasibility:
                label_feasibility = pd.read_csv(feasibility)
                cutoff_label_feasibility = label.timestamp[0]
                cutoff_label_feasibility = label_feasibility[label_feasibility.timestamp == cutoff_label_feasibility].index.values[0]
                label_feasibility = label_feasibility[cutoff_label_feasibility:]

            # remove shutdown period data
            total = pd.concat([result_adapad, label], axis=1, join='inner')
            if feasibility:
                total = total[:len(label_feasibility)]
            observed_values = total.observed.values.tolist()
            observed_values = [int(x) for x in observed_values]
            to_keep_comparision = remove_long_shutdown(observed_values, THRESHOLD_INDICATING_SHUTDOWN, MISSING_VALUE)
            total=total[total.index.isin(to_keep_comparision)]

            # calculate anomaly scores
            total["anomaly_score"] = np.ones((len(total), 1))
            normal_observed_range = total[(total.observed >= OPERATION_VAL_RANGE[0]) & 
                                          (total.observed <= OPERATION_VAL_RANGE[1])]
            #normal_observed_range
            anomaly_scores_normal_condition = normal_observed_range.err - normal_observed_range.threshold
            anomaly_scores_normal_condition = anomaly_scores_normal_condition.values.tolist()
            anomaly_scores_normal_condition = [sigmoid(x) for x in anomaly_scores_normal_condition]
            total.loc[normal_observed_range.index, 'anomaly_score'] = anomaly_scores_normal_condition

            roc_auc = metrics.roc_auc_score(total.is_anomaly, total.anomaly_score)
            print("ROC AUC:", roc_auc)

            y, x, _ = metrics.precision_recall_curve(total.is_anomaly, total.anomaly_score)
            pr_auc = metrics.auc(x, y)
            print("PR AUC:", pr_auc)
        except:
            traceback.print_exc()

In [39]:
THRESHOLD_INDICATING_SHUTDOWN = 30
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (713.682, 763.826)

path_to_result = "./Tide_pressure/"
label_file = "../../../../01_data/01_label/Tide_Pressure.validation_stage.csv"
calculate_f1(path_to_result, label_file)

---------- progress_0.00000003.csv ----------
0.01670814077497334 0.94 0.03283269297939225
---------- progress_0.00000008.csv ----------
0.016702203269367447 0.94 0.03282122905027933
---------- progress_0.0000003.csv ----------
0.016631280962491155 0.94 0.03268428372739917
---------- progress_0.0000008.csv ----------
0.017375231053604435 0.94 0.034119782214156076
---------- progress_0.000003.csv ----------
0.017446176688938383 0.94 0.03425655976676385
---------- progress_0.000008.csv ----------
0.018021472392638037 0.94 0.035364936042136946
---------- progress_0.00003.csv ----------
0.019632414369256473 0.94 0.038461538461538464
---------- progress_0.00008.csv ----------
0.02357071213640923 0.94 0.045988258317025445
---------- progress_0.0003.csv ----------
0.04364326375711575 0.92 0.08333333333333334
---------- progress_0.0008.csv ----------
0.13043478260869565 0.9 0.22784810126582278
---------- progress_0.001.csv ----------
0.18723404255319148 0.88 0.3087719298245614
---------- progr

In [38]:
THRESHOLD_INDICATING_SHUTDOWN = 30
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (713.682, 763.826)

path_to_result = "./Tide_pressure/"
label_file = "../../../../01_data/01_label/Tide_Pressure.validation_stage.csv"
calculate_roc(path_to_result, label_file)

---------- progress_0.00000003.csv ----------
ROC AUC: 0.9350446428571427
PR AUC: 0.8527637879522546
---------- progress_0.00000008.csv ----------
ROC AUC: 0.9350148809523808
PR AUC: 0.8525496579468137
---------- progress_0.0000003.csv ----------
ROC AUC: 0.9350148809523808
PR AUC: 0.8524555529934534
---------- progress_0.0000008.csv ----------
ROC AUC: 0.9350595238095238
PR AUC: 0.8530935310416823
---------- progress_0.000003.csv ----------
ROC AUC: 0.9351748511904762
PR AUC: 0.8532428343959285
---------- progress_0.000008.csv ----------
ROC AUC: 0.9352157738095237
PR AUC: 0.8528807121285507
---------- progress_0.00003.csv ----------
ROC AUC: 0.9355840773809524
PR AUC: 0.8521009273924314
---------- progress_0.00008.csv ----------
ROC AUC: 0.9358258928571428
PR AUC: 0.8528476331465379
---------- progress_0.0003.csv ----------
ROC AUC: 0.9335081845238095
PR AUC: 0.8484515659442812
---------- progress_0.0008.csv ----------
ROC AUC: 0.9433407738095237
PR AUC: 0.8433827471990134
----------

Traceback (most recent call last):
  File "/tmp/ipykernel_957/1077959814.py", line 35, in calculate_roc
    observed_values = [int(x) for x in observed_values]
  File "/tmp/ipykernel_957/1077959814.py", line 35, in <listcomp>
    observed_values = [int(x) for x in observed_values]
ValueError: invalid literal for int() with base 10: '739.17'


ROC AUC: 0.9674739583333334
PR AUC: 0.8274625981737788
---------- progress_0.004.csv ----------
ROC AUC: 0.9674665178571429
PR AUC: 0.8274610592813132
---------- progress_0.005.csv ----------
ROC AUC: 0.9673697916666667
PR AUC: 0.8268072097802931
---------- progress_0.006.csv ----------
ROC AUC: 0.9673400297619047
PR AUC: 0.8263373219317747
---------- progress_0.007.csv ----------
ROC AUC: 0.9673214285714286
PR AUC: 0.8259622614630827
---------- progress_0.008.csv ----------
ROC AUC: 0.9672879464285715
PR AUC: 0.8244963130556614
---------- progress_0.009.csv ----------
ROC AUC: 0.9672581845238095
PR AUC: 0.8233050979210402
---------- progress_0.03.csv ----------
ROC AUC: 0.9672395833333333
PR AUC: 0.8227379001256645
---------- progress_0.03_.csv ----------
ROC AUC: 0.9672395833333333
PR AUC: 0.8227379001256645
---------- progress_0.08.csv ----------
ROC AUC: 0.9672395833333333
PR AUC: 0.8227379001256645
---------- progress_0.3.csv ----------
ROC AUC: 0.9672395833333333
PR AUC: 0.822737

In [27]:
THRESHOLD_INDICATING_SHUTDOWN = 10
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (0, 15.2)

path_to_result = "./Wave_height//"
label_file = "../../../../01_data/01_label/Wave_height.csv"
calculate_f1(path_to_result, label_file)

---------- progress_0.05.csv ----------
0.9576651699807569 0.9959973315543695 0.9764551994767823
---------- progress_0.1.csv ----------
0.9751146037982973 0.9933288859239493 0.984137475214805
---------- progress_0.12.csv ----------
0.9782894736842105 0.9919946631087392 0.9850944021199072
---------- progress_0.13.csv ----------
0.9795783926218709 0.9919946631087392 0.9857474312230692
---------- progress_0.14.csv ----------
0.9808707124010554 0.9919946631087392 0.9864013266998342
---------- progress_0.15.csv ----------
0.9828042328042328 0.9913275517011341 0.9870474925273997
---------- progress_0.16.csv ----------
0.9860557768924303 0.9906604402935291 0.9883527454242929
---------- progress_0.17.csv ----------
0.9873670212765957 0.9906604402935291 0.989010989010989
---------- progress_0.18.csv ----------
0.9873670212765957 0.9906604402935291 0.989010989010989
---------- progress_0.19.csv ----------
0.988681757656458 0.9906604402935291 0.9896701099633456
---------- progress_0.20.csv ------

In [28]:
THRESHOLD_INDICATING_SHUTDOWN = 10
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (0, 15.2)

path_to_result = "./Wave_height//"
label_file = "../../../../01_data/01_label/Wave_height.csv"
calculate_roc(path_to_result, label_file)

---------- progress_0.05.csv ----------
ROC AUC: 0.9999132656608364
PR AUC: 0.9995260938144042
---------- progress_0.1.csv ----------
ROC AUC: 0.9999054565376425
PR AUC: 0.9994868798446115
---------- progress_0.12.csv ----------
ROC AUC: 0.9999021206015208
PR AUC: 0.9994696115753537
---------- progress_0.13.csv ----------
ROC AUC: 0.9999012866174903
PR AUC: 0.9994652633379149
---------- progress_0.14.csv ----------
ROC AUC: 0.99990037681673
PR AUC: 0.9994605075085062
---------- progress_0.15.csv ----------
ROC AUC: 0.9998999977330798
PR AUC: 0.9994587049048963
---------- progress_0.16.csv ----------
ROC AUC: 0.9998989362988593
PR AUC: 0.9994529093828922
---------- progress_0.17.csv ----------
ROC AUC: 0.999898405581749
PR AUC: 0.9994500141245204
---------- progress_0.18.csv ----------
ROC AUC: 0.9998977232311786
PR AUC: 0.9994462565716946
---------- progress_0.19.csv ----------
ROC AUC: 0.9998981023148288
PR AUC: 0.9994482654717971
---------- progress_0.20.csv ----------
ROC AUC: 0.999