In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
def remove_long_shutdown(numbers, num_consecutive, missing_label):
    chunks = []
    current_chunk = []

    i = 0
    while i < len(numbers)-1:
        num = numbers[i]
        if num != missing_label:
            current_chunk.append(i)
        else:
            j = i+1
            while j < len(numbers):
                if numbers[j] == missing_label:
                    j += 1
                else:
                    break

            if j-i < num_consecutive:
                current_chunk += range(i,min(j+1, len(numbers)))# numbers[i:j+1]
            else:
                chunks.append(current_chunk)
                current_chunk = []

            i=j         

        i+= 1

    # Append the last chunk
    if current_chunk:
        chunks.append(current_chunk)

    to_ret = []
    # Print the chunks
    for i, chunk in enumerate(chunks, 1):
        to_ret += chunk
        
    return to_ret

In [3]:
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
import traceback

def calculate_f1(path_to_result, label_file, feasibility=None):
    for result_file in os.listdir(path_to_result):
        try:
            print('----------', result_file, '----------')
            result_adapad = pd.read_csv(path_to_result + '/' + result_file)
            result_adapad = result_adapad.dropna()
            result_adapad = result_adapad.reset_index(drop=True)

            label = pd.read_csv(label_file)
            label = label[-len(result_adapad):]
            label = label.reset_index(drop=True)
            
            if feasibility:
                label_feasibility = pd.read_csv(feasibility)
                cutoff_label_feasibility = label.timestamp[0]
                cutoff_label_feasibility = label_feasibility[label_feasibility.timestamp == cutoff_label_feasibility].index.values[0]
                label_feasibility = label_feasibility[cutoff_label_feasibility:]
            
            total = pd.concat([result_adapad, label], axis=1, join='inner')
            if feasibility:
                total = total[:len(label_feasibility)]
            observed_values = total.observed.values.tolist()
            observed_values = [int(x) for x in observed_values]
            to_keep_comparision = remove_long_shutdown(observed_values, THRESHOLD_INDICATING_SHUTDOWN, MISSING_VALUE)
            total=total[total.index.isin(to_keep_comparision)]

            preds = result_adapad.anomalous
            preds = preds.dropna()
            preds = preds.astype(int)

            Precision, Recall, F, Support =metrics.precision_recall_fscore_support(total.is_anomaly.values.tolist(), 
                                                                                   total.anomalous.values.tolist(), 
                                                                                   zero_division=0)
            precision = Precision[1]
            recall = Recall[1]
            f = F[1]
            print(precision, recall, f)
        except:
            pass
        


In [4]:
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn import metrics
import traceback
import math

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def calculate_roc(path_to_result, label_file, feasibility=None):
    for result_file in os.listdir(path_to_result):
        try:
            print('----------', result_file, '----------')
            result_adapad = pd.read_csv(path_to_result + '/' + result_file)
            result_adapad = result_adapad.dropna()
            result_adapad = result_adapad.reset_index(drop=True)

            label = pd.read_csv(label_file)
            label = label[-len(result_adapad):]
            label = label.reset_index(drop=True)
            
            if feasibility:
                label_feasibility = pd.read_csv(feasibility)
                cutoff_label_feasibility = label.timestamp[0]
                cutoff_label_feasibility = label_feasibility[label_feasibility.timestamp == cutoff_label_feasibility].index.values[0]
                label_feasibility = label_feasibility[cutoff_label_feasibility:]

            # remove shutdown period data
            total = pd.concat([result_adapad, label], axis=1, join='inner')
            if feasibility:
                total = total[:len(label_feasibility)]
            observed_values = total.observed.values.tolist()
            observed_values = [int(x) for x in observed_values]
            to_keep_comparision = remove_long_shutdown(observed_values, THRESHOLD_INDICATING_SHUTDOWN, MISSING_VALUE)
            total=total[total.index.isin(to_keep_comparision)]

            # calculate anomaly scores
            total["anomaly_score"] = np.ones((len(total), 1))
            normal_observed_range = total[(total.observed >= OPERATION_VAL_RANGE[0]) & 
                                          (total.observed <= OPERATION_VAL_RANGE[1])]
            #normal_observed_range
            anomaly_scores_normal_condition = normal_observed_range.err - normal_observed_range.threshold
            anomaly_scores_normal_condition = anomaly_scores_normal_condition.values.tolist()
            anomaly_scores_normal_condition = [sigmoid(x) for x in anomaly_scores_normal_condition]
            total.loc[normal_observed_range.index, 'anomaly_score'] = anomaly_scores_normal_condition

            roc_auc = metrics.roc_auc_score(total.is_anomaly, total.anomaly_score)
            print("ROC AUC:", roc_auc)

            y, x, _ = metrics.precision_recall_curve(total.is_anomaly, total.anomaly_score)
            pr_auc = metrics.auc(x, y)
            print("PR AUC:", pr_auc)
        except:
            pass

In [5]:
THRESHOLD_INDICATING_SHUTDOWN = 30
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (713.682, 763.826)

path_to_result = "./Tide_Pressure/"
label_file = "../../01_data/01_label/Tide_pressure.csv"
feasibility = "../../01_data/01_label/Tide_pressure.bechmark_stage.csv"
calculate_f1(path_to_result, label_file, feasibility)

---------- bk ----------
---------- progress_0.0002.csv ----------
0.02254791431792559 0.9090909090909091 0.044004400440044
---------- progress_0.0008.csv ----------
0.24342105263157895 0.8409090909090909 0.3775510204081633
---------- progress_0.001.csv ----------
0.3592233009708738 0.8409090909090909 0.5034013605442177
---------- progress_0.0014.csv ----------
0.5873015873015873 0.8409090909090909 0.6915887850467289
---------- progress_0.0018.csv ----------
0.7708333333333334 0.8409090909090909 0.8043478260869567
---------- progress_0.0019.csv ----------
0.8043478260869565 0.8409090909090909 0.8222222222222223
---------- progress_0.002.csv ----------
0.8181818181818182 0.8181818181818182 0.8181818181818182
---------- progress_0.0022.csv ----------
0.8181818181818182 0.8181818181818182 0.8181818181818182
---------- progress_0.0026.csv ----------
0.8378378378378378 0.7045454545454546 0.7654320987654323
---------- progress_0.003.csv ----------
0.8571428571428571 0.6818181818181818 0.7594

In [6]:
THRESHOLD_INDICATING_SHUTDOWN = 30
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (713.682, 763.826)

path_to_result = "./Tide_Pressure/"
label_file = "../../01_data/01_label/Tide_pressure.csv"
feasibility = "../../01_data/01_label/Tide_pressure.bechmark_stage.csv"
calculate_roc(path_to_result, label_file, feasibility)

---------- bk ----------
---------- progress_0.0002.csv ----------
ROC AUC: 0.9170454545454545
PR AUC: 0.8010856477832753
---------- progress_0.0008.csv ----------
ROC AUC: 0.9446060272326792
PR AUC: 0.8152343009962884
---------- progress_0.001.csv ----------
ROC AUC: 0.9442631157388867
PR AUC: 0.8164351543256757
---------- progress_0.0014.csv ----------
ROC AUC: 0.9440178213856629
PR AUC: 0.8162673181503096
---------- progress_0.0018.csv ----------
ROC AUC: 0.9439752703243893
PR AUC: 0.816340883062379
---------- progress_0.0019.csv ----------
ROC AUC: 0.9439627553063676
PR AUC: 0.8163619068433041
---------- progress_0.002.csv ----------
ROC AUC: 0.9439677613135764
PR AUC: 0.8202340229033078
---------- progress_0.0022.csv ----------
ROC AUC: 0.943942731277533
PR AUC: 0.8173116078152881
---------- progress_0.0026.csv ----------
ROC AUC: 0.9439101922306768
PR AUC: 0.8172431193952907
---------- progress_0.003.csv ----------
ROC AUC: 0.9438851621946336
PR AUC: 0.8166267728620995
----------

In [11]:
THRESHOLD_INDICATING_SHUTDOWN = 10
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (-2, 32)

path_to_result = "./Seawater_Temperature//"
label_file = "../../01_data/01_label/Seawater_temperature.csv"
calculate_roc(path_to_result, label_file)

---------- progress_0.0000002.csv ----------
ROC AUC: 0.925224912357423
PR AUC: 0.8212875257043566
---------- progress_0.0000005.csv ----------
ROC AUC: 0.9286540144974524
PR AUC: 0.8226391965320728
---------- progress_0.0000008.csv ----------
ROC AUC: 0.930863621132251
PR AUC: 0.8232959939185097
---------- progress_0.000002.csv ----------
ROC AUC: 0.9269344461891955
PR AUC: 0.821753480521177
---------- progress_0.000005.csv ----------
ROC AUC: 0.9225031305920499
PR AUC: 0.8196340073652335
---------- progress_0.000008.csv ----------
ROC AUC: 0.9233209165892005
PR AUC: 0.8200382312248612
---------- progress_0.00002.csv ----------
ROC AUC: 0.9249950594046712
PR AUC: 0.8212335440232782
---------- progress_0.00005.csv ----------
ROC AUC: 0.9238323517754333
PR AUC: 0.820816176357888
---------- progress_0.00008.csv ----------
ROC AUC: 0.9241912329301727
PR AUC: 0.8210227841139982
---------- progress_0.0002.csv ----------
ROC AUC: 0.9243121771587389
PR AUC: 0.8210909445190766
---------- progr

In [12]:
THRESHOLD_INDICATING_SHUTDOWN = 10
MISSING_VALUE = -999
OPERATION_VAL_RANGE = (-2, 32)

path_to_result = "./Seawater_Temperature/"
label_file = "../../01_data/01_label/Seawater_temperature.csv"
calculate_f1(path_to_result, label_file)

---------- progress_0.0000002.csv ----------
0.34174441174629017 0.9571520430233238 0.5036604121808674
---------- progress_0.0000005.csv ----------
0.5095793947852566 0.9390892617057345 0.6606625131083832
---------- progress_0.0000008.csv ----------
0.6049588656784319 0.9327760565850237 0.7339251218839113
---------- progress_0.000002.csv ----------
0.6724933338976594 0.9288010755830947 0.7801345313497324
---------- progress_0.000005.csv ----------
0.6893483381313792 0.9238323493306834 0.7895486223865311
---------- progress_0.000008.csv ----------
0.6933473758118308 0.9235985269188052 0.7920792079207921
---------- progress_0.00002.csv ----------
0.69833266993941 0.9230139708891097 0.7951054937308022
---------- progress_0.00005.csv ----------
0.7006660746003552 0.9223709592564447 0.7963762081409139
---------- progress_0.00008.csv ----------
0.701213279409804 0.9223125036534752 0.7967077358109473
---------- progress_0.0002.csv ----------
0.7018909899888766 0.9221371368445666 0.79707948057