In [1]:
from __future__ import division
from __future__ import print_function

import os
import sys
from time import time

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
# supress warnings for clean output
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from models.auto_encoder import AutoEncoder
from models.usad import Usad
from models.vae import Vae


from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
import glob

In [2]:
file_list = glob.glob("data/preprocessed/MSL/train/*.csv")

In [3]:
# Define outlier detection tools to be compared
random_state = np.random.RandomState(42)

df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',
              'AutoEncoder','Vae', 'USAD']
f1_df = pd.DataFrame(columns=df_columns)
prn_df = pd.DataFrame(columns=df_columns)
time_df = pd.DataFrame(columns=df_columns)




In [4]:
for file in file_list:
    print("\n... Processing", file.split("/")[-1], '...')
    X = pd.read_csv(file).values
    y = pd.read_csv(file.replace("X_train","y_test").replace("train","labels")).values
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # construct containers for saving results
    f1_list = [file.split("/")[-1][:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [file.split("/")[-1][:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [file.split("/")[-1][:-4], X.shape[0], X.shape[1], outliers_percentage]

    X_train = X 
    X_test = pd.read_csv(file.replace("train","test")).values
    y_test = y


    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)
    
    #Windows 
    
    window_size=12

    windows_train= X_train_norm[np.arange(window_size)[None, :] + np.arange( X_train_norm.shape[0]-window_size)[:, None]]
    windows_test=X_test_norm[np.arange(window_size)[None, :] + np.arange(X_test_norm.shape[0]-window_size)[:, None]]
        
    classifiers = {'AutoEncoder': AutoEncoder(),'Vae': Vae(), 'Usad' : Usad()}

    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(windows_train, None)
        test_scores = clf.decision_function(windows_test)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)
        test_scores = [0 if clf.threshold > i else 1 for i in test_scores]
        f1 = round(f1_score(y_test[:-window_size], test_scores, average='micro'), ndigits=4)
        prn = round(precision_n_scores(y_test[:-window_size], test_scores), ndigits=4)

        print('{clf_name} F1:{f1}, '
              'execution time: {duration}s'.format(
            clf_name=clf_name, f1=f1, duration=duration))

        f1_list.append(f1)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(f1_list).transpose()
    temp_df.columns = df_columns
    f1_df = pd.concat([f1_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)



... Processing X_train_M-6.csv ...
AutoEncoder F1:0.9946, execution time: 18.2079s
Vae F1:0.9931, execution time: 17.7468s
Usad F1:0.9941, execution time: 34.8337s

... Processing X_train_M-1.csv ...
AutoEncoder F1:0.4949, execution time: 23.994s
Vae F1:0.4954, execution time: 27.2986s
Usad F1:0.4954, execution time: 48.5032s

... Processing X_train_M-2.csv ...
AutoEncoder F1:0.4945, execution time: 22.6276s
Vae F1:0.4958, execution time: 23.5837s
Usad F1:0.4954, execution time: 50.6283s

... Processing X_train_S-2.csv ...
AutoEncoder F1:0.8887, execution time: 9.7904s
Vae F1:0.8876, execution time: 10.2325s
Usad F1:0.8876, execution time: 20.85s

... Processing X_train_P-10.csv ...
AutoEncoder F1:0.7497, execution time: 51.1742s
Vae F1:0.7497, execution time: 49.6269s
Usad F1:0.7497, execution time: 100.2971s

... Processing X_train_T-4.csv ...
AutoEncoder F1:0.9528, execution time: 24.0786s
Vae F1:0.9528, execution time: 25.2989s
Usad F1:0.9528, execution time: 55.9748s

... Process

In [None]:
f1_df 