#  ARIMA fault detection algorithm

Pipeline for anomaly detection on SKAB.

The idea behind this algorithm is to use ARIMA weights as features for anomaly detection. Discrete differences of the weight coefficients are passed to different heuristic methods to obtain a function that characterizes the state (anomaly or not anomaly) by comparison with a threshold.

Links: [PyPI](https://pypi.org/project/arimafd/), [GitHub](https://github.com/waico/arimafd), and the [paper](https://waico.ru).

In [1]:
# libraries importing
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle


# additional modules
import sys
sys.path.append('../utils')
from evaluating import evaluating_change_point
from other import MeshLoader

ModuleNotFoundError: No module named 'other'

In [None]:
import numpy as np
from numpy import linalg
import pandas as pd
from sympy import diff, symbols, sympify, Symbol, poly
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from time import time

## Data loading

In [None]:
# Discover the benchmark CSV files.
import os

# Walk ../data/ recursively and keep every path that ends in ".csv".
all_files = []
for root, dirs, files in os.walk("../data/"):
    for file in files:
        if file.endswith(".csv"):
            all_files.append(os.path.join(root, file))

# os.walk yields directory entries in arbitrary, filesystem-dependent order.
# Later cells pair each dataset with a cached tensor by list position, so the
# order is fixed here to keep runs reproducible across machines.
# NOTE: if tensors.pickle was built under a different file order, regenerate it.
all_files.sort()

In [None]:
# Options shared by every CSV read below: ';'-separated values, indexed by the
# parsed 'datetime' column.
csv_options = dict(sep=';', index_col='datetime', parse_dates=True)

# Load every file whose path does not contain 'anomaly-free' as a dataset
# with anomalies.
list_of_df = []
for path in all_files:
    if 'anomaly-free' in path:
        continue
    list_of_df.append(pd.read_csv(path, **csv_options))

# Load the anomaly-free dataset; only the first matching path is used.
anomaly_free_paths = [path for path in all_files if 'anomaly-free' in path]
anomaly_free_df = pd.read_csv(anomaly_free_paths[0], **csv_options)

# One 'changepoint' label series per dataset, in the same order as list_of_df.
true_cp = [frame.changepoint for frame in list_of_df]

## Data description and visualization

In [2]:
# Report basic characteristics of the loaded datasets.
dataset_count = len(list_of_df)
print(f'A number of datasets in the SkAB v1.0: {dataset_count}\n')

# The "random" dataset shown here is simply the first entry of list_of_df.
first_df = list_of_df[0]
print(f'Shape of the random dataset: {first_df.shape}\n')

# Total number of rows labelled 1. in each label column, over all datasets.
n_cp = sum(len(frame[frame.changepoint == 1.]) for frame in list_of_df)
n_outlier = sum(len(frame[frame.anomaly == 1.]) for frame in list_of_df)
print(f'A number of changepoints in the SkAB v1.0: {n_cp}\n')
print(f'A number of outliers in the SkAB v1.0: {n_outlier}\n')

print('Head of the random dataset:')
display(first_df.head())

NameError: name 'list_of_df' is not defined

In [None]:
# Plot all columns of the first dataset on one 12x6 figure and label the axes.
signals_ax = list_of_df[0].plot(figsize=(12,6))
signals_ax.set_xlabel('Time')
signals_ax.set_ylabel('Value')
signals_ax.set_title('Signals')
plt.show()

## Labels

In [None]:
# Overlay the outlier ('anomaly') and 'changepoint' label series of the first
# dataset on a single 12x3 axes, with a legend to tell them apart.
labels_df = list_of_df[0]
labels_ax = labels_df.anomaly.plot(figsize=(12,3))
labels_df.changepoint.plot(ax=labels_ax)
labels_ax.legend()
plt.show()

## Method applying

In [None]:
from arimafd import *
import os

In [None]:
# Load the per-dataset tensors from the on-disk cache when it is usable;
# otherwise compute them with arimafd and write the cache.
# NOTE: unpickling can execute arbitrary code -- only load a tensors.pickle
# that was produced by this notebook.
tensors = None
if os.path.exists(r'tensors.pickle'):
    with open(r'tensors.pickle', 'rb') as f:
        tensors = pickle.load(f)
    # Tensors are paired with list_of_df by position in later cells, so a
    # cache holding a different number of entries cannot belong to the
    # currently loaded datasets and must be rebuilt.
    if len(tensors) != len(list_of_df):
        print(f'tensors.pickle holds {len(tensors)} tensors but {len(list_of_df)} '
              'datasets are loaded; regenerating the cache')
        tensors = None

if tensors is None:
    tensors = []
    for df in list_of_df:
        # The last two columns are excluded from the detector input
        # (presumably the 'anomaly' and 'changepoint' labels -- confirm
        # against the CSV column order).
        a = anomaly_detection(df.iloc[:,:-2])
        tensors.append(a.generate_tensor(ar_order=100))
    with open(r'tensors.pickle', 'wb') as handle:
        pickle.dump(tensors, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
# Grid search over the tensor post-processing parameters. For every parameter
# combination, each dataset's cached tensor is post-processed, the resulting
# binary predictions are scored with the NAB metric, and one row is appended
# to `history`.
metrics = range(1,6)                      # candidate No_metric values
windows= [20,50,100,150]                  # candidate window values
window_insensitivitys = [20,50,100,150]   # candidate window_insensitivity values
history = []
# NOTE(review): MeshLoader comes from the local `other` module and presumably
# yields every combination of the three lists -- confirm. The recorded output
# of the import cell shows "No module named 'other'", so this loop cannot run
# until that import is fixed.
for No_metric,window,window_insensitivity in MeshLoader([metrics,windows,window_insensitivitys]):
    print('XXX',No_metric,window,window_insensitivity)
    predicted_cp=[]
    for i,df in enumerate(list_of_df):
        a = anomaly_detection(df)
        # Reuse the precomputed tensor for this dataset instead of regenerating it.
        a.tensor = tensors[i]
        a.proc_tensor(No_metric=No_metric, window=window, window_insensitivity=window_insensitivity)
        predicted_cp.append(a.bin_metric)
    nab = evaluating_change_point(true_cp, predicted_cp, metric='nab', numenta_time='30 sec')
    history.append([No_metric, window, window_insensitivity, nab[0], nab[1], nab[2]])
    # Print only the row just added; printing the whole list on every
    # iteration makes the output grow quadratically.
    print(history[-1])
    print()
    print()
# The first three elements of the NAB result are stored as the
# 'Standart', 'LowFP' and 'LowFN' columns.
history = pd.DataFrame(history, columns=['No_metric','window','window_insensitivity','Standart','LowFP','LowFN'])

In [None]:
import matplotlib.gridspec as gridspec

# One scatter panel per No_metric value: window on the x axis,
# window_insensitivity on the y axis, colour given by the 'Standart' column.
f = plt.figure(figsize=(16,4))
grid = gridspec.GridSpec(1, len(metrics),wspace =0.7)
# The grid slot comes from the position in `metrics`, not from the metric
# value itself, so the layout stays valid if the values stop being 1..N.
for position, i in enumerate(metrics):
    # A plain local replaces the former globals()['ax<i>'] dynamic variables.
    ax = f.add_subplot(grid[position])
    history[history.No_metric==i].plot.scatter(x='window',y='window_insensitivity', c='Standart', colormap='viridis',ax=ax)
    ax.set_title(f"Metric {i}")

plt.show()

In [None]:
# Re-run the tensor post-processing with a single fixed parameter set and
# score the resulting predictions with the NAB metric.
No_metric= 5
window= 150
window_insensitivity = 20

predicted_cp=[]
for i,df in enumerate(list_of_df):
    a = anomaly_detection(df)
    # Reuse the precomputed tensor for this dataset instead of regenerating it.
    a.tensor = tensors[i]
    a.proc_tensor(No_metric=No_metric,window=window, window_insensitivity=window_insensitivity)
    predicted_cp.append(a.bin_metric)
nab = evaluating_change_point(true_cp, predicted_cp, metric='nab', numenta_time='30 sec')


In [None]:
# Score the same predictions with the 'average_delay' metric.
add = evaluating_change_point(
    true_cp,
    predicted_cp,
    metric='average_delay',
    numenta_time='30 sec',
)