In [1]:
from healthESN import Activation, HealthESN
from sklearn.preprocessing import StandardScaler

In [2]:
import pickle
with open(f"benchmark_data.pickle", "rb") as f:
    data = pickle.load(f)

column_history = data["column_history"]
perturbations = data["column_perturbations"]

In [3]:
import avh.metrics as metrics
import pandas as pd
import numpy as np

def extract_features(data: pd.Series) -> np.ndarray:
    row_count = metrics.RowCount.calculate(data)
    min_val = metrics.Min.calculate(data)
    max_val = metrics.Max.calculate(data)
    mean_val = metrics.Mean.calculate(data)
    median_val = metrics.Median.calculate(data)
    sum_val = metrics.Sum.calculate(data)
    range_val = metrics.Range.calculate(data)
    distinct_ratio = metrics.DistinctRatio.calculate(data)
    complete_ratio = metrics.CompleteRatio.calculate(data)

    features = [
        row_count,
        min_val,
        max_val,
        mean_val,
        median_val,
        sum_val,
        range_val,
        distinct_ratio,
        complete_ratio,
    ]
    return features

In [4]:
column = "numeric_0"
history = [run[0] for run in column_history]
column_perturbations = perturbations[0]

In [5]:
from dataclasses import dataclass

@dataclass
class CustomParameters:
    linear_hidden_size: int = 500
    prediction_window_size: int = 30
    connectivity: float = 0.25
    spectral_radius: float = 0.6
    activation: str = Activation.TANH.value
    random_state: int = 42

In [6]:
args = CustomParameters()
health_esn = HealthESN(n_dimensions=9,
                        hidden_units=args.linear_hidden_size,
                        window_size=args.prediction_window_size,
                        connectivity=args.connectivity,
                        spectral_radius=args.spectral_radius,
                        activation=Activation(args.activation).get_fun(),
                        seed=args.random_state)

scaler = StandardScaler()

In [23]:
thresholds = np.arange(1, 100, 1)

In [49]:
fp = []

feature_history = np.array([extract_features(run[column]) for run in history])
for i in range(30):
    
    train_df = feature_history[i:i+30]
    test_df = np.vstack([train_df, feature_history[[i+30]]])

    train_df = scaler.fit_transform(train_df)
    test_df = scaler.transform(test_df)

    health_esn = health_esn.fit(train_df)
    prediction = health_esn.predict(test_df)[-1]
    
    fp = prediction < thresholds


In [50]:
fp

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

In [51]:
tp = np.zeros_like(thresholds)

feature_history = np.array([extract_features(run[column]) for run in history])
train_df = feature_history[:30]
train_df = scaler.fit_transform(train_df)

health_esn = health_esn.fit(train_df)
for issue, perturbed_column in column_perturbations:
    
    test_df = np.array(extract_features(perturbed_column)).reshape(1, -1)
    test_df = scaler.transform(test_df)

    test_df = np.vstack([train_df, test_df])
    prediction = health_esn.predict(test_df)[-1]
    

    tp += prediction < thresholds


In [52]:
tp

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])