# Example of model analysis for anomaly detection

#### Package imports and variable definitions

In [None]:
from odin.classes import Curves
from odin.classes.timeseries import StandardScaler
from odin.classes.timeseries.anomaly_definition_strategies import AnomalyDefinitionStrategyTSAE
from odin.classes.timeseries.anomaly_matching_strategies import *
from odin.classes.timeseries import DatasetTSAnomalyDetection, TimeSeriesType, TSProposalsType, AnalyzerTSAnomalyDetection
from odin.classes import Errors, Metrics, CustomError, ErrCombination
from odin.classes.timeseries.metrics import f1_score
from odin.classes.timeseries.ts_custom_metric import TSCustomMetric

import numpy as np
import pandas as pd

# Create the scaler

In [None]:
# Scaler configured with a fixed mean and standard deviation; it is handed to
# the dataset through its `scaler` argument.
# NOTE(review): presumably these constants are statistics of the training
# series -- confirm where they come from.
scaler = StandardScaler(mean=71.43390471202335, std=4.612135080923405)

## Dataset

In [None]:
# Ground-truth series file.
GT_PATH = "../../test-data/anomaly_detection/gt.csv"

# One tuple per model; presumably (model name, predictions file, proposals
# type) -- the name 'LSTM' is the one later given to the analyzer.
PROPOSALS_PATH = [
    ('LSTM', "../../test-data/anomaly_detection/predictions.csv", TSProposalsType.REGRESSION),
]

# Univariate dataset: ground truth and proposals both use the 'timestamp'
# column as index, and the scaler created above is attached to it.
my_dataset = DatasetTSAnomalyDetection(
    GT_PATH,
    TimeSeriesType.UNIVARIATE,
    anomalies_path='../../test-data/anomaly_detection/anomalies.json',
    proposals_paths=PROPOSALS_PATH,
    properties_path='../../test-data/anomaly_detection/properties.csv',
    index_gt='timestamp',
    index_proposals='timestamp',
    scaler=scaler,
)

In [None]:
# Retrieve the dataset's observations; as a bare expression, the returned value
# is what the notebook cell displays.
my_dataset.get_observations()

#### Inspect anomalies

In [None]:
# Retrieve the aggregated anomalies of the dataset.
# NOTE(review): the aggregation criterion is defined by the library -- see its docs.
my_dataset.get_aggregate_anomalies()

In [None]:
# Retrieve the percentage of anomalies in the dataset (as reported by the library).
my_dataset.get_anomaly_percentage()

#### Dataset analysis

In [None]:
# Run the library's stationarity analysis on the dataset (default arguments).
my_dataset.analyze_stationarity()

In [None]:
# Run the library's periodicity analysis on the dataset (default arguments).
my_dataset.analyze_periodicity()

In [None]:
# Plot the FFT of the dataset; presumably a complement to the periodicity
# analysis -- confirm against the library docs.
my_dataset.plot_fft()

In [None]:
# Seasonality/trend analysis with an additive model and period 24*7*2 = 336.
# NOTE(review): looks like two weeks of hourly samples -- confirm the sampling
# rate of the series.
my_dataset.analyze_seasonality_trend(period=24*7*2, model_type='additive')

### Property distribution

In [None]:
# Show how the values of the "my_meta_ann" property are distributed, drawn as
# a pie chart. NOTE(review): the property presumably comes from the
# properties file given to the dataset -- confirm.
my_dataset.show_distribution_of_property(property_name="my_meta_ann",
                                         plot_type="pie")

# Create the anomaly strategy evaluator

In [None]:
# Anomaly-definition strategy with default settings; it is handed to the
# analyzers through their `anomaly_evaluation` argument.
evaluator = AnomalyDefinitionStrategyTSAE()

## Analyzer

In [None]:
# Analyzer for the 'LSTM' proposals of `my_dataset`. No `matching_strategy` is
# passed here, so the library's default matching applies.
my_analyzer = AnalyzerTSAnomalyDetection('LSTM', 
                                         my_dataset, 
                                         threshold=0.85, # alternative value: 1.6
                                         anomaly_evaluation=evaluator,
                                         # Interval-to-interval matching is left disabled in this analyzer:
                                         #matching_strategy=AnomalyMatchingStrategyIntervalToInterval(0.01),
                                         scaler_values=(True, True))

### Performance analysis

In [None]:
# Overall performance report with the analyzer's default metrics.
my_analyzer.analyze_performance()

In [None]:
# Performance as a function of the anomaly threshold (default arguments).
my_analyzer.analyze_performance_for_threshold()

In [None]:
# Confusion matrix; presumably computed at the analyzer's configured
# threshold -- confirm against the library docs.
my_analyzer.show_confusion_matrix()

### False Positive errors analysis
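False positive (FP) errors are grouped into categories, and the distribution of their distances from the nearest anomaly is then shown. The custom example below uses three categories: "affected" (within a given distance of a ground-truth anomaly), "continuous" (farther away, but contiguous to another positive prediction), and "generic" (all remaining FPs).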

#### Combine predefined errors 

In [None]:
# Combination of four predefined error types, each paired (by position) with
# the label given in the second list.
anticipation = ErrCombination(
    "ANTICIPATION",
    [Errors.BEFORE, Errors.CLOSELY_BEFORE, Errors.CLOSELY_AFTER, Errors.AFTER],
    ["Before", "Closely before", "Closely after", "After"],
)

In [None]:
# False-positive breakdown for the predefined error combination, using
# `Metrics.ACCURACY` and a `closely_threshold` of 410.
my_analyzer.analyze_false_positive_errors(
    metric=Metrics.ACCURACY,
    error_combination=anticipation,
    parameters_dicts=[{'closely_threshold': 410}],
)

#### Combine custom errors  

In [None]:
class GAC(CustomError):
    """Count the false positives of one category: affected, continuous or generic.

    A false positive (FP) is a sample with ``y_true == 0`` whose score reaches
    the threshold. Each FP is compared with the nearest ground-truth anomaly
    (distance measured in samples) and falls into exactly one category:

    * ``"affected"``: at most ``distance`` samples away from an anomaly;
    * ``"continuous"``: farther than ``distance`` and contiguous to another
      positive prediction (the previous sample is also an FP, or the next
      sample is predicted positive);
    * ``"generic"``: farther than ``distance`` and not contiguous.

    An instance reports only the FPs of the category it was created with.
    """

    _CATEGORIES = ("affected", "continuous", "generic")

    def __init__(self, name, category="generic"):
        """Store the error name and the FP category to count.

        Args:
            name: name of the error.
            category: one of ``"affected"``, ``"continuous"``, ``"generic"``.

        Raises:
            ValueError: if ``category`` is not one of the supported values.
        """
        if category not in self._CATEGORIES:
            raise ValueError(
                f"category must be one of {self._CATEGORIES}, got {category!r}"
            )
        self.name = name
        self._category = category

    def compute_error(self, y_true, y_score, threshold, observations, parameters_dict = None):
        """Find the false positives that belong to this instance's category.

        Args:
            y_true: ground-truth labels; ``1`` marks an anomaly, ``0`` a normal
                sample.
            y_score: scores; a sample is predicted positive when its score is
                ``>= threshold``.
            threshold: decision threshold applied to ``y_score``.
            observations: object whose ``index`` labels the samples.
            parameters_dict: must contain the key ``'distance'``, the maximum
                number of samples between an "affected" FP and an anomaly.

        Returns:
            A tuple ``(count, distances, errors_index, matching)``:
            ``count`` is the number of selected FPs; ``distances`` holds, for
            each of them, the signed distance to the nearest anomaly (positive
            when the anomaly precedes the FP, negative when it follows,
            ``inf`` when the series has no anomaly); ``errors_index`` holds
            their index labels; ``matching`` is the DataFrame with the
            ``y_true``, ``y_pred`` and ``eval`` columns (``eval`` is 1 for an
            anomaly, -1 for an FP, 0 otherwise).

        Raises:
            TypeError: if ``parameters_dict`` is ``None``.
            KeyError: if ``parameters_dict`` has no ``'distance'`` entry.
        """
        distance = parameters_dict['distance']

        y_pred = np.where(y_score >= threshold, 1, 0)

        matching = pd.DataFrame(data={'y_true': y_true,
                                      'y_pred': y_pred},
                                index=observations.index)
        matching['eval'] = 0
        matching.loc[matching['y_true'] == 1, 'eval'] = 1
        matching.loc[(matching['y_true'] == 0) & (matching['y_pred'] == 1), 'eval'] = -1

        # Extract the arrays once instead of rebuilding them at every iteration.
        eval_values = matching['eval'].values
        pred_values = matching['y_pred'].values
        index_values = matching.index
        n_samples = len(matching)

        anomalies_pos = np.where(eval_values == 1)[0]

        # -1 means "no anomaly on that side". A series without any anomaly
        # starts with no next anomaly instead of failing on anomalies_pos[0].
        previous_anomaly_pos = -1
        anomaly_pos_index = 0
        next_anomaly_pos = anomalies_pos[0] if len(anomalies_pos) > 0 else -1
        is_previous_fp = False

        count = 0
        distances = []
        errors_index = []

        for i, v in enumerate(eval_values):
            # Once the scan has passed the "next" anomaly it becomes the
            # "previous" one, and the following anomaly (if any) becomes "next".
            if next_anomaly_pos != -1 and i > next_anomaly_pos:
                previous_anomaly_pos = next_anomaly_pos
                anomaly_pos_index += 1
                next_anomaly_pos = (anomalies_pos[anomaly_pos_index]
                                    if anomaly_pos_index < len(anomalies_pos) else -1)

            if v != -1:
                is_previous_fp = False
                continue

            is_next_predicted = i < n_samples - 1 and pred_values[i + 1] == 1
            is_contiguous = is_previous_fp or is_next_predicted

            previous_d = i - previous_anomaly_pos if previous_anomaly_pos != -1 else float('inf')
            next_d = i - next_anomaly_pos if next_anomaly_pos != -1 else float('inf')
            # Signed distance to the nearest anomaly; a tie goes to the next one.
            d = previous_d if previous_d < np.abs(next_d) else next_d

            if self._category == "affected":
                selected = np.abs(d) <= distance
            elif self._category == "continuous":
                selected = np.abs(d) > distance and is_contiguous
            else:
                selected = np.abs(d) > distance and not is_contiguous

            if selected:
                count += 1
                distances.append(d)
                errors_index.append(index_values[i])

            is_previous_fp = True

        return count, distances, errors_index, matching


affected = GAC("affected", category="affected")
continuous = GAC("continuous", category="continuous")
generic = GAC("generic", category="generic")

gac = ErrCombination("GAC", 
                     [affected, continuous, generic], 
                     ["Affected", "Continuous", "Generic"])

In [None]:
# False-positive breakdown for the custom "GAC" combination, using
# `Metrics.F1_SCORE`; each custom error reads 'distance' from the parameters.
my_analyzer.analyze_false_positive_errors(
    metric=Metrics.F1_SCORE,
    error_combination=gac,
    parameters_dicts=[{'distance': 300}],
)

### Reliability analysis

In [None]:
# Reliability analysis with `min_threshold=0.5`.
# NOTE(review): the exact role of `min_threshold` is defined by the library -- see its docs.
my_analyzer.analyze_reliability(min_threshold=0.5)

In [None]:
# Gain and lift analysis (default arguments).
my_analyzer.analyze_gain_lift()

### Predicted and GT windows duration

In [None]:
# Distributions of true vs. predicted anomalies, with `groups=10`.
# NOTE(review): presumably `groups` is the number of bins -- confirm.
my_analyzer.analyze_true_predicted_distributions(groups=10)

### Curves

In [None]:
# ROC curve of the analyzed model.
my_analyzer.analyze_curve(Curves.ROC_CURVE)

In [None]:
# Precision-recall curve of the analyzed model.
my_analyzer.analyze_curve(Curves.PRECISION_RECALL_CURVE)

### Custom metrics

In [None]:
class MyCustomF1(TSCustomMetric):
    """Custom metric that delegates its computation to ``f1_score``."""

    def __init__(self, name, is_single_threshold):
        # Nothing extra to set up: both arguments go straight to the base class.
        super().__init__(name, is_single_threshold)

    def evaluate_metric(
        self,
        y_true,
        y_pred,
        threshold=None,
        inverse_threshold=False,
        evaluation_type=None,
        min_consecutive_samples=1,
    ):
        """Return the value of ``f1_score`` for the given arguments.

        ``f1_score`` is called positionally and receives ``evaluation_type``
        before ``inverse_threshold``, i.e. in a different order than this
        method's own parameters.
        """
        score = f1_score(
            y_true,
            y_pred,
            threshold,
            evaluation_type,
            inverse_threshold,
            min_consecutive_samples,
        )
        return score


my_custom_metric = MyCustomF1(name="CUSTOM_F1", is_single_threshold=True)

In [None]:
# Register the custom metric, then report it next to the built-in F1 score.
# NOTE(review): `Metrics.CUSTOM_F1` is not defined anywhere in this notebook;
# presumably `add_custom_metric` makes the metric available under its name
# ("CUSTOM_F1") -- confirm against the library docs.
my_analyzer.add_custom_metric(my_custom_metric)
my_analyzer.analyze_performance(metrics=[Metrics.F1_SCORE, Metrics.CUSTOM_F1])

In [None]:
# Performance report with the default metrics, run again after the custom
# metric has been registered.
my_analyzer.analyze_performance()

## Analyzer with matching strategy interval-interval

In [None]:
# Rebind `my_analyzer` to a new analyzer that matches anomalies interval to
# interval (strategy built with 0.01) and uses a threshold of 1.6.
my_analyzer = AnalyzerTSAnomalyDetection(
    'LSTM',
    my_dataset,
    threshold=1.6,
    anomaly_evaluation=evaluator,
    matching_strategy=AnomalyMatchingStrategyIntervalToInterval(0.01),
    scaler_values=(True, True),
)

### Predicted and GT windows duration

In [None]:
# Distribution of the differences between true and predicted anomalies,
# drawn with `nbins=19`.
my_analyzer.analyze_true_predicted_difference_distribution(nbins=19)

### IOU Analysis on windows

In [None]:
# Performance as a function of the IoU threshold, with `granularity=50`.
# NOTE(review): presumably the number of threshold values evaluated -- confirm.
my_analyzer.analyze_performance_for_iou_threshold(granularity=50)

In [None]:
# Distribution of the IoU values, drawn with `nbins=15`.
my_analyzer.analyze_iou_distribution(nbins=15)