In [1]:
import os, sys
sys.path.append(os.path.dirname(os.path.abspath(os.getcwd())))
from src.models import TadGAN, AttentionTadGAN
from src.processor import AnomalyDataset
from mlprimitives import load_primitive
from src.configuration.constants import REPORTS_DIRECTORY, MODELS_DIRECTORY
from orion.evaluation.contextual import contextual_f1_score, contextual_confusion_matrix
import pickle
import json

import numpy as np
import logging
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
def get_epoch_loss(source, dataset, signal, model_name):
    """Load the epoch losses stored alongside a trained model.

    Reads ``input_parameters.json`` from
    ``MODELS_DIRECTORY/source/dataset/signal/model_name`` and returns the
    value stored under its ``'epoch_loss'`` key as a NumPy array.

    Args:
        source: first path component under ``MODELS_DIRECTORY``.
        dataset: second path component.
        signal: third path component.
        model_name: name of the model sub-directory.

    Returns:
        ``np.ndarray`` built from the ``'epoch_loss'`` entry.
    """
    parameters_path = os.path.join(
        MODELS_DIRECTORY, source, dataset, signal, model_name, 'input_parameters.json'
    )
    with open(parameters_path, 'rb') as parameters_file:
        epoch_loss = json.load(parameters_file)['epoch_loss']
    return np.array(epoch_loss)
    

def plot_losses(tadgan_loss, attention_tadgan_loss, output_directory, title=None):
    """Plot the epoch losses of both models on a shared grid and save the figure.

    Draws a 3x4 grid of subplots titled from ``epoch_loss_names``. Each subplot
    overlays the TadGAN curve (blue) and the attention TadGAN curve (red), and
    the figure is written to ``epoch_losses.png`` inside ``output_directory``.

    Args:
        tadgan_loss: 3-D array indexed as ``[epoch, i, j]``; the sizes of its
            second and third dimensions decide which subplots ``axs[i][j]``
            are filled.
        attention_tadgan_loss: array indexed the same way. It may cover a
            different number of epochs than ``tadgan_loss``.
        output_directory: directory the PNG is written into.
        title: optional figure title. When omitted, the title is built from the
            notebook-level ``source``, ``dataset`` and ``signal`` variables,
            which must then already be defined.
    """
    fig, axs = plt.subplots(3, 4, figsize=(20, 10), sharey=False, sharex=True)

    epoch_loss_names = [
        ['valid_x_wasserstein_loss', 'fake_x_wasserstein_loss', 'partial_gp_loss_x', 'weighted_loss'],
        ['valid_z_wasserstein_loss', 'fake_z_wasserstein_loss', 'partial_gp_loss_z', 'weighted_loss'],
        ['fake_gen_x_wasserstein_loss', 'fake_gen_z_wasserstein_loss', 'mse', 'weighted_loss'],
    ]

    _, num_model, num_loss = tadgan_loss.shape

    curves = (
        ('tadgan', tadgan_loss, 'b'),
        ('attention_tadgan', attention_tadgan_loss, 'r'),
    )
    for i in range(num_model):
        for j in range(num_loss):
            ax = axs[i][j]
            ax.set_title(epoch_loss_names[i][j])
            for label, losses, color in curves:
                series = losses[:, i, j]
                # Each curve gets its own x-range so the two models may have
                # been trained for a different number of epochs.
                ax.plot(range(len(series)), series, label=label, color=color)

    if title is None:
        # Fall back to the notebook globals, as the original implementation did.
        title = f'[{source}] [{dataset}] [{signal}] Epoch losses'
    fig.suptitle(title, fontsize=16)

    # Every subplot carries the same two labels, so one legend serves the figure.
    handles, labels = axs[0][0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    fig.savefig(os.path.join(output_directory, 'epoch_losses.png'))

In [3]:
def get_anomalies(y_test, y_hat, critic, index_test):
    """Score a model's output and return the detected anomalies.

    Runs two primitives loaded through ``load_primitive``:
    ``orion.primitives.tadgan.score_anomalies`` produces the errors and their
    index, and ``orion.primitives.timeseries_anomalies.find_anomalies`` turns
    those into anomalies.

    Args:
        y_test: passed to the scoring primitive as ``y``.
        y_hat: passed to the scoring primitive as ``y_hat``.
        critic: passed to the scoring primitive as ``critic``.
        index_test: passed to the scoring primitive as ``index``.

    Returns:
        List of ``(int, int)`` tuples built from the first two entries of each
        detected anomaly (presumably its start and end index -- confirm
        against the Orion documentation).
    """
    scoring_primitive = load_primitive(
        "orion.primitives.tadgan.score_anomalies",
        arguments={"rec_error_type": "dtw", "comb": "mult"},
    )
    # The scorer yields four values; only the errors and their index are used.
    errors, true_index, _true, _predictions = scoring_primitive.produce(
        y=y_test, y_hat=y_hat, critic=critic, index=index_test
    )

    detection_primitive = load_primitive(
        "orion.primitives.timeseries_anomalies.find_anomalies",
        arguments={
            "window_size_portion": 0.33,
            "window_step_size_portion": 0.1,
            "fixed_threshold": True,
        },
    )
    detected = detection_primitive.produce(errors=errors, index=true_index)

    # Cast both bounds to plain Python ints.
    return [(int(interval[0]), int(interval[1])) for interval in detected]

def calculate_confusion_matrix(model_name, anomalies, y_test, index_test, model_directory=None):
    """Detect anomalies from a model's saved test outputs and score them.

    Loads ``y_hat_test.npy`` and ``critic_test.npy`` from the model directory,
    derives the predicted anomalies with ``get_anomalies`` and compares them
    with the ground truth using ``contextual_confusion_matrix`` (unweighted).

    Args:
        model_name: name of the model sub-directory; only used when
            ``model_directory`` is not given.
        anomalies: ground-truth anomalies handed to the confusion matrix.
        y_test: test values forwarded to ``get_anomalies``.
        index_test: non-empty index sequence; its first and last items bound
            the evaluated range.
        model_directory: optional directory holding the two ``.npy`` files.
            When omitted, it is built from ``MODELS_DIRECTORY`` and the
            notebook-level ``source``, ``dataset`` and ``signal`` variables,
            which must then already be defined.

    Returns:
        ``[predicted_anomalies, confusion_matrix]``, where the confusion matrix
        is ordered (true negative, false positive, false negative, true positive).
    """
    if model_directory is None:
        # Fall back to the notebook globals, as the original implementation did.
        model_directory = os.path.join(MODELS_DIRECTORY, source, dataset, signal, model_name)

    with open(os.path.join(model_directory, 'y_hat_test.npy'), 'rb') as f:
        y_hat = np.load(f)
    with open(os.path.join(model_directory, 'critic_test.npy'), 'rb') as f:
        critic = np.load(f)

    predicted_anomalies = get_anomalies(y_test, y_hat, critic, index_test)

    # true negative, false positive, false negative, true positive
    confusion_matrix = contextual_confusion_matrix(
        anomalies,
        predicted_anomalies,
        start=index_test[0],
        end=index_test[-1],
        weighted=False
    )
    return [predicted_anomalies, confusion_matrix]

In [None]:
# Build one summary row per signal: ground truth, plus the predicted anomalies
# and confusion matrix of each model. Loss plots are saved along the way.
source = 'NASA'
dataset = 'SMAP'
columns = ['source', 'dataset', 'signal', 'ground_truth', 'tadgan_pp', 'tadgan_cm', 'attention_tadgan_pp', 'attention_tadgan_cm']

signals = AnomalyDataset.get_signals(source, dataset)
signals_summary = []

for signal in tqdm(signals):
    print(signal)
    try:
        model_epoch_loss = [
            get_epoch_loss(source, dataset, signal, 'univariate_tadgan'),
            get_epoch_loss(source, dataset, signal, 'univariate_attention_tadgan'),
        ]

        output_directory = os.path.join(REPORTS_DIRECTORY, source, dataset, signal)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_directory, exist_ok=True)
        plot_losses(model_epoch_loss[0], model_epoch_loss[1], output_directory)

        anomaly_dataset = AnomalyDataset.load(source, dataset, signal)
        summary = [source, dataset, signal, anomaly_dataset.anomalies.values]

        # NOTE(review): the recorded output of this cell shows every signal
        # failing with "'AnomalyDataset' object has no attribute 'data'", so
        # this attribute access no longer matches the AnomalyDataset API.
        # TODO: confirm which attribute exposes the test index and update it.
        index_test = list(anomaly_dataset.data['index'])

        # Column order requires TadGAN first, then attention TadGAN.
        for model_name in ('univariate_tadgan', 'univariate_attention_tadgan'):
            summary.extend(calculate_confusion_matrix(
                model_name,
                anomaly_dataset.anomalies,
                anomaly_dataset.test.y,
                index_test
            ))
        signals_summary.append(summary)
    except Exception as e:
        # Best effort: a failing signal is reported and skipped so the
        # remaining signals are still processed.
        print('failed', signal, e)

  0%|          | 0/54 [00:00<?, ?it/s]

A-9
failed A-9 'AnomalyDataset' object has no attribute 'data'
A-7
failed A-7 'AnomalyDataset' object has no attribute 'data'
E-8
failed E-8 'AnomalyDataset' object has no attribute 'data'
G-2
failed G-2 'AnomalyDataset' object has no attribute 'data'
E-6
failed E-6 'AnomalyDataset' object has no attribute 'data'
E-1
failed E-1 'AnomalyDataset' object has no attribute 'data'
A-6
failed A-6 'AnomalyDataset' object has no attribute 'data'
A-1
failed A-1 'AnomalyDataset' object has no attribute 'data'
A-8
failed A-8 'AnomalyDataset' object has no attribute 'data'
E-7
failed E-7 'AnomalyDataset' object has no attribute 'data'
G-3
failed G-3 'AnomalyDataset' object has no attribute 'data'
G-4
failed G-4 'AnomalyDataset' object has no attribute 'data'
E-9
failed E-9 'AnomalyDataset' object has no attribute 'data'
S-1
failed S-1 'AnomalyDataset' object has no attribute 'data'
D-5
failed D-5 'AnomalyDataset' object has no attribute 'data'
D-2
failed D-2 'AnomalyDataset' object has no attribute

In [None]:
# Collect the per-signal rows into a table and store it as summary.csv
# under the reports directory of this source/dataset pair.
summary_df = pd.DataFrame(data=signals_summary, columns=columns)
summary_path = os.path.join(REPORTS_DIRECTORY, source, dataset, 'summary.csv')
summary_df.to_csv(summary_path, index=False)

In [6]:
# Render the per-signal summary table as this cell's output.
summary_df

Unnamed: 0,source,dataset,signal,ground_truth,tadgan_pp,tadgan_cm,attention_tadgan_pp,attention_tadgan_cm
0,NASA,SMAP,A-9,"[[4569, 8433]]",[],"(None, 0, 1, 0)","[(4921, 5014)]","(None, 0, 0, 1)"
1,NASA,SMAP,A-7,"[[6200, 8600]]","[(1553, 1712), (2462, 2599), (3217, 3392), (62...","(None, 3, 0, 1)","[(1553, 1688), (2442, 2593), (3135, 3214), (32...","(None, 4, 1, 0)"
2,NASA,SMAP,E-8,"[[5400, 6022]]","[(5575, 5635)]","(None, 0, 0, 1)","[(4952, 5099), (5564, 5635), (5663, 5855)]","(None, 1, 0, 1)"
3,NASA,SMAP,G-2,"[[4030, 4070]]","[(3965, 4126)]","(None, 0, 0, 1)","[(3964, 4127)]","(None, 0, 0, 1)"
4,NASA,SMAP,E-6,"[[5610, 5675]]","[(5540, 5708)]","(None, 0, 0, 1)","[(5542, 5709)]","(None, 0, 0, 1)"
5,NASA,SMAP,E-1,"[[5000, 5030], [5610, 6086]]",[],"(None, 0, 2, 0)",[],"(None, 0, 2, 0)"
6,NASA,SMAP,A-6,"[[1890, 1930]]","[(1844, 1987)]","(None, 0, 0, 1)","[(1843, 1985)]","(None, 0, 0, 1)"
7,NASA,SMAP,A-1,"[[4690, 4774]]","[(4667, 4841)]","(None, 0, 0, 1)","[(4667, 4840)]","(None, 0, 0, 1)"
8,NASA,SMAP,A-8,"[[4569, 8374]]",[],"(None, 0, 1, 0)","[(0, 124), (277, 369), (5178, 5256)]","(None, 2, 0, 1)"
9,NASA,SMAP,E-7,"[[5394, 5674]]","[(5385, 5673)]","(None, 0, 0, 1)","[(5385, 5676)]","(None, 0, 0, 1)"


In [7]:
# Sum the TadGAN confusion matrices over all signals, counting an entry that
# is None as 0 so the rows can be added element-wise.
values = [
    [0 if count is None else count for count in matrix]
    for matrix in summary_df.tadgan_cm
]
tadgan_cm = np.array(values).sum(axis=0)
print(tadgan_cm)

# Entries are ordered (true negative, false positive, false negative, true positive).
tn, fp, fn, tp = tadgan_cm

# F1 computed directly from the aggregated counts.
f1 = tp / (tp + (fp + fn) / 2)
f1

[ 0 34 21 46]


0.6258503401360545

In [8]:
# Sum the attention TadGAN confusion matrices over all signals, counting an
# entry that is None as 0 so the rows can be added element-wise.
values = [
    [0 if count is None else count for count in matrix]
    for matrix in summary_df.attention_tadgan_cm
]
# NOTE: the misspelled name ('attnetion') is kept because it is a
# notebook-level variable that other cells may reference.
attnetion_tadgan_cm = np.array(values).sum(axis=0)
print(attnetion_tadgan_cm)

# Entries are ordered (true negative, false positive, false negative, true positive).
tn, fp, fn, tp = attnetion_tadgan_cm

# F1 computed directly from the aggregated counts.
f1 = tp / (tp + (fp + fn) / 2)
f1

[ 0 37 20 47]


0.6225165562913907