In [34]:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import pandas as pd
import pickle as pkl
import os
from model_trainer.entities import ANOMALY_ARCHIVE_ENTITIES, MACHINES, MSL_CHANNELS, SMAP_CHANNELS, ANOMALY_ARCHIVE_ENTITY_TO_DATA_FAMILY
from tqdm import tqdm, trange
from model_selection.utils import RunningStdWelford
from metrics.metrics import prauc

import sys
sys.path.append('/home/ubuntu/PyMAD/')
from src.pymad.evaluation.numpy import best_f1_linspace

In [35]:
SAVE_DIR = '/home/ubuntu/efs/results_Jul22_f1_test_set/'
# SAVE_DIR_TEST = '/home/ubuntu/efs/results_Jul22_f1_test_set/'

DATASETS = ['anomaly_archive', 'smd', 'msl', 'smap']
ENTITIES = [ANOMALY_ARCHIVE_ENTITIES, MACHINES, MSL_CHANNELS, SMAP_CHANNELS]
DATASET_ENTITY = dict(zip(DATASETS, ENTITIES))

In [53]:
# Specify the dataset to view the results
dataset = 'smd'
compute_running_std = False # Whether to compute running standard deviation
n_splits = 100

In [46]:
ranking_object_files = os.listdir(os.path.join(SAVE_DIR, dataset))
evaluated_entities = ['_'.join(i.split('_')[2:]).split('.')[0] for i in ranking_object_files]
print(f'{len(evaluated_entities)} entities evaluated: {evaluated_entities}')

26 entities evaluated: ['machine-2-4', 'machine-1-5', 'machine-3-3', 'machine-2-8', 'machine-3-7', 'machine-1-1', 'machine-3-5', 'machine-1-7', 'machine-2-6', 'machine-3-9', 'machine-3-1', 'machine-2-2', 'machine-3-10', 'machine-1-6', 'machine-3-4', 'machine-2-5', 'machine-3-8', 'machine-2-9', 'machine-2-1', 'machine-1-2', 'machine-2-7', 'machine-1-8', 'machine-3-11', 'machine-3-2', 'machine-1-4', 'machine-2-3']


In [56]:
anomaly_scores_all_entities = {}
anomaly_labels_all_entities = {}

for entity in tqdm(evaluated_entities):     
    anomaly_scores_all_entities[entity] = {}
    anomaly_scores_all_entities[entity] = {}
    
    ranking_obj_file = f'ranking_obj_{entity}.data'
    
    with open(os.path.join(SAVE_DIR, dataset, ranking_obj_file), 'rb') as f: 
        ranking_obj = pkl.load(f)

    model_names = list(ranking_obj.predictions.keys())
    
    for mn in model_names: 
        anomaly_scores = ranking_obj.predictions[mn]['entity_scores'].squeeze()
        anomaly_labels = ranking_obj.predictions[mn]['anomaly_labels'].squeeze()
        
        if compute_running_std:
            # Compute running standard deviation
            std_obj = RunningStdWelford()
            std_dev = []
            for v in anomaly_scores:
                std_obj.update(v)
                std_dev.append(std_obj.std)
            std_dev = np.array(std_dev)
        else: 
            std_dev = np.std(anomaly_scores)
        
        if isinstance(std_dev, np.ndarray):
            std_dev[std_dev < 1e-6] = 1
        anomaly_scores = anomaly_scores/std_dev # Normalize the entity scores

        anomaly_scores_all_entities[entity][mn] = anomaly_scores
        anomaly_scores_all_entities[entity][mn] = anomaly_labels


100%|██████████| 26/26 [00:09<00:00,  2.62it/s]


In [58]:
# Finds the PR-AUC of all models
PR_AUC_MODELS = {}
BEST_F1_MODELS = {}
for entity in anomaly_scores_all_entities.keys():
    for mn in anomaly_scores_all_entities[entity].keys():
        scores = anomaly_scores_all_entities[entity][mn]
        labels = anomaly_labels_all_entities[entity][mn]

        PR_AUC_MODELS[mn].extend(list(scores))

PR_AUC_models[mn] = prauc(labels, scores)
BEST_F1_models[mn] = best_f1_linspace(
                        scores=np.array(scores), 
                        labels=np.array(labels), 
                        n_splits=n_splits, 
                        segment_adjust=True)[0]
best_model_overall = BEST_F1_models.keys()[np.argmax(list(BEST_F1_models.values()))]

KeyError: 'machine-2-4'

In [57]:
for entity in anomaly_scores_all_entities.keys(): 
    # Best model

{'machine-2-4': {'RNN_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'LSTMVAE_3': array([0., 0., 0., ..., 0., 0., 0.]),
  'NN_3': array([0., 0., 0., ..., 0., 0., 0.]),
  'DGHL_4': array([0., 0., 0., ..., 0., 0., 0.]),
  'RNN_3': array([0., 0., 0., ..., 0., 0., 0.]),
  'RM_2': array([0., 0., 0., ..., 0., 0., 0.]),
  'DGHL_2': array([0., 0., 0., ..., 0., 0., 0.]),
  'LSTMVAE_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'NN_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'RNN_2': array([0., 0., 0., ..., 0., 0., 0.]),
  'LSTMVAE_4': array([0., 0., 0., ..., 0., 0., 0.]),
  'RM_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'DGHL_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'RNN_4': array([0., 0., 0., ..., 0., 0., 0.]),
  'MD_1': array([0., 0., 0., ..., 0., 0., 0.]),
  'DGHL_3': array([0., 0., 0., ..., 0., 0., 0.]),
  'RM_3': array([0., 0., 0., ..., 0., 0., 0.]),
  'LSTMVAE_2': array([0., 0., 0., ..., 0., 0., 0.]),
  'NN_2': array([0., 0., 0., ..., 0., 0., 0.])},
 'machine-1-5': {'RNN_1': array([0., 0.,

0.1482554643999619