In [21]:
# Import all required libraries

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from tqdm import tqdm

from tmu.models.classification.vanilla_classifier import TMClassifier

In [31]:
## Helper functions for saving the model and accuracy

# Helper function to load dataset
def load_dataset(farm, event_id):
    X = np.loadtxt(f"./test_data/X_{farm}_{event_id}_10b.txt", dtype=np.uint32)
    X = np.array(X).astype(np.uint32)
    return X


def load_dataset_labels(farm, event_id):
    y = np.loadtxt(f"./test_data/y_{farm}_{event_id}_10b.txt", dtype=np.uint32)
    y = np.array(y).astype(np.uint32)
    return y


def load_dataset_statuses(farm, event_id):
    z = np.loadtxt(f"./test_data/z_{farm}_{event_id}_10b.txt", dtype=np.uint32)
    z = np.array(z).astype(np.uint32)
    return z


def load_model(filename) -> TMClassifier:
    with open(filename, "rb") as f:
        model = pickle.load(f)

    return model

In [32]:
tm = load_model("latest2.pkl")

### CARE score evaluation

In [33]:
def save_eval_metrics(farm, dataset_id, num_anom, num_norm, n_pred_anom, n_pred_norm, acc):
    with open("eval_metrics.csv", "a") as f:
        f.write(f"{farm},{dataset_id},{num_anom},{num_norm},{n_pred_anom},{n_pred_norm},{acc}\n")

In [34]:
def calculate_accuracy(labels, predictions):
    # g = the ground truth of all data points with a normal status-ID within the prediction time frame
    g = labels

    # p = the corresponding prediction of an AD-model.
    p = predictions

    # Calculate the accuracy
    fp = np.sum((p == 1) & (g == 0))

    # the number of true negatives based on g and p
    tn = np.sum((p == 0) & (g == 0))

    accuracy = tn / (fp + tn)

    return accuracy

In [35]:
def calculate_coverage(labels, statuses, predictions):
    beta = 0.5

    g = labels
    p = predictions

    # the number of true positives based on g and p
    tp = np.sum((p == 1) & (g == 1))

    # the number of false negatives based on g and p
    fn = np.sum((p == 0) & (g == 1))

    # the number of false positives based on g and p
    fp = np.sum((p == 1) & (g == 0))

    beta_squared = beta ** 2

    numerator = (1 + beta_squared) * tp
    denominator = (1 + beta_squared) * tp + beta_squared * fn + fp

    return numerator / denominator

In [36]:
def calculate_reliability(labels, statuses, predictions):
    s = [0 if x in [0, 2] else 1 for x in statuses]

    p = predictions

    N = len(s)

    crit = [0] * (N + 1)  # Initialize crit array with zeros

    for i in range(1, N + 1):
        if s[i - 1] == 0:
            if p[i - 1] == 1:
                crit[i] = crit[i - 1] + 1
            else:
                crit[i] = max(crit[i - 1] - 1, 0)
        else:
            crit[i] = crit[i - 1]

    criticality = crit[1:]

    crit_max = np.max(criticality)

    tc = 75

    # If a value is larger than the threshold, then it is an anomaly (1) else it is not (0)
    criticality = np.array([1 if c > tc else 0 for c in criticality])

    return calculate_coverage(labels, criticality)

In [37]:
def evaluate(farm, dataset_id):
    dataset = load_dataset(farm, dataset_id)
    labels = load_dataset_labels(farm, dataset_id)
    statuses = load_dataset_statuses(farm, dataset_id)

    predictions = tm.predict(dataset)

    coverage = calculate_coverage(labels, predictions)
    accuracy = calculate_accuracy(labels, predictions)
    reliability = calculate_reliability(labels, statuses, predictions)

    return farm, dataset_id, coverage, accuracy, reliability, 0

In [38]:
test_datasets = [
    55,  # 81, 47, 12, 4, 18, 28, 39, 66, 15, 78, 79, 30, 33, 11, 44,  # Has anomalies
    # 8, 85, 6, 62, 36, 56, 94, 54, 43, 50, 64, 46, 65,
]

# 61, 93, 75, 41, 58, 48, 88, 57, 32, 89, 59, 63, 80, 37, 29, 1, 20, 60  # Without anomalies

# Evaluate each dataset and visualize progress with tqdm
results = [evaluate("C", dataset) for dataset in tqdm(test_datasets)]

# Each result is a tuple of (farm, dataset_id, coverage, accuracy, reliability, earliness)
# Plot the accuracies for each dataset

coverage_scores = [result[2] for result in results]
accuracy_scores = [result[3] for result in results]
reliability_scores = [result[4] for result in results]
earliness_scores = [result[5] for result in results]


100%|██████████| 1/1 [00:25<00:00, 25.23s/it]


In [39]:
# Print the average accuracy
mean_coverage = np.mean(coverage_scores)
mean_accuracy = np.mean(accuracy_scores)
mean_reliability = np.mean(reliability_scores)
mean_earliness = np.mean(earliness_scores)

print(f"Mean coverage: {mean_coverage:.5f}")
print(f"Mean accuracy: {mean_accuracy:.5f}")
print(f"Mean reliability: {mean_reliability:.5f}")
print(f"Mean earliness: {mean_earliness:.5f}")

Mean coverage: 0.66980
Mean accuracy: 0.98562
Mean reliability: 0.51781
Mean earliness: 0.00000


In [11]:
plt.figure(figsize=(10, 5))
plt.bar(test_datasets, accuracies, color='skyblue')
plt.xlabel("Dataset ID")
plt.ylabel("Accuracy")
plt.title("Accuracy for each dataset")

# Calculate the mean accuracy
mean_accuracy = np.mean(accuracies)

plt.axhline(y=mean_accuracy, color='r', linestyle='-', label=f"Mean accuracy: {mean_accuracy:.5f}")

plt.show()

NameError: name 'accuracies' is not defined

<Figure size 1000x500 with 0 Axes>