In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os

import pickle

from tmu.models.autoencoder.autoencoder import TMAutoEncoder
from src.lib.care import calculate_care_score

2025-03-22 22:15:43,840 - tmu.clause_bank.clause_bank_cuda - ERROR - No module named 'pycuda'
Traceback (most recent call last):
  File "/Users/kjellhaaland/Documents/GitHub/uia-master-thesis/.venv/lib/python3.12/site-packages/tmu/clause_bank/clause_bank_cuda.py", line 41, in <module>
    from pycuda._driver import Device, Context
ModuleNotFoundError: No module named 'pycuda'


In [2]:
# Bits per encoded value in the flattened binary feature vectors
# (used as the default `bits_per_value` of mse_loss).
bits = 10

In [3]:
# Make sure the output folder exists; the plotting helpers save their PNGs under ./figures/.
os.makedirs("figures", exist_ok=True)

In [4]:
def load_test_dataset(farm, event_id):
    """Load the test feature matrix of one event.

    Reads ``./data_test/X_{farm}_{event_id}.txt`` (whitespace-separated
    unsigned integers, one sample per line).

    Args:
        farm: Wind-farm identifier used in the file name (e.g. ``"A"``).
        event_id: Event identifier used in the file name.

    Returns:
        A 2-D ``np.uint32`` array with one row per sample.
    """
    # loadtxt already produces uint32, so no additional copy/cast is needed.
    # ndmin=2 keeps a single-row file 2-D; callers index X[i] as one sample.
    return np.loadtxt(f"./data_test/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_test_labels(farm, event_id):
    """Load the per-sample labels of one event.

    Reads ``./data_test/y_{farm}_{event_id}.txt`` (one unsigned integer per
    sample).

    Args:
        farm: Wind-farm identifier used in the file name (e.g. ``"A"``).
        event_id: Event identifier used in the file name.

    Returns:
        A 1-D ``np.uint32`` array with one label per sample.
    """
    # loadtxt already produces uint32, so no additional copy/cast is needed.
    # ndmin=1 keeps a single-value file 1-D so len() and indexing still work.
    return np.loadtxt(f"./data_test/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_status_labels(farm, event_id):
    """Load the per-sample status type ids of one event.

    Reads ``./data_test/z_{farm}_{event_id}.txt`` (one unsigned integer per
    sample).

    Args:
        farm: Wind-farm identifier used in the file name (e.g. ``"A"``).
        event_id: Event identifier used in the file name.

    Returns:
        A 1-D ``np.uint32`` array with one status id per sample.
    """
    # loadtxt already produces uint32, so no additional copy/cast is needed.
    # ndmin=1 keeps a single-value file 1-D so len() and indexing still work.
    return np.loadtxt(f"./data_test/z_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_label(farm, event_id):
    """Tell whether an event is labelled as normal in the farm's event table.

    Reads ``../../../data/care_to_compare/Wind Farm {farm}/event_info.csv``
    (``;``-separated) and looks up the ``event_label`` of the first row whose
    ``event_id`` matches.

    Args:
        farm: Wind-farm identifier used in the directory name (e.g. ``"A"``).
        event_id: Value to match against the ``event_id`` column.

    Returns:
        ``False`` when the event label is ``"anomaly"``, ``True`` otherwise.

    Raises:
        IndexError: If no row with the given ``event_id`` exists.
    """
    event_info = pd.read_csv(f"../../../data/care_to_compare/Wind Farm {farm}/event_info.csv", delimiter=';')

    matching_labels = event_info.loc[event_info['event_id'] == event_id, "event_label"]

    if matching_labels.empty:
        # Same exception type the bare `.values[0]` lookup produced, but with
        # a message that names the missing event.
        raise IndexError(f"event_id {event_id!r} not found in event_info.csv of Wind Farm {farm}")

    return bool(matching_labels.iloc[0] != "anomaly")

In [5]:
def load_model(filename) -> TMAutoEncoder:
    """Unpickle the object stored in ``filename`` and return it.

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing, so
    only open model files that come from a trusted source.
    """
    with open(filename, "rb") as model_file:
        return pickle.load(model_file)

In [6]:
def calculate_accuracy(labels, predictions):
    """Return the fraction of positions where ``predictions`` equals ``labels``.

    Args:
        labels: Array-like of ground-truth values.
        predictions: Array-like of predicted values, same length as ``labels``.

    Returns:
        Number of matching positions divided by ``len(labels)``.
    """
    # Coerce to arrays so `==` is element-wise; on plain lists it would compare
    # the two lists as a whole and produce a single bool.
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)

    return np.sum(labels == predictions) / len(labels)

In [7]:
def binary_to_float(bin_array):
    """Convert an MSB-first bit vector to a float between 0 and 1.

    The bits are read as an unsigned integer (first element is the most
    significant bit) and divided by the largest value representable with that
    many bits, ``2**n_bits - 1``.

    Args:
        bin_array: Array-like of 0/1 values. The bit axis is the last axis, so
            a 2-D input of shape ``(num_values, n_bits)`` is converted row by
            row in one call.

    Returns:
        A float for 1-D input, or an array of floats with the leading shape of
        the input for batched input.
    """
    bit_values = np.asarray(bin_array)
    n_bits = bit_values.shape[-1]

    # Place values 2**(n_bits-1) ... 2**0, most significant bit first.
    place_values = 2 ** np.arange(n_bits)[::-1]

    return np.dot(bit_values, place_values) / (2 ** n_bits - 1)


def mse_loss(X, pred, bits_per_value=bits):
    """
    Compute MSE loss for flattened binary inputs.

    - X and pred are 1D arrays of length `num_values * bits_per_value`.
    - Both are reshaped into (num_values, bits_per_value); every row is read
      as an MSB-first unsigned integer and scaled to [0, 1] by dividing by
      `2**bits_per_value - 1`.
    - Returns the mean squared difference between the decoded values of X and
      pred.

    `reshape` raises ValueError when the length is not a multiple of
    `bits_per_value`.
    """
    num_values = len(X) // bits_per_value

    # Reshape into (num_values, bits_per_value)
    X_reshaped = X.reshape(num_values, bits_per_value)
    pred_reshaped = pred.reshape(num_values, bits_per_value)

    # Decode all rows at once: one dot product with the place values
    # 2**(bits-1) ... 2**0 instead of a Python-level loop over rows.
    place_values = 2 ** np.arange(bits_per_value)[::-1]
    max_value = 2 ** bits_per_value - 1
    X_floats = np.dot(X_reshaped, place_values) / max_value
    pred_floats = np.dot(pred_reshaped, place_values) / max_value

    # Compute MSE
    return np.mean((X_floats - pred_floats) ** 2)

In [8]:
def reconstruction_accuracy(X, pred):
    """Return the count of positions where ``X`` equals ``pred`` divided by ``len(X)``."""
    n_matching = np.sum(X == pred)
    return n_matching / len(X)


def plot_mse(X, y, pred, name, threshold):
    """Save a histogram of the per-sample reconstruction MSE.

    Args:
        X: Sequence of input samples (each a flattened binary vector).
        y: Unused; kept so the signature matches the other plotting helper.
        pred: Sequence of reconstructed samples, aligned with ``X``.
        name: Suffix for the output file name.
        threshold: MSE value marked with a vertical dashed line.

    The figure is written to ``./figures/plot_reconstruction_acc_{name}.png``.
    """
    # Compute MSE for each row
    mse_per_row = [mse_loss(X[i], pred[i]) for i in range(len(X))]

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        # Distribution of all MSE values
        sns.histplot(mse_per_row, bins=50, kde=True, color='b', ax=ax)

        # Add a threshold line
        ax.axvline(threshold, color='r', linestyle='--', label="Threshold")

        # Label the figure so it can be read on its own
        ax.set_xlabel("Reconstruction MSE")
        ax.set_ylabel("Number of samples")
        ax.set_title(f"Reconstruction MSE distribution ({name})")
        ax.legend()

        # Save the plot
        fig.savefig(f"./figures/plot_reconstruction_acc_{name}.png")
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)


def plot_predictions(X, y, z, pred, p, name, threshold):
    """Save a timeline of reconstruction error with label/prediction/status tracks.

    The per-sample MSE between ``X`` and ``pred`` is drawn as the main curve
    together with the detection threshold. Below it, three two-level tracks
    are stacked:

    - ``y`` (actual labels): -0.2 where ``y == 0``, otherwise -0.1
    - ``p`` (predictions):   -0.4 where ``p == 0``, otherwise -0.3
    - ``z`` (status ids):    -0.6 where ``z`` is 0 or 2, otherwise -0.5

    Args:
        X: Sequence of input samples (each a flattened binary vector).
        y: Per-sample labels.
        z: Per-sample status type ids.
        pred: Sequence of reconstructed samples, aligned with ``X``.
        p: Per-sample predictions.
        name: Suffix for the output file name.
        threshold: MSE value drawn as a horizontal dashed line.

    The figure is written to ``./figures/plot_detections_{name}.png``.
    """
    # Element-wise comparisons below need arrays, not plain lists.
    y = np.asarray(y)
    p = np.asarray(p)
    z = np.asarray(z)

    x = np.arange(0, len(X))  # Sample index
    r = [mse_loss(X[i], pred[i]) for i in range(len(X))]

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Main curve: the plotted quantity is the MSE loss (an error, not an accuracy)
        ax.plot(x, r, label="Reconstruction Error (MSE)", color='blue', alpha=0.7)

        # Plot a line at the threshold
        ax.axhline(y=threshold, color='red', linestyle='--', label="Threshold")

        # Actual labels as a two-level line at -0.2 / -0.1
        y_mapped = np.where(y == 0, -0.2, -0.1)
        ax.plot(x, y_mapped, label="Actual Anomalies (y)", color='red', linestyle='-', linewidth=2)

        # Predictions as a two-level line at -0.4 / -0.3 (stacked below y)
        p_mapped = np.where(p == 0, -0.4, -0.3)
        ax.plot(x, p_mapped, label="Predicted Anomalies (p)", color='green', linestyle='-', linewidth=2)

        # Status ids as a two-level line at -0.6 / -0.5 (ids 0 and 2 share the lower level)
        z_mapped = np.where(np.logical_or(z == 0, z == 2), -0.6, -0.5)
        ax.plot(x, z_mapped, label="Status Type Id (z)", color='orange', linestyle='-', linewidth=2)

        # Formatting
        ax.set_xlabel("Time")
        ax.set_ylabel("Reconstruction Error (MSE)")
        ax.set_title("Anomaly Detection Visualization")

        # Set y-limits to make space for stacked lines
        ax.set_ylim(-0.7, 1.1)

        # Add horizontal reference lines at the lower level of the y, p and z tracks
        for baseline in (-0.2, -0.4, -0.6):
            ax.axhline(y=baseline, color='black', linestyle='dotted', linewidth=1, alpha=0.5)

        # Add grid and legend
        ax.legend()
        ax.grid(True)

        # Save the plot
        fig.savefig(f"./figures/plot_detections_{name}.png")
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)


def get_predictions(X, y, z, name, threshold=0.08, model_path="models/latest_9.pkl"):
    """Reconstruct ``X`` with the stored model and flag samples by reconstruction MSE.

    A sample is predicted as 1 when the MSE between the sample and its
    reconstruction is strictly greater than ``threshold``, otherwise 0. Both
    diagnostic plots for this run are saved as a side effect.

    Args:
        X: Test samples passed to the model's ``predict``.
        y: Per-sample labels used for the accuracy and the plots.
        z: Per-sample status type ids, used for plotting only.
        name: Suffix for the saved figure file names.
        threshold: MSE cut-off for flagging a sample (default 0.08).
        model_path: Pickled model to load (default ``"models/latest_9.pkl"``).

    Returns:
        Tuple ``(predictions, accuracy)``: the 0/1 prediction array and the
        fraction of predictions equal to ``y``.
    """
    tm = load_model(model_path)

    pred = tm.predict(X)

    # Per-sample reconstruction error
    losses = np.array([mse_loss(X[i], pred[i]) for i in range(len(X))])

    # 1 where the MSE exceeds the threshold, 0 otherwise
    X_predictions = (losses > threshold).astype(int)

    # Accuracy
    accuracy = calculate_accuracy(y, X_predictions)

    plot_mse(X, y, pred, name, threshold)
    plot_predictions(X, y, z, pred, X_predictions, name, threshold)

    return X_predictions, accuracy

In [9]:
# Event ids to evaluate; each one is passed to run_prediction for wind farm "A" below.
test_datasets = [68, 22, 72, 73, 0, 26, 40, 42, 10, 45, 84, 25, 69, 13, 24, 3, 17, 38, 71, 14, 92, 51]


# Uncomment to iterate quickly on a two-event subset:
#test_datasets = [68, 51]


def run_prediction(farm, dataset):
    """Load every test artefact of one event and run the detector on it.

    Returns the tuple ``(X, y, z, is_normal, predictions, accuracy)``, where
    ``X``/``y``/``z`` are the loaded features, labels and status ids,
    ``is_normal`` is the event-level flag from the event table, and the last
    two items come from ``get_predictions``.
    """
    features = load_test_dataset(farm, dataset)
    labels = load_test_labels(farm, dataset)
    status_ids = load_test_status_labels(farm, dataset)

    is_normal = load_test_label(farm, dataset)

    run_name = f"{farm}_{dataset}"
    predictions, accuracy = get_predictions(features, labels, status_ids, run_name)

    return features, labels, status_ids, is_normal, predictions, accuracy


In [10]:
# Evaluate every test event of wind farm "A". For each event, build a dataframe
# with status_type_id;label;prediction and store it with the event-level metadata.
elements = []

# The loop variable is named `event_id` (not `set`) so the builtin `set` is
# not shadowed for the rest of the kernel session.
for event_id in test_datasets:
    result = run_prediction("A", event_id)

    # run_prediction returns (X, y, z, is_normal, predictions, accuracy); X is not needed here.
    labels, status_ids, is_normal, predictions, accuracy = result[1:]

    result_df = pd.DataFrame({
        'status_type_id': status_ids,
        'label': labels,
        'prediction': predictions
    })

    print(f"Done with {event_id}. Accuracy: {accuracy}")

    elements.append({'dataset': event_id, 'normal': is_normal, 'data': result_df, 'accuracy': accuracy})

Done with 68. Accuracy: 0.37342047930283223
Done with 22. Accuracy: 0.23365300784655624
Done with 72. Accuracy: 0.4561229548834903
Done with 73. Accuracy: 0.4794679005205321
Done with 0. Accuracy: 0.2150158618258724
Done with 26. Accuracy: 0.29354614850798055
Done with 40. Accuracy: 0.32185537775977313
Done with 42. Accuracy: 0.4902084649399874
Done with 10. Accuracy: 0.3120567375886525
Done with 45. Accuracy: 0.34652777777777777
Done with 84. Accuracy: 0.43702157272094644
Done with 25. Accuracy: 0.926537350392076
Done with 69. Accuracy: 0.8800617045892788
Done with 13. Accuracy: 0.9369243650572147
Done with 24. Accuracy: 0.8913043478260869
Done with 3. Accuracy: 0.8891580860084797
Done with 17. Accuracy: 0.8673139158576052
Done with 38. Accuracy: 0.8113207547169812
Done with 71. Accuracy: 0.8893425888117599
Done with 14. Accuracy: 0.9110994213571805
Done with 92. Accuracy: 0.9413754227733935
Done with 51. Accuracy: 0.9114082741328876


In [12]:
# Aggregate the per-event results collected in `elements` into a single score
# using the project's calculate_care_score helper.
score = calculate_care_score(elements)
print(score)

0.6704789824108351
