In [202]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os

import pickle

from tmu.models.autoencoder.autoencoder import TMAutoEncoder
from src.lib.care import calculate_care_score

In [203]:
# Make sure the output folder for saved plots exists; plot_mse writes its PNG files into ./figures.
# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs("figures", exist_ok=True)

In [204]:
def load_test_dataset(farm, event_id):
    """Load the test input matrix for one farm/event pair.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        A 2-D ``np.uint32`` array read from
        ``./data_test/X_{farm}_{event_id}.txt``, one row per line of the file.
    """
    # loadtxt already produces uint32, so no further conversion is needed.
    # ndmin=2 keeps a single-line file as shape (1, n) instead of collapsing
    # it to 1-D, which would break the row-wise consumers of this matrix.
    return np.loadtxt(f"./data_test/X_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=2)


def load_test_labels(farm, event_id):
    """Load the per-row labels for one farm/event pair.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        A 1-D ``np.uint32`` array read from
        ``./data_test/y_{farm}_{event_id}.txt``.
    """
    # loadtxt already produces uint32, so no further conversion is needed.
    # ndmin=1 keeps a single-value file as shape (1,) rather than a 0-d
    # array, so len() and element-wise comparison keep working.
    return np.loadtxt(f"./data_test/y_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_status_labels(farm, event_id):
    """Load the per-row status values for one farm/event pair.

    Args:
        farm: Farm identifier used in the file name.
        event_id: Event identifier used in the file name.

    Returns:
        A 1-D ``np.uint32`` array read from
        ``./data_test/z_{farm}_{event_id}.txt``.
    """
    # loadtxt already produces uint32, so no further conversion is needed.
    # ndmin=1 keeps a single-value file as shape (1,) rather than a 0-d
    # array, so the result can always be used as a column of values.
    return np.loadtxt(f"./data_test/z_{farm}_{event_id}.txt", dtype=np.uint32, ndmin=1)


def load_test_label(farm, event_id):
    """Tell whether an event is labelled as normal in the farm's event table.

    Reads the semicolon-delimited ``event_info.csv`` of the given farm and
    looks up the row whose ``event_id`` column matches.

    Args:
        farm: Farm identifier; selects the ``Wind Farm {farm}`` folder.
        event_id: Value to match against the ``event_id`` column.

    Returns:
        ``False`` when the event's ``event_label`` is ``"anomaly"``,
        ``True`` for any other label.

    Raises:
        IndexError: If no row of the table has the requested ``event_id``.
    """
    event_info = pd.read_csv(
        f"../../../data/care_to_compare/Wind Farm {farm}/event_info.csv",
        delimiter=';',
    )

    metadata = event_info[event_info['event_id'] == event_id]

    if metadata.empty:
        # Same exception type the bare `.values[0]` lookup would raise, but
        # with a message that names the missing event.
        raise IndexError(
            f"event_id {event_id!r} not found in event_info.csv of Wind Farm {farm}"
        )

    event_label = metadata["event_label"].values[0]

    return bool(event_label != "anomaly")

In [205]:
def load_model(filename) -> TMAutoEncoder:
    """Unpickle and return the model object stored at ``filename``.

    NOTE(review): ``pickle.load`` can run arbitrary code embedded in the
    file, so only point this at model files from a trusted source.
    """
    with open(filename, "rb") as model_file:
        return pickle.load(model_file)

In [206]:
def calculate_accuracy(labels, predictions):
    """Return the fraction of positions where ``predictions`` equals ``labels``.

    Args:
        labels: Sequence or array of reference values.
        predictions: Sequence or array of predicted values, aligned with
            ``labels``.

    Returns:
        The number of matching elements divided by ``len(labels)``, or
        ``nan`` when ``labels`` is empty.
    """
    # Coerce to arrays so plain Python lists are compared element-wise;
    # `list == list` would collapse to a single bool and give a wrong count.
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)

    total = len(labels)
    if total == 0:
        # Nothing to score: report nan explicitly instead of dividing by zero.
        return float("nan")

    return np.sum(labels == predictions) / total

In [207]:
def reconstruction_accuracy(X, pred):
    """Return the fraction of elements of ``X`` that ``pred`` reproduces exactly.

    Args:
        X: Sequence or array of original values.
        pred: Sequence or array of reconstructed values, aligned with ``X``.

    Returns:
        The number of equal elements divided by ``len(X)``, or ``nan`` when
        ``X`` is empty.
    """
    # Coerce to arrays so plain Python lists are compared element-wise;
    # `list == list` would collapse to a single bool and give a wrong count.
    original = np.asarray(X)
    reconstructed = np.asarray(pred)

    n_values = len(original)
    if n_values == 0:
        # Nothing to compare: report nan explicitly instead of dividing by zero.
        return float("nan")

    return np.sum(original == reconstructed) / n_values


def plot_mse(X, y, pred, name, threshold):
    """Save a histogram of the per-row reconstruction accuracy.

    Despite the function name, the plotted quantity is not a mean squared
    error: it is ``reconstruction_accuracy(X[i], pred[i])`` for every row.

    Args:
        X: Original rows (indexable, one entry per row).
        y: Unused; kept so existing callers keep working.
        pred: Reconstructed rows, aligned with ``X``.
        name: Suffix used in the output file name.
        threshold: Position of the vertical decision line drawn on the plot.

    Returns:
        None. The figure is written to
        ``./figures/plot_reconstruction_acc_{name}.png``.
    """
    row_accuracies = [reconstruction_accuracy(X[i], pred[i]) for i in range(len(X))]

    # Do not rely on an earlier notebook cell having created the folder.
    os.makedirs("./figures", exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        sns.histplot(row_accuracies, bins=50, kde=True, color='b', ax=ax)

        # Mark the decision threshold on the distribution.
        ax.axvline(threshold, color='r', linestyle='--', label=f"threshold = {threshold}")

        ax.set(
            xlabel="Reconstruction accuracy per row",
            ylabel="Number of rows",
            title=f"Reconstruction accuracy distribution ({name})",
        )
        ax.legend()

        fig.savefig(f"./figures/plot_reconstruction_acc_{name}.png")
    finally:
        # Always release the figure, even if plotting or saving fails, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)


def get_predictions(X, y, name, threshold=0.75, model=None, model_path="models/latest_24c.pkl"):
    """Flag poorly reconstructed rows of ``X`` and score the flags against ``y``.

    Each row of ``X`` is reconstructed by the model. The fraction of
    positions reproduced exactly is the row's reconstruction accuracy. Rows
    whose accuracy is below ``threshold`` are flagged with 1, all others
    with 0.

    Args:
        X: Input rows passed to the model's ``predict`` method.
        y: Per-row labels compared element-wise with the 0/1 flags.
        name: Suffix for the histogram file written by ``plot_mse``.
        threshold: Reconstruction accuracy below which a row is flagged.
        model: Already loaded model to reuse; when ``None`` the model is
            unpickled from ``model_path``. Passing a model avoids reloading
            it for every dataset.
        model_path: Pickle file to load when ``model`` is ``None``.

    Returns:
        Tuple ``(X_predictions, accuracy)``: the 0/1 flag array and the
        fraction of flags equal to ``y``.
    """
    if model is None:
        tm = load_model(model_path)
        print("Loaded model")
    else:
        tm = model

    pred = tm.predict(X)
    print("Predicted")

    # Per-row reconstruction accuracy (share of matching positions). The
    # progress message below still says "losses", but higher is better here.
    row_accuracies = [calculate_accuracy(X[i], pred[i]) for i in range(len(X))]
    print("Calculated losses")

    # A row reconstructed with accuracy below the threshold is flagged with 1.
    X_predictions = np.array([1 if acc < threshold else 0 for acc in row_accuracies])
    print("Calculated predictions")

    # Agreement between the flags and the provided labels.
    accuracy = calculate_accuracy(y, X_predictions)
    print("Calculated accuracy")

    plot_mse(X, y, pred, name, threshold)
    print("Plotted MSE")

    return X_predictions, accuracy

In [208]:
# Event ids of the test datasets to evaluate; each one is handed to run_prediction,
# which uses it to pick the matching X/y/z files and the event_info.csv row.
test_datasets = [55, 81, 47, 8, 85, 6]


def run_prediction(farm, dataset):
    """Load every input for one farm/event pair and run the predictions on it.

    Args:
        farm: Farm identifier forwarded to the loaders.
        dataset: Event id forwarded to the loaders.

    Returns:
        Tuple ``(X, y, z, is_normal, predictions, accuracy)``: the three
        loaded arrays, the event-level normal flag, and the two values
        returned by ``get_predictions``.
    """
    inputs = load_test_dataset(farm, dataset)
    labels = load_test_labels(farm, dataset)
    status_ids = load_test_status_labels(farm, dataset)
    print(f"Loaded dataset {dataset}")

    event_is_normal = load_test_label(farm, dataset)

    # The farm/event pair also names the histogram saved during prediction.
    run_name = f"{farm}_{dataset}"
    flagged, accuracy = get_predictions(inputs, labels, run_name)

    return inputs, labels, status_ids, event_is_normal, flagged, accuracy


In [209]:
# For every test dataset of farm "C", run the predictions and collect one record holding
# the dataset id, the event-level normal flag, a per-row frame
# (status_type_id / label / prediction) and the accuracy.

elements = []

# Named `dataset_id` rather than `set` so the builtin `set` is not shadowed
# in the notebook namespace after this cell has run.
for dataset_id in test_datasets:
    # run_prediction returns (X, y, z, is_normal, predictions, accuracy); X is not needed here.
    _, labels, status_ids, is_normal, predictions, accuracy = run_prediction("C", dataset_id)

    result_df = pd.DataFrame({
        'status_type_id': status_ids,
        'label': labels,
        'prediction': predictions
    })

    print(f"Done with {dataset_id}. Accuracy: {accuracy}")

    elements.append({'dataset': dataset_id, 'normal': is_normal, 'data': result_df, 'accuracy': accuracy})

Loaded dataset 55
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 55. Accuracy: 0.2630754776072659
Loaded dataset 81
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 81. Accuracy: 0.7944606413994169
Loaded dataset 47
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 47. Accuracy: 0.4438807863031072
Loaded dataset 8
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 8. Accuracy: 0.9852809991079393
Loaded dataset 85
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 85. Accuracy: 0.6063091482649843
Loaded dataset 6
Loaded model
Predicted
Calculated losses
Calculated predictions
Calculated accuracy
Plotted MSE
Done with 6. Accuracy: 0.8355748373101952


In [210]:
# Reduce the per-dataset records collected in `elements` to a single score using the
# project helper calculate_care_score (imported from src.lib.care), then print it.
score = calculate_care_score(elements)
print(score)

0.6758183581707184
