In [20]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import seaborn as sns
import pickle

from tmu.models.classification.vanilla_classifier import TMClassifier
from sklearn.metrics import confusion_matrix
from src.lib.care import calculate_care_score

In [21]:
# Create the "figures" output folder if it does not exist.
# plot_predictions saves its confusion-matrix PDFs under ./figures/, so this
# cell has to run before any prediction is plotted.
os.makedirs("figures", exist_ok=True)

In [22]:
def load_test_dataset(farm, event_id):
    """Load the feature matrix of one test event.

    Reads ``./data_test/X_{farm}_{event_id}.txt`` (relative to the current
    working directory) as whitespace-separated unsigned integers.

    Parameters
    ----------
    farm : str
        Wind farm identifier used in the file name (e.g. ``"C"``).
    event_id : int
        Event identifier used in the file name.

    Returns
    -------
    numpy.ndarray
        Array of dtype ``uint32`` with one row per line of the file.
        Presumably each row is one binarized sample -- confirm against the
        code that writes ``data_test``.

    Notes
    -----
    To experiment on a subset, slice the returned array (e.g. ``X[:3000]``)
    and apply the same slice to the labels.
    """
    # np.loadtxt already returns a fresh uint32 array, so no additional
    # np.array(...).astype(...) copies are made. ndmin=2 keeps the result
    # two-dimensional even when the file holds a single row, which would
    # otherwise be squeezed to 1-D and stop being a (rows, features) matrix.
    return np.loadtxt(
        f"./data_test/X_{farm}_{event_id}.txt",
        dtype=np.uint32,
        ndmin=2,
    )


def load_test_labels(farm, event_id):
    """Load the per-row label columns of one test event.

    Reads ``./data_test/y_{farm}_{event_id}.csv`` and returns three of its
    columns as arrays.

    Parameters
    ----------
    farm : str
        Wind farm identifier used in the file name.
    event_id : int
        Event identifier used in the file name.

    Returns
    -------
    tuple
        ``(labels, status_ids, train_test)`` where ``labels`` and
        ``status_ids`` are the ``label`` and ``status_type_id`` columns cast
        to ``uint32``, and ``train_test`` is the ``train_test`` column as
        stored in the CSV (no cast applied).
    """
    frame = pd.read_csv(f"./data_test/y_{farm}_{event_id}.csv")

    # Pull all three columns out first so a missing column fails before any
    # dtype conversion is attempted.
    label_values, status_values, split_values = (
        frame[column].values
        for column in ('label', 'status_type_id', 'train_test')
    )

    labels_u32 = np.array(label_values).astype(np.uint32)
    status_u32 = np.array(status_values).astype(np.uint32)
    return labels_u32, status_u32, split_values


def load_test_label(farm, event_id):
    """Return whether an event is labelled as normal behaviour.

    Looks up ``event_id`` in the farm's semicolon-separated
    ``event_info.csv`` and inspects the ``event_label`` of the first
    matching row. Not to be confused with ``load_test_labels``, which loads
    the per-row labels of an event.

    Parameters
    ----------
    farm : str
        Wind farm identifier; selects the ``Wind Farm {farm}`` directory.
    event_id : int
        Value to match against the ``event_id`` column.

    Returns
    -------
    bool
        ``False`` when the event's ``event_label`` equals ``"anomaly"``,
        ``True`` for any other label.

    Raises
    ------
    IndexError
        If ``event_info.csv`` has no row with this ``event_id``.
    """
    event_info = pd.read_csv(f"../../../data/care_to_compare/Wind Farm {farm}/event_info.csv", delimiter=';')

    matching_labels = event_info.loc[event_info['event_id'] == event_id, 'event_label']

    # An unknown event would otherwise surface as a bare "index 0 is out of
    # bounds" error; keep the exception type but say what was missing.
    if matching_labels.empty:
        raise IndexError(
            f"event_id {event_id!r} not found in event_info.csv of Wind Farm {farm}"
        )

    return bool(matching_labels.iloc[0] != "anomaly")

In [23]:
def calculate_accuracy(labels, predictions):
    """Return the fraction of positions where ``predictions`` equals ``labels``.

    Parameters
    ----------
    labels : array-like
        Ground-truth values.
    predictions : array-like
        Predicted values; must have exactly the same shape as ``labels``.

    Returns
    -------
    numpy.float64
        Share of matching elements in ``[0, 1]``, or ``nan`` when the inputs
        are empty (there is nothing to score).

    Raises
    ------
    ValueError
        If ``labels`` and ``predictions`` differ in shape.
    """
    # Coerce first: comparing two plain lists with == yields a single bool
    # rather than an element-wise result.
    truth = np.asarray(labels)
    predicted = np.asarray(predictions)

    # Reject mismatches explicitly; shapes such as (N,) vs (N, 1) would
    # otherwise broadcast to an (N, N) comparison and give a meaningless score.
    if truth.shape != predicted.shape:
        raise ValueError(
            f"labels and predictions must have the same shape, "
            f"got {truth.shape} and {predicted.shape}"
        )

    if truth.size == 0:
        return np.float64("nan")

    return np.mean(truth == predicted)

In [24]:
def load_model(filename) -> TMClassifier:
    """Unpickle a previously saved model from ``filename``.

    Parameters
    ----------
    filename : str or path-like
        Path of the pickle file to read (opened in binary mode).

    Returns
    -------
    TMClassifier
        Whatever object the pickle contains; the annotation reflects the
        expected content and is not checked at runtime.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserializing, so only
    point this at files you created yourself or otherwise trust.
    """
    with open(filename, "rb") as model_file:
        return pickle.load(model_file)

In [25]:
def plot_predictions(X, y, z, p, name):
    """Save a confusion-matrix heatmap of predictions ``p`` against labels ``y``.

    The figure is written to ``./figures/confusion_matrix{name}.pdf``; the
    ``figures`` directory must already exist.

    Parameters
    ----------
    X : array-like
        Unused; kept so existing callers do not have to change.
    y : array-like
        Ground-truth labels.
    z : array-like
        Unused; kept so existing callers do not have to change.
    p : array-like
        Predicted labels, aligned with ``y``.
    name : str
        Suffix inserted into the output file name.

    Returns
    -------
    None
    """
    # Pin the class order so the matrix is always 2x2 and lines up with the
    # two tick labels below, even when only one class occurs in y and p.
    # Assumes 0 = normal and 1 = anomaly, the ordering the tick labels
    # already rely on -- TODO confirm against the label encoding.
    cm = confusion_matrix(y, p, labels=[0, 1])

    # Draw on an explicit figure instead of pyplot's implicit current one.
    fig, ax = plt.subplots()
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='YlOrBr',
                    xticklabels=['Predicted normal', 'Predicted anomaly'],
                    yticklabels=['Actual normal', 'Actual anomaly'],
                    ax=ax)

        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        ax.set_title('Confusion Matrix')

        fig.savefig(f"./figures/confusion_matrix{name}.pdf")
    finally:
        # Always release the figure; this runs once per dataset in a loop.
        plt.close(fig)


def get_predictions(X, y, z, name, tm):
    """Predict with ``tm`` on ``X``, score against ``y`` and plot the result.

    Parameters
    ----------
    X : array-like
        Input passed to ``tm.predict``.
    y : array-like
        Ground-truth labels used for the accuracy and the confusion matrix.
    z : array-like
        Forwarded unchanged to ``plot_predictions``.
    name : str
        Suffix for the confusion-matrix file written by ``plot_predictions``.
    tm : object
        Model exposing a ``predict(X)`` method.

    Returns
    -------
    tuple
        ``(predictions, accuracy)``.
    """
    predicted = tm.predict(X)
    score = calculate_accuracy(y, predicted)

    # Side effect: writes the confusion-matrix figure for this run.
    plot_predictions(X, y, z, predicted, name)

    return predicted, score

In [26]:
# Event ids of the test datasets to evaluate; they are processed in list order.
# A longer alternative selection is kept commented out directly below. The
# active list leaves out ids 8, 85, 6, 62, 36, 56, 94, 54, 43, 50, 64, 46 and
# 65 from it (the reason is not recorded in this notebook).
#test_datasets = [44, 33, 11, 49, 31, 67, 9, 91, 5, 90, 70, 35, 16, 76, 8, 85, 6, 62, 36, 56, 94, 54, 43, 50, 64, 46, 65, 61, 93, 75, 41, 58, 48, 88, 57, 32, 89, 59, 63, 80, 37, 29, 1, 20, 60]
test_datasets = [11, 33, 44, 49, 31, 67, 9, 91, 5, 90, 70, 35, 16,
                 76, 61, 93, 75, 41, 58, 48, 88, 57, 32, 89, 59, 63,
                 80, 37, 29, 1, 20, 60]

# Model unpickled once from "best_1.pkl" (resolved against the working
# directory); run_prediction reads this module-level name for every dataset.
tm_classifier = load_model("best_1.pkl")


def run_prediction(farm, dataset):
    """Load one test event and run the shared classifier on it.

    Uses the module-level ``tm_classifier`` as the model and, through
    ``get_predictions``, writes a confusion-matrix figure named after
    ``{farm}_{dataset}``.

    Parameters
    ----------
    farm : str
        Wind farm identifier.
    dataset : int
        Event id of the test dataset.

    Returns
    -------
    tuple
        ``(X, labels, status_ids, train_test, is_normal, predictions,
        accuracy)`` in exactly this order.
    """
    features = load_test_dataset(farm, dataset)
    row_labels, row_status_ids, split_flags = load_test_labels(farm, dataset)

    # Event-level ground truth (one flag per event, not per row).
    event_is_normal = load_test_label(farm, dataset)

    run_name = f"{farm}_{dataset}"
    preds, acc = get_predictions(features, row_labels, row_status_ids, run_name, tm_classifier)

    return features, row_labels, row_status_ids, split_flags, event_is_normal, preds, acc


In [27]:
# Run the classifier on every test dataset of wind farm "C" and collect, per
# dataset, a dataframe with status_type_id;label;prediction;train_test.
elements = []

# The loop variable is deliberately not called `set`: at cell level that
# would rebind the builtin `set` for the rest of the kernel session.
for dataset_id in test_datasets:
    # run_prediction returns (X, labels, status_ids, train_test, is_normal,
    # predictions, accuracy); the feature matrix is not needed here.
    (_features, labels, status_ids, train_test,
     is_normal, predictions, accuracy) = run_prediction("C", dataset_id)

    result_df = pd.DataFrame({
        'status_type_id': status_ids,
        'label': labels,
        'prediction': predictions,
        'train_test': train_test,
    })

    print(f"Done with {dataset_id}. Accuracy: {accuracy}")

    elements.append({'dataset': dataset_id, 'normal': is_normal, 'data': result_df, 'accuracy': accuracy})

2025-05-15 18:53:57,903 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:53:57,919 - matplotlib.backends.backend_pdf - DEBUG - Embedding font /System/Library/Fonts/Supplemental/Arial.ttf.
2025-05-15 18:53:57,919 - matplotlib.backends.backend_pdf - DEBUG - Writing TrueType font.
Done with 11. Accuracy: 0.19348420790848048
2025-05-15 18:53:58,461 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:53:58,474 - matplotlib.backends.backend_pdf - DEBUG - Embedding font /System/Library/Fonts/Supplemental/Arial.ttf.
2025-05-15 18:53:58,475 - matplotlib.backends.backend_pdf - DEBUG - Writing TrueType font.
Done with 33. Accuracy: 0.30817989737398127
2025-05-15 18:54:00,054 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:54:00,067 - matplotlib.backends.backend_pdf - DEBU



Done with 48. Accuracy: 1.0
2025-05-15 18:54:08,930 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:54:08,953 - matplotlib.backends.backend_pdf - DEBUG - Embedding font /System/Library/Fonts/Supplemental/Arial.ttf.
2025-05-15 18:54:08,953 - matplotlib.backends.backend_pdf - DEBUG - Writing TrueType font.
Done with 88. Accuracy: 0.754599097535578
2025-05-15 18:54:09,467 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:54:09,484 - matplotlib.backends.backend_pdf - DEBUG - Embedding font /System/Library/Fonts/Supplemental/Arial.ttf.
2025-05-15 18:54:09,485 - matplotlib.backends.backend_pdf - DEBUG - Writing TrueType font.
Done with 57. Accuracy: 0.9530420579828501
2025-05-15 18:54:09,934 - matplotlib.backends.backend_pdf - DEBUG - Assigning font /F1 = '/System/Library/Fonts/Supplemental/Arial.ttf'
2025-05-15 18:54:09,947 - matplotlib.ba

In [28]:
# Save the results to results.pkl
# `elements` is the list of per-dataset dicts built by the prediction loop
# (keys: 'dataset', 'normal', 'data', 'accuracy'); an existing results.pkl
# in the working directory is overwritten.
with open("results.pkl", "wb") as f:
    pickle.dump(elements, f)

In [30]:
# Reduce the per-dataset results to a single number with the project helper
# calculate_care_score (imported from src.lib.care) and print it.
# NOTE(review): the helper's expected input schema is not visible in this
# notebook -- presumably it consumes the 'dataset'/'normal'/'data'/'accuracy'
# dicts collected in `elements`; confirm in src/lib/care.
score = calculate_care_score(elements)
print(score)

0.6814294026433256
