In [1]:
!mamba install -y -q pytorch=2.1.2 torchvision pytorch-cuda=12.1 scikit-learn h5py py-xgboost~=2.0 -c pytorch -c nvidia -c anaconda -c conda-forge
!pip install -q lightning torchmetrics pyts

In [1]:
import h5py
import numpy as np
from pyts.image import RecurrencePlot
from pathlib import Path

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split, Dataset, SubsetRandomSampler, Subset
from torchvision import transforms as T
from torchvision.models import vgg11
import lightning as L
import torchmetrics
import pickle
import joblib
from sklearn.metrics import classification_report

from scipy.special import expit
from scipy.special import softmax

In [45]:
# Load the cached evaluation results.
# NOTE(review): joblib.load unpickles the file and can therefore execute arbitrary code;
# only open results files that come from a trusted source.
report = joblib.load("./results.joblib")

# Mapping of class label -> integer class index (it is used that way when the
# per-class score arrays are indexed further down in this notebook).
mapper = report["mapper"]

# Display names ordered by class index. classification_report(target_names=...)
# expects names in label order, so do not rely on the dict's insertion order.
labels = tuple(sorted(mapper, key=mapper.get))

In [46]:
# Scikit-learn metrics used to score both the with-sparse and without-sparse runs
from sklearn.metrics import (
    f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
)

In [47]:
# Pool the per-fold results of the "with-sparse" run into flat arrays.
folds_sparse = report["with-sparse"]
# Number of classes, derived from the label mapping instead of a hard-coded 12.
n_classes = len(mapper)

y_true_sparse = np.concatenate([fold["y_true"] for fold in folds_sparse], axis=0)
y_pred_sparse = np.concatenate([fold["y_pred"] for fold in folds_sparse], axis=0)

# Keep only the first n_classes score columns and renormalise each row with a
# softmax so rows sum to 1, as multiclass roc_auc_score requires.
# NOTE(review): presumably the stored "y_proba" holds raw scores with extra
# trailing columns (hence the slice) - confirm against the code that wrote results.joblib.
y_proba_sparse = np.concatenate(
    [fold["y_proba"].astype(np.float32) for fold in folds_sparse], axis=0
)[:, :n_classes]
y_proba_sparse = softmax(y_proba_sparse, axis=1).astype(np.float32)

print(y_true_sparse.shape, y_pred_sparse.shape, y_proba_sparse.shape)

# Support-weighted summary metrics over all pooled samples.
sparse = {
    "accuracy": accuracy_score(y_true_sparse, y_pred_sparse),
    "precision": precision_score(y_true_sparse, y_pred_sparse, average="weighted"),
    "recall": recall_score(y_true_sparse, y_pred_sparse, average="weighted"),
    "f1": f1_score(y_true_sparse, y_pred_sparse, average="weighted"),
    "roc": roc_auc_score(y_true_sparse, y_proba_sparse, average="weighted", multi_class='ovr'),
}
sparse

(1211732,) (1211732,) (1211732, 12)


{'accuracy': 0.5526329254323563,
 'precision': 0.5937706105764643,
 'recall': 0.5526329254323563,
 'f1': 0.437083536639011,
 'roc': 0.6612672917827535}

In [48]:
# Per-class precision / recall / F1 / support table for the with-sparse run.
sparse_report_text = classification_report(
    y_true_sparse,
    y_pred_sparse,
    target_names=labels,
    digits=4,
)
print(sparse_report_text)

                  precision    recall  f1-score   support

            HEKA     0.6583    0.0844    0.1496    113543
            HTPC     0.5817    0.1220    0.2017     45410
          boiler     0.6282    0.2348    0.3418     52289
computer monitor     0.5510    0.0145    0.0283     57979
desktop computer     0.7164    0.0282    0.0542     47890
  fridge/freezer     0.8039    0.3981    0.5325     59178
 laptop computer     0.5722    0.0350    0.0659     59011
           light     0.5422    0.9871    0.6999    612534
       microwave     0.5674    0.0786    0.1381     53340
 server computer     0.5985    0.2464    0.3491      3920
      television     0.5538    0.0598    0.1080     56246
    washer dryer     0.8337    0.0248    0.0481     50392

        accuracy                         0.5526   1211732
       macro avg     0.6340    0.1928    0.2264   1211732
    weighted avg     0.5938    0.5526    0.4371   1211732



In [49]:
# Pool the per-fold results of the "without-sparse" run into flat arrays.
folds_dense = report["without-sparse"]
# Number of classes, derived from the label mapping instead of a hard-coded 12.
n_classes = len(mapper)

y_true = np.concatenate([fold["y_true"] for fold in folds_dense], axis=0)
y_pred = np.concatenate([fold["y_pred"] for fold in folds_dense], axis=0)
# Keep only the first n_classes score columns.
# NOTE(review): presumably the stored "y_proba" holds raw scores with extra
# trailing columns (hence the slice) - confirm against the code that wrote results.joblib.
y_proba = np.concatenate(
    [fold["y_proba"].astype(np.float32) for fold in folds_dense], axis=0
)[:, :n_classes]
print(y_true.shape, y_pred.shape, y_proba.shape)

# Renormalise each row with a softmax so rows sum to 1, as multiclass
# roc_auc_score requires.
y_proba = softmax(y_proba, axis=1).astype(np.float32)

# Support-weighted summary metrics over all pooled samples.
nonsparse = {
    "accuracy": accuracy_score(y_true, y_pred),
    "precision": precision_score(y_true, y_pred, average="weighted"),
    "recall": recall_score(y_true, y_pred, average="weighted"),
    "f1": f1_score(y_true, y_pred, average="weighted"),
    "roc": roc_auc_score(y_true, y_proba, average="weighted", multi_class='ovr'),
}
nonsparse

(303838,) (303838,) (303838, 12)


{'accuracy': 0.8233005746483323,
 'precision': 0.8253671057708207,
 'recall': 0.8233005746483323,
 'f1': 0.8211101653728601,
 'roc': 0.9786187271098862}

In [50]:
# Per-class precision / recall / F1 / support table for the without-sparse run.
dense_report_text = classification_report(
    y_true,
    y_pred,
    target_names=labels,
    digits=4,
)
print(dense_report_text)

                  precision    recall  f1-score   support

            HEKA     0.9663    0.8287    0.8922     14312
            HTPC     0.7320    0.8651    0.7930     39558
          boiler     0.8149    0.8058    0.8103     45742
computer monitor     0.6198    0.5844    0.6016      9653
desktop computer     0.7366    0.5390    0.6225      7462
  fridge/freezer     0.9441    0.9737    0.9587     56039
 laptop computer     0.7052    0.6905    0.6978     12270
           light     0.8333    0.8437    0.8385     85072
       microwave     0.7513    0.8617    0.8027     10071
 server computer     0.8201    0.7297    0.7723      3774
      television     0.7415    0.5863    0.6548     12723
    washer dryer     0.9337    0.5364    0.6814      7162

        accuracy                         0.8233    303838
       macro avg     0.7999    0.7371    0.7605    303838
    weighted avg     0.8254    0.8233    0.8211    303838



In [67]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Metrics that share the (y_true, y_pred, average=...) call shape.
label_metrics = (precision_score, recall_score, f1_score)

# Per-class scores (average=None yields one value per class), with-sparse run.
precision_sparse, recall_sparse, f1_sparse = [
    metric(y_true_sparse, y_pred_sparse, average=None) for metric in label_metrics
]
roc_sparse = roc_auc_score(y_true_sparse, y_proba_sparse, average=None, multi_class='ovr')

# Per-class scores, without-sparse run.
precision, recall, f1 = [
    metric(y_true, y_pred, average=None) for metric in label_metrics
]
roc = roc_auc_score(y_true, y_proba, average=None, multi_class='ovr')

# Support-weighted averages, without-sparse run.
precision_avg, recall_avg, f1_avg = [
    metric(y_true, y_pred, average='weighted') for metric in label_metrics
]
roc_avg = roc_auc_score(y_true, y_proba, average='weighted', multi_class='ovr')

# Support-weighted averages, with-sparse run.
precision_sparse_avg, recall_sparse_avg, f1_sparse_avg = [
    metric(y_true_sparse, y_pred_sparse, average='weighted') for metric in label_metrics
]
roc_sparse_avg = roc_auc_score(y_true_sparse, y_proba_sparse, average='weighted', multi_class='ovr')

# One LaTeX table row per class: with-sparse columns, an empty spacer column
# ("&&"), then the without-sparse columns.
for label, idx in mapper.items():
    sparse_half = f"{label} & {precision_sparse[idx]:.4f} & {recall_sparse[idx]:.4f} & {f1_sparse[idx]:.4f} & {roc_sparse[idx]:.4f} && "
    dense_half = f"{precision[idx]:.4f} & {recall[idx]:.4f} & {f1[idx]:.4f} & {roc[idx]:.4f} \\\\"
    print(sparse_half + dense_half)

# Footer row with the weighted averages, framed by \midrule and \bottomrule.
footer_parts = [
    "\\midrule\n",
    f"Weighted avg. ",
    f"& {precision_sparse_avg:.4f} ",
    f"& {recall_sparse_avg:.4f} ",
    f"& {f1_sparse_avg:.4f} ",
    f"& {roc_sparse_avg:.4f} ",
    f"&",
    f"& {precision_avg:.4f} ",
    f"& {recall_avg:.4f} ",
    f"& {f1_avg:.4f} ",
    f"& {roc_avg:.4f} ",
    "\n\\\\\\bottomrule",
]
print("".join(footer_parts))
    

HEKA & 0.6583 & 0.0844 & 0.1496 & 0.6230 && 0.9663 & 0.8287 & 0.8922 & 0.9933 \\
HTPC & 0.5817 & 0.1220 & 0.2017 & 0.7788 && 0.7320 & 0.8651 & 0.7930 & 0.9768 \\
boiler & 0.6282 & 0.2348 & 0.3418 & 0.7833 && 0.8149 & 0.8058 & 0.8103 & 0.9754 \\
computer monitor & 0.5510 & 0.0145 & 0.0283 & 0.6350 && 0.6198 & 0.5844 & 0.6016 & 0.9749 \\
desktop computer & 0.7164 & 0.0282 & 0.0542 & 0.6351 && 0.7366 & 0.5390 & 0.6225 & 0.9739 \\
fridge/freezer & 0.8039 & 0.3981 & 0.5325 & 0.8461 && 0.9441 & 0.9737 & 0.9587 & 0.9975 \\
laptop computer & 0.5722 & 0.0350 & 0.0659 & 0.6307 && 0.7052 & 0.6905 & 0.6978 & 0.9796 \\
light & 0.5422 & 0.9871 & 0.6999 & 0.6308 && 0.8333 & 0.8437 & 0.8385 & 0.9679 \\
microwave & 0.5674 & 0.0786 & 0.1381 & 0.6400 && 0.7513 & 0.8617 & 0.8027 & 0.9933 \\
server computer & 0.5985 & 0.2464 & 0.3491 & 0.8407 && 0.8201 & 0.7297 & 0.7723 & 0.9920 \\
television & 0.5538 & 0.0598 & 0.1080 & 0.7848 && 0.7415 & 0.5863 & 0.6548 & 0.9641 \\
washer dryer & 0.8337 & 0.0248 & 0.0481

In [71]:
# Absolute gain (without-sparse minus with-sparse) in the weighted-average
# precision, recall and F1, rounded to four decimals.
(
    np.round(precision_avg - precision_sparse_avg, 4),
    np.round(recall_avg - recall_sparse_avg, 4),
    np.round(f1_avg - f1_sparse_avg, 4),
)

(0.2316, 0.2707, 0.384)

In [78]:
# Relative per-class F1 change going from the with-sparse run to the
# without-sparse run: (new - old) / old, rounded to four decimals.
f1_rel_change = np.round((f1 - f1_sparse) / f1_sparse, 4)
for label, change in zip(labels, f1_rel_change):
    print(label, change)

HEKA 4.9629
HTPC 2.9315
boiler 1.3709
computer monitor 20.2578
desktop computer 10.484
fridge/freezer 0.8002
laptop computer 9.5849
light 0.198
microwave 4.8133
server computer 1.2122
television 5.0651
washer dryer 13.1653


In [81]:
# Relative change of each weighted-average metric (precision, recall, F1, ROC AUC),
# paired as (with-sparse value, without-sparse value).
avg_pairs = [
    (precision_sparse_avg, precision_avg),
    (recall_sparse_avg, recall_avg),
    (f1_sparse_avg, f1_avg),
    (roc_sparse_avg, roc_avg),
]
for baseline, improved in avg_pairs:
    rel_gain = (improved - baseline) / baseline
    print(np.round(rel_gain, 4))

0.39
0.4898
0.8786
0.4799


In [9]:
#_y_proba = expit(y_proba)
#row_sums = np.sum(_y_proba, axis=1) # normalization
#row_sums[row_sums == 0] = 1e-12
#row_sums = np.repeat(row_sums.reshape(-1, 1), 1000, axis=-1)
#_y_proba = _y_proba / row_sums # these should be histograms

In [10]:
# Ratio of each summary metric: without-sparse value divided by with-sparse value.
for metric_name, sparse_value in sparse.items():
    ratio = nonsparse[metric_name] / sparse_value
    print(f"{metric_name}: {ratio:.4f}")

accuracy: 1.4898
precision: 1.3900
recall: 1.4898
f1: 1.8786
roc: 1.4799


In [11]:
#y_proba[:, y_true].shape

In [12]:
# Accuracy of every individual fold, for each of the two runs
# (first the with-sparse folds, then the without-sparse folds).
scores_sparse, scores = [
    [accuracy_score(fold["y_true"], fold["y_pred"]) for fold in report[run_key]]
    for run_key in ("with-sparse", "without-sparse")
]

In [20]:
# Display the per-fold accuracies of the "with-sparse" run.
scores_sparse

[0.5151154039136979,
 0.5080280983442047,
 0.5020070245860512,
 0.5081378575012544,
 0.564350225790266,
 0.6142122428499749,
 0.6165642247867537,
 0.5054488435907487,
 0.5925519404155233,
 0.5061074088592709,
 0.5025950607604861,
 0.6046569972559781,
 0.5049157193257546,
 0.578047824382595,
 0.617530380243042,
 0.6232693061544492,
 0.5042885143081145,
 0.5069698157585261,
 0.6252293218345747]

In [21]:
# Display the per-fold accuracies of the "without-sparse" run.
scores

[0.8301650825412706,
 0.8212231115557779,
 0.8400450225112557,
 0.8189719859929965,
 0.8329789894947474,
 0.8168459229614807,
 0.8244122061030515,
 0.8193471735867934,
 0.8268509254627313,
 0.8141454568194609,
 0.8371584016009005,
 0.8242761553373773,
 0.8310299543493215,
 0.8300919267087736,
 0.8135201050590958,
 0.8087048965042837,
 0.8150834844600088,
 0.8256519292101807,
 0.8122068663623289]

In [19]:
from scipy.stats import wilcoxon

# Wilcoxon signed-rank test on the per-fold accuracies, pairing the two runs
# by fold position. zero_method='zsplit' keeps zero differences in the ranking
# and splits their rank between the positive and negative sums.
# NOTE(review): assumes fold i of both runs refers to the same split - confirm.
test_result = wilcoxon(scores, scores_sparse, zero_method='zsplit')
stat, p = test_result
p

3.814697265625e-06