In [90]:
import torch
from torchmetrics import ConfusionMatrix
from torchmetrics import Accuracy, Precision, Recall, F1Score, AUROC

In [91]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cuda')

In [92]:
# Confusion-matrix metric configured for a 3-class problem, moved to the selected device.
confmat = ConfusionMatrix(task='multiclass', num_classes=3).to(device)

In [93]:
# Toy data: 10 samples, 3 classes.
# Hard class predictions; each entry equals the row-wise argmax of pred_probs below.
preds = torch.tensor([0, 1, 1, 1, 0, 2, 0, 2, 1, 1]).to(device)
# Per-class probabilities, one row per sample (each row sums to 1).
pred_probs = torch.tensor(
    [
        [0.6, 0.3, 0.1],
        [0.1, 0.8, 0.1],
        [0.2, 0.7, 0.1],
        [0.2, 0.6, 0.2],
        [0.9, 0.05, 0.05],
        [0.1, 0.3, 0.6],
        [0.7, 0.1, 0.2],
        [0.1, 0.1, 0.8],
        [0.4, 0.6, 0.0],
        [0.01, 0.9, 0.09],
    ]
).to(device)

# Ground-truth class labels for the same 10 samples.
targets = torch.tensor([0, 0, 1, 2, 0, 1, 0, 2, 2, 1]).to(device)

In [94]:
# Confusion matrix for the toy data. Rows index the target class and columns the
# predicted class: row 0 sums to 4 (targets equal to 0), column 0 sums to 3 (preds equal to 0).
cm = confmat(preds, targets)

cm

tensor([[3, 1, 0],
        [0, 2, 1],
        [0, 2, 1]], device='cuda:0')

In [95]:
# The number of rows of the confusion matrix is what the helpers below use as num_classes.
cm.shape[0]

3

In [96]:
def harmonic_mean(a, b):
    """Return the harmonic mean ``2ab / (a + b)`` of two values.

    Works on plain Python numbers and on single-element tensors.

    Args:
        a: First value (number or single-element tensor).
        b: Second value (number or single-element tensor).

    Returns:
        The harmonic mean of ``a`` and ``b``. When ``a + b == 0`` the mean is
        undefined and a zero of the operands' own type is returned instead
        (``0`` for ints, ``0.0`` for floats, a zero tensor for tensors).
    """
    total = a + b
    if total == 0:
        # Return `total` itself rather than a literal 0: it is already a zero
        # of the right type (and device, for tensors), so callers that sum the
        # results and then call tensor methods such as `.item()` keep working.
        return total
    return 2 * a * b / total

def manage_nan(tensor):
    """Replace a NaN single-element tensor by zero.

    Args:
        tensor: A single-element tensor (``.item()`` is called on its NaN
            check), typically a ratio that is 0/0 when a class has no samples.

    Returns:
        ``tensor`` itself when it is not NaN; otherwise a zero tensor with the
        same shape, dtype and device as ``tensor``.
    """
    if torch.isnan(tensor).item():
        # Build the replacement from the input instead of a global device so
        # the result always lives on the same device (and has the same dtype)
        # as the value it stands in for.
        return torch.zeros_like(tensor)
    return tensor

def confmat_to_accuracy(confmat): # micro
    """Micro accuracy computed from a confusion matrix.

    The diagonal entries are summed and divided by the sum of all entries.
    An undefined 0/0 ratio (matrix with no counts) is replaced by 0 through
    ``manage_nan``.

    Returns:
        The accuracy as a Python float.
    """
    # Sum of the diagonal: one entry per class, indexed by the row count.
    diagonal_total = sum(confmat[k, k] for k in range(confmat.shape[0]))
    ratio = diagonal_total / confmat.sum()
    return manage_nan(ratio).item()

def confmat_to_precision(confmat): # macro
    """Macro-averaged precision computed from a confusion matrix.

    The precision of class ``k`` is ``confmat[k, k]`` divided by the sum of
    column ``k``; an undefined 0/0 ratio is replaced by 0 through
    ``manage_nan``. For a 2x2 matrix only the precision of class 1 is
    returned; otherwise the per-class values are averaged with equal weight.

    Returns:
        The precision as a Python float.
    """
    n_classes = confmat.shape[0]

    def class_precision(k):
        # Diagonal entry of class k over the total of column k.
        return manage_nan(confmat[k, k] / confmat[:, k].sum())

    if n_classes == 2:
        return class_precision(1).item()

    total = sum(class_precision(k) for k in range(n_classes))
    return (total / n_classes).item()

def confmat_to_recall(confmat): # macro
    """Macro-averaged recall computed from a confusion matrix.

    The recall of class ``k`` is ``confmat[k, k]`` divided by the sum of row
    ``k``; an undefined 0/0 ratio is replaced by 0 through ``manage_nan``.
    For a 2x2 matrix only the recall of class 1 is returned; otherwise the
    per-class values are averaged with equal weight.

    Returns:
        The recall as a Python float.
    """
    n_classes = confmat.shape[0]

    if n_classes == 2:
        # Binary case: report the recall of class 1 only.
        return manage_nan(confmat[1, 1] / confmat[1].sum()).item()

    # Diagonal entry of each class over the total of its row.
    per_class = [
        manage_nan(confmat[k, k] / confmat[k].sum())
        for k in range(n_classes)
    ]
    return (sum(per_class) / n_classes).item()

def confmat_to_f1score(confmat): # macro
    """Macro-averaged F1 score computed from a confusion matrix.

    For each class ``i`` the precision (``confmat[i, i]`` over the sum of
    column ``i``) and recall (``confmat[i, i]`` over the sum of row ``i``) are
    combined with ``harmonic_mean``; undefined 0/0 ratios count as 0 through
    ``manage_nan``. For a 2x2 matrix the F1 of class 1 is returned, built from
    ``confmat_to_precision`` and ``confmat_to_recall``; otherwise the
    per-class F1 values are averaged with equal weight.

    Returns:
        The F1 score as a Python float.
    """
    num_classes = confmat.shape[0]

    if num_classes == 2:
        # float() keeps the return type consistent when harmonic_mean yields
        # an int 0 for precision == recall == 0.
        return float(harmonic_mean(confmat_to_precision(confmat), confmat_to_recall(confmat)))

    f1 = 0
    for i in range(num_classes):
        true_pos = confmat[i, i]
        prec = manage_nan(true_pos / confmat[:, i].sum())
        rec = manage_nan(true_pos / confmat[i].sum())

        f1 += harmonic_mean(prec, rec)

    # harmonic_mean may return a plain number (not a tensor) when
    # prec + rec == 0. If that happens for every class, `f1` is still a Python
    # number here and has no `.item()`; float() handles both tensors and numbers.
    return float(f1 / num_classes)

In [97]:
# Sanity check: hand-rolled micro accuracy next to torchmetrics' Accuracy on the same preds/targets.
confmat_to_accuracy(cm), Accuracy(task='multiclass', num_classes=3, average='micro').to(device)(preds, targets).item()

(0.6000000238418579, 0.6000000238418579)

In [98]:
# Sanity check: hand-rolled macro precision next to torchmetrics' Precision on the same preds/targets.
confmat_to_precision(cm), Precision(task='multiclass', num_classes=3, average='macro').to(device)(preds, targets).item()

(0.6333333253860474, 0.6333333253860474)

In [99]:
# Sanity check: hand-rolled macro recall next to torchmetrics' Recall on the same preds/targets.
# The two values can differ in the last float32 digits.
confmat_to_recall(cm), Recall(task='multiclass', num_classes=3, average='macro').to(device)(preds, targets).item()

(0.5833333730697632, 0.5833333134651184)

In [100]:
# Sanity check: hand-rolled macro F1 next to torchmetrics' F1Score on the same preds/targets.
confmat_to_f1score(cm), F1Score(task='multiclass', num_classes=3, average='macro').to(device)(preds, targets).item()

(0.5857143402099609, 0.5857143402099609)

In [101]:
# Macro AUROC is computed from pred_probs (class probabilities) rather than the hard preds;
# no confusion-matrix counterpart is defined in this notebook.
AUROC(task='multiclass', num_classes=3, average='macro').to(device)(pred_probs, targets).item()

0.7380952835083008

In [102]:
# Second scenario: class 0 is never predicted, so column 0 of the confusion matrix is all
# zeros and the class-0 precision is 0/0.
# NOTE: this rebinds preds, targets and cm from the earlier cells.
preds = torch.tensor([1, 1, 1, 2, 2, 2, 2, 2, 2]).to(device)
targets = torch.tensor([0, 1, 2, 0, 0, 1, 1, 2, 2]).to(device)

cm = confmat(preds, targets)

cm

tensor([[0, 1, 2],
        [0, 1, 2],
        [0, 1, 2]], device='cuda:0')

In [103]:
# With the never-predicted class, the 0/0 precision is counted as 0 by manage_nan;
# compare against torchmetrics' macro Precision on the same data.
confmat_to_precision(cm), Precision(task='multiclass', num_classes=3, average='macro').to(device)(preds, targets).item()

(0.222222238779068, 0.2222222238779068)

In [104]:
# Scratch check: unsqueeze(1) turns the 10 random values into a column of shape (10, 1).
torch.rand(10).unsqueeze(1)

tensor([[0.0874],
        [0.9560],
        [0.7305],
        [0.5089],
        [0.4520],
        [0.4855],
        [0.5623],
        [0.1986],
        [0.3550],
        [0.9742]])

In [105]:
# Binary (2x2) case where class 1 is never predicted: its precision is 0/0 (reported as 0)
# and its recall is 0/8 = 0.
# NOTE: this matrix is created on the CPU (no .to(device)), unlike the earlier ones.
cm = torch.tensor([[8, 0], [8, 0]])

confmat_to_precision(cm), confmat_to_recall(cm)

(0.0, 0.0)

In [110]:
# Degenerate case: both operands are zero, so harmonic_mean returns 0 instead of dividing by zero.
harmonic_mean(0, 0)

0

In [107]:
# Shape of the confusion matrix currently bound to cm.
cm.shape

torch.Size([2, 2])

In [108]:
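# Validation code without using confmat (commented out, kept for reference).
# It sums each metric's per-batch value and divides by the number of batches,
# which for the macro-averaged metrics is not the same as computing them once over the whole validation set.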

# criterion = nn.CrossEntropyLoss()
# accuracy_metric = Accuracy(task='multiclass', num_classes=3, average='micro').to(device)
# precision_metric = Precision(task='multiclass', num_classes=3, average='macro').to(device)
# recall_metric = Recall(task='multiclass', num_classes=3, average='macro').to(device)
# f1score_metric = F1Score(task='multiclass', num_classes=3, average='macro').to(device)
# auroc_metric = AUROC(task='multiclass', num_classes=3).to(device)

# if WANNA_TRAIN:=True:
#     length = len(val_dataloader)
#     loss = 0
#     acc = 0
#     prec = 0
#     rec = 0
#     f1 = 0
#     auroc = 0

#     model.eval()
#     with torch.inference_mode():
#         for X, y in tqdm(val_dataloader):
#             X, y = X.to(device), y.to(device)
#             logits = model(X)
#             probs = logits.softmax(1)
#             labels = probs.argmax(1)

#             loss += criterion(logits, y).item()
#             acc += accuracy_metric(labels, y).item()
#             prec += precision_metric(labels, y).item()
#             rec += recall_metric(labels, y).item()
#             f1 += f1score_metric(labels, y).item()
#             auroc += auroc_metric(probs, y).item()

#     loss /= length
#     acc /= length
#     prec /= length
#     rec /= length
#     f1 /= length
#     auroc /= length
    
#     baseline_perf['loss'] = loss
#     baseline_perf['acc'] = acc
#     baseline_perf['prec'] = prec
#     baseline_perf['rec'] = rec
#     baseline_perf['f1'] = f1
#     baseline_perf['auroc'] = auroc

# baseline_perf