In [None]:
# Notebook configuration: values a reader may want to tune.
# NOTE(review): img_size is not referenced by any cell visible here —
# confirm whether it is still needed.
img_size = 48  # presumably the side length of the input images, in pixels
batch_size = 128  # number of samples per DataLoader batch

In [None]:
import json
import pprint
from pathlib import Path

import pandas as pd
import torch
from torch.utils.data import DataLoader

from dataset import FERPlusDataset
from metrics import calc_metrics

# Evaluate model


In [None]:
# Build the held-out test split ("PrivateTest") and a loader over it.
test_ds = FERPlusDataset(
    "data/fer2013new.csv",
    img_root="data/ferplus_raw",
    split="PrivateTest",
)
test_loader = DataLoader(
    test_ds,
    batch_size=batch_size,
    # Keep the dataset order so predictions line up with the dataset's samples.
    shuffle=False,
    # Pinned host memory only speeds up host-to-CUDA copies; on MPS or CPU
    # PyTorch ignores it and emits a warning, so request it only when CUDA
    # is actually present.
    pin_memory=torch.cuda.is_available(),
)

In [None]:
# Load the TorchScript model onto the CPU first. Without map_location the
# tensors are restored to the device they were saved from, which fails when
# that device (e.g. CUDA) is not available on this machine. The model is
# moved to the selected device in a later cell.
model = torch.jit.load("data/model.pt", map_location="cpu")

In [None]:
# Pick the compute device, in priority order: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device_name = "mps"
elif torch.cuda.is_available():
    device_name = "cuda"
else:
    device_name = "cpu"

device = torch.device(device_name)
print(device)

In [None]:
# Switch the model to evaluation mode and place it on the selected device.
model.eval()
model.to(device)

# Per-batch class indices, gathered on the CPU and concatenated afterwards.
true_batches, pred_batches = [], []

# Gradients are not needed for evaluation.
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_logits = model(batch_images.to(device))
        # argmax over dim 1 turns each row into a single class index
        # (assumes labels are per-class score vectors — TODO confirm).
        true_batches.append(batch_labels.argmax(1).cpu())
        pred_batches.append(batch_logits.argmax(1).cpu())

y_true = torch.cat(true_batches).numpy()
y_pred = torch.cat(pred_batches).numpy()

# Compute the metrics from the index arrays and the dataset's class names.
metrics = calc_metrics(y_true, y_pred, test_ds.classes)
pprint.pprint(metrics)

In [None]:
# Output locations. The folders are created up front so that a fresh
# checkout (where they may not exist yet) does not fail with
# FileNotFoundError on the first write.
metrics_path = Path("metrics/test.json")
plots_dir = Path("plots")
metrics_path.parent.mkdir(parents=True, exist_ok=True)
plots_dir.mkdir(parents=True, exist_ok=True)

# Export metrics for DVC metrics
with open(metrics_path, "w") as f:
    json.dump(metrics, f, indent=2)

# Export metrics for DVC plots (yes, a different format 🤦): a list with one
# record per key of `metrics`, each record carrying that key under "emotion"
# merged with the key's own metric mapping.
with open(plots_dir / "test_metrics.json", "w") as f:
    json.dump(
        [{"emotion": emotion} | scores for emotion, scores in metrics.items()],
        f,
        indent=2,
    )

# Actual vs. predicted class names, one row per test sample.
pd.DataFrame(
    {
        "actual": [test_ds.classes[idx] for idx in y_true],
        "predicted": [test_ds.classes[idx] for idx in y_pred],
    }
).to_csv(plots_dir / "test_classes.csv", index=False)

In [None]:
# Export raw predictions: one row per test image with its file name, the
# predicted class index and the true class index.
predictions_table = pd.DataFrame(
    {
        "filename": test_ds.image_names,
        "prediction": y_pred,
        "true": y_true,
    }
)
predictions_table.to_csv("data/test_predictions.csv", index=False)