### Imports


In [7]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

from src.dataset_loaders import ISAdetectDataset
from src.models import EmbeddingAndCNNModel
from src.transforms import Vector1D

### Setup


In [8]:
# Specify the model
MODEL = EmbeddingAndCNNModel
TARGET_FEATURE = "endianness"

# Model hyperparameters
BATCH_SIZE = 64
NUM_EPOCHS = 5
LEARNING_RATE = 1e-4

# Specify which groups to use as validation set. Set to None to validate all groups.
VALIDATION_GROUPS = None
# VALIDATION_GROUPS = ["ia64", "arm64", "m68k", "hppa", "ppc64"]

# Set to an integer to limit the dataset size. Set to None to disable limit.
MAX_FILES_PER_ISA = None

### Helper functions


In [9]:
def set_seed(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def get_device():
    """
    Returns 'cuda' if CUDA is available, else 'mps' if Apple Silicon GPU is available,
    otherwise 'cpu'.
    """
    device = None
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    print(f"Using device: {device}")
    return device

### Prepare


In [10]:
device = get_device()
set_seed(42)

scaler = torch.cuda.amp.GradScaler()

dataset = ISAdetectDataset(
    dataset_path="../../dataset/ISAdetect/ISAdetect_full_dataset",
    transform=Vector1D(2048),
    file_byte_read_limit=2048,
    per_architecture_limit=MAX_FILES_PER_ISA,
)

groups = list(map(lambda x: x["architecture"], dataset.metadata))
target_feature = list(map(lambda x: x[TARGET_FEATURE], dataset.metadata))

Using device: cuda


### Train and evaluate


In [11]:
logo = LeaveOneGroupOut()
label_encoder = LabelEncoder()

fold = 1
accuracies = {}
for train_idx, test_idx in logo.split(
    X=range(len(dataset)), y=target_feature, groups=groups
):
    set_seed()

    group_left_out = groups[test_idx[0]]

    if VALIDATION_GROUPS != None and group_left_out not in VALIDATION_GROUPS:
        continue

    print(f"\n=== Fold {fold} – leaving out group '{group_left_out}' ===")
    fold += 1

    all_train_labels = [dataset.metadata[i][TARGET_FEATURE] for i in train_idx]
    label_encoder.fit(all_train_labels)

    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        prefetch_factor=2,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        prefetch_factor=2,
    )

    model = MODEL(input_length=2048, num_classes=2)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    # Train model
    for epoch in range(NUM_EPOCHS):
        model.train()
        print(f"\nEpoch {epoch+1}:")

        total_training_loss = 0
        for images, labels in tqdm(train_loader):
            images = images.to(device)

            encoded_labels = torch.from_numpy(
                label_encoder.transform(labels[TARGET_FEATURE])
            ).to(device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                predictions = model(images)
                loss = criterion(predictions, encoded_labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_training_loss += loss.item()

        avg_training_loss = total_training_loss / len(train_loader)

        # Evaluate model
        model.eval()
        correct = 0
        total = 0
        total_test_loss = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                encoded_labels = torch.from_numpy(
                    label_encoder.transform(labels[TARGET_FEATURE])
                ).to(device)

                outputs = model(images)
                loss = criterion(outputs, encoded_labels)
                total_test_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct += (predicted == encoded_labels).sum().item()
                total += encoded_labels.size(0)

        avg_test_loss = total_test_loss / len(test_loader)
        accuracy = correct / total

        print(
            f"Training Loss: {avg_training_loss:.4f} | Test loss: {avg_test_loss:.4f}"
        )
        print(f"Test Accuracy: {100*accuracy:.2f}%")

    accuracies[group_left_out] = accuracy


=== Fold 1 – leaving out group 'alpha' ===

Epoch 1:


100%|██████████| 1445/1445 [00:07<00:00, 185.52it/s]


Training Loss: 0.0420 | Test loss: 8.7420
Test Accuracy: 0.65%

Epoch 2:


100%|██████████| 1445/1445 [00:07<00:00, 186.44it/s]


Training Loss: 0.0007 | Test loss: 3.5968
Test Accuracy: 24.63%

Epoch 3:


100%|██████████| 1445/1445 [00:07<00:00, 192.53it/s]


Training Loss: 0.0007 | Test loss: 1.2527
Test Accuracy: 69.15%

Epoch 4:


100%|██████████| 1445/1445 [00:07<00:00, 189.86it/s]


Training Loss: 0.0005 | Test loss: 0.1427
Test Accuracy: 96.17%

Epoch 5:


100%|██████████| 1445/1445 [00:07<00:00, 190.10it/s]


Training Loss: 0.0003 | Test loss: 1.3505
Test Accuracy: 77.54%

=== Fold 2 – leaving out group 'amd64' ===

Epoch 1:


100%|██████████| 1438/1438 [00:07<00:00, 190.65it/s]


Training Loss: 0.0408 | Test loss: 0.2579
Test Accuracy: 89.85%

Epoch 2:


100%|██████████| 1438/1438 [00:07<00:00, 188.46it/s]


Training Loss: 0.0006 | Test loss: 0.0144
Test Accuracy: 99.66%

Epoch 3:


100%|██████████| 1438/1438 [00:07<00:00, 188.56it/s]


Training Loss: 0.0006 | Test loss: 0.0031
Test Accuracy: 99.91%

Epoch 4:


100%|██████████| 1438/1438 [00:07<00:00, 190.19it/s]


Training Loss: 0.0001 | Test loss: 0.0018
Test Accuracy: 99.91%

Epoch 5:


100%|██████████| 1438/1438 [00:07<00:00, 190.63it/s]


Training Loss: 0.0012 | Test loss: 0.0039
Test Accuracy: 99.93%

=== Fold 3 – leaving out group 'arm64' ===

Epoch 1:


100%|██████████| 1450/1450 [00:07<00:00, 193.76it/s]


Training Loss: 0.0389 | Test loss: 4.5232
Test Accuracy: 20.32%

Epoch 2:


100%|██████████| 1450/1450 [00:07<00:00, 187.42it/s]


Training Loss: 0.0007 | Test loss: 3.0988
Test Accuracy: 38.64%

Epoch 3:


100%|██████████| 1450/1450 [00:07<00:00, 188.04it/s]


Training Loss: 0.0005 | Test loss: 1.3704
Test Accuracy: 64.99%

Epoch 4:


100%|██████████| 1450/1450 [00:07<00:00, 190.77it/s]


Training Loss: 0.0006 | Test loss: 2.4098
Test Accuracy: 54.81%

Epoch 5:


100%|██████████| 1450/1450 [00:07<00:00, 189.33it/s]


Training Loss: 0.0001 | Test loss: 1.0449
Test Accuracy: 81.27%

=== Fold 4 – leaving out group 'armel' ===

Epoch 1:


100%|██████████| 1444/1444 [00:07<00:00, 193.52it/s]


Training Loss: 0.0431 | Test loss: 0.0563
Test Accuracy: 98.95%

Epoch 2:


100%|██████████| 1444/1444 [00:07<00:00, 187.86it/s]


Training Loss: 0.0012 | Test loss: 0.2734
Test Accuracy: 94.62%

Epoch 3:


100%|██████████| 1444/1444 [00:07<00:00, 190.38it/s]


Training Loss: 0.0004 | Test loss: 0.1813
Test Accuracy: 97.42%

Epoch 4:


100%|██████████| 1444/1444 [00:07<00:00, 193.22it/s]


Training Loss: 0.0006 | Test loss: 0.0875
Test Accuracy: 99.17%

Epoch 5:


100%|██████████| 1444/1444 [00:07<00:00, 190.38it/s]


Training Loss: 0.0007 | Test loss: 0.0324
Test Accuracy: 99.42%

=== Fold 5 – leaving out group 'armhf' ===

Epoch 1:


100%|██████████| 1444/1444 [00:07<00:00, 193.70it/s]


Training Loss: 0.0420 | Test loss: 0.0317
Test Accuracy: 99.05%

Epoch 2:


100%|██████████| 1444/1444 [00:07<00:00, 187.84it/s]


Training Loss: 0.0007 | Test loss: 0.0408
Test Accuracy: 99.20%

Epoch 3:


100%|██████████| 1444/1444 [00:07<00:00, 187.35it/s]


Training Loss: 0.0005 | Test loss: 0.0512
Test Accuracy: 99.10%

Epoch 4:


100%|██████████| 1444/1444 [00:07<00:00, 188.39it/s]


Training Loss: 0.0004 | Test loss: 0.0543
Test Accuracy: 99.25%

Epoch 5:


100%|██████████| 1444/1444 [00:07<00:00, 190.15it/s]


Training Loss: 0.0003 | Test loss: 0.0201
Test Accuracy: 99.80%

=== Fold 6 – leaving out group 'hppa' ===

Epoch 1:


100%|██████████| 1431/1431 [00:07<00:00, 189.20it/s]


Training Loss: 0.0377 | Test loss: 0.2920
Test Accuracy: 91.28%

Epoch 2:


100%|██████████| 1431/1431 [00:07<00:00, 187.00it/s]


Training Loss: 0.0002 | Test loss: 0.4502
Test Accuracy: 91.24%

Epoch 3:


100%|██████████| 1431/1431 [00:07<00:00, 190.93it/s]


Training Loss: 0.0014 | Test loss: 0.7588
Test Accuracy: 87.70%

Epoch 4:


100%|██████████| 1431/1431 [00:07<00:00, 189.46it/s]


Training Loss: 0.0004 | Test loss: 0.3914
Test Accuracy: 92.17%

Epoch 5:


100%|██████████| 1431/1431 [00:07<00:00, 190.03it/s]


Training Loss: 0.0004 | Test loss: 1.0740
Test Accuracy: 78.10%

=== Fold 7 – leaving out group 'i386' ===

Epoch 1:


100%|██████████| 1427/1427 [00:07<00:00, 189.57it/s]


Training Loss: 0.0410 | Test loss: 0.0370
Test Accuracy: 98.85%

Epoch 2:


100%|██████████| 1427/1427 [00:07<00:00, 189.07it/s]


Training Loss: 0.0011 | Test loss: 0.0223
Test Accuracy: 99.30%

Epoch 3:


100%|██████████| 1427/1427 [00:07<00:00, 190.65it/s]


Training Loss: 0.0007 | Test loss: 0.0124
Test Accuracy: 99.41%

Epoch 4:


100%|██████████| 1427/1427 [00:07<00:00, 193.04it/s]


Training Loss: 0.0002 | Test loss: 0.0053
Test Accuracy: 99.86%

Epoch 5:


100%|██████████| 1427/1427 [00:07<00:00, 186.88it/s]


Training Loss: 0.0007 | Test loss: 0.0056
Test Accuracy: 99.59%

=== Fold 8 – leaving out group 'ia64' ===

Epoch 1:


100%|██████████| 1429/1429 [00:07<00:00, 189.38it/s]


Training Loss: 0.0371 | Test loss: 12.0987
Test Accuracy: 0.08%

Epoch 2:


100%|██████████| 1429/1429 [00:07<00:00, 189.75it/s]


Training Loss: 0.0009 | Test loss: 8.6305
Test Accuracy: 21.97%

Epoch 3:


100%|██████████| 1429/1429 [00:07<00:00, 189.72it/s]


Training Loss: 0.0002 | Test loss: 14.2277
Test Accuracy: 0.30%

Epoch 4:


100%|██████████| 1429/1429 [00:07<00:00, 190.91it/s]


Training Loss: 0.0008 | Test loss: 9.7538
Test Accuracy: 22.49%

Epoch 5:


100%|██████████| 1429/1429 [00:07<00:00, 189.35it/s]


Training Loss: 0.0003 | Test loss: 9.0331
Test Accuracy: 21.01%

=== Fold 9 – leaving out group 'm68k' ===

Epoch 1:


100%|██████████| 1438/1438 [00:07<00:00, 190.75it/s]


Training Loss: 0.0351 | Test loss: 9.2811
Test Accuracy: 1.86%

Epoch 2:


100%|██████████| 1438/1438 [00:07<00:00, 192.35it/s]


Training Loss: 0.0005 | Test loss: 11.6835
Test Accuracy: 1.64%

Epoch 3:


100%|██████████| 1438/1438 [00:07<00:00, 189.97it/s]


Training Loss: 0.0007 | Test loss: 9.6650
Test Accuracy: 2.02%

Epoch 4:


100%|██████████| 1438/1438 [00:07<00:00, 191.68it/s]


Training Loss: 0.0004 | Test loss: 14.9907
Test Accuracy: 1.34%

Epoch 5:


100%|██████████| 1438/1438 [00:07<00:00, 187.62it/s]


Training Loss: 0.0003 | Test loss: 16.2957
Test Accuracy: 1.36%

=== Fold 10 – leaving out group 'mips' ===

Epoch 1:


100%|██████████| 1451/1451 [00:07<00:00, 191.25it/s]


Training Loss: 0.0397 | Test loss: 0.0074
Test Accuracy: 99.78%

Epoch 2:


100%|██████████| 1451/1451 [00:07<00:00, 189.34it/s]


Training Loss: 0.0011 | Test loss: 0.0114
Test Accuracy: 99.58%

Epoch 3:


100%|██████████| 1451/1451 [00:07<00:00, 190.70it/s]


Training Loss: 0.0004 | Test loss: 0.0804
Test Accuracy: 98.65%

Epoch 4:


100%|██████████| 1451/1451 [00:07<00:00, 186.69it/s]


Training Loss: 0.0003 | Test loss: 0.0227
Test Accuracy: 99.47%

Epoch 5:


100%|██████████| 1451/1451 [00:07<00:00, 187.87it/s]


Training Loss: 0.0008 | Test loss: 0.0004
Test Accuracy: 100.00%

=== Fold 11 – leaving out group 'mips64el' ===

Epoch 1:


100%|██████████| 1439/1439 [00:07<00:00, 188.11it/s]


Training Loss: 0.0395 | Test loss: 0.1440
Test Accuracy: 96.10%

Epoch 2:


100%|██████████| 1439/1439 [00:07<00:00, 187.82it/s]


Training Loss: 0.0007 | Test loss: 0.2123
Test Accuracy: 94.34%

Epoch 3:


100%|██████████| 1439/1439 [00:07<00:00, 189.25it/s]


Training Loss: 0.0006 | Test loss: 0.2074
Test Accuracy: 96.03%

Epoch 4:


100%|██████████| 1439/1439 [00:07<00:00, 191.21it/s]


Training Loss: 0.0006 | Test loss: 0.0441
Test Accuracy: 99.07%

Epoch 5:


100%|██████████| 1439/1439 [00:07<00:00, 187.19it/s]


Training Loss: 0.0001 | Test loss: 0.2042
Test Accuracy: 95.96%

=== Fold 12 – leaving out group 'mipsel' ===

Epoch 1:


100%|██████████| 1448/1448 [00:07<00:00, 186.01it/s]


Training Loss: 0.0401 | Test loss: 0.0095
Test Accuracy: 99.68%

Epoch 2:


100%|██████████| 1448/1448 [00:07<00:00, 190.62it/s]


Training Loss: 0.0010 | Test loss: 0.0294
Test Accuracy: 99.10%

Epoch 3:


100%|██████████| 1448/1448 [00:07<00:00, 190.12it/s]


Training Loss: 0.0007 | Test loss: 0.0027
Test Accuracy: 99.95%

Epoch 4:


100%|██████████| 1448/1448 [00:07<00:00, 189.46it/s]


Training Loss: 0.0001 | Test loss: 0.0021
Test Accuracy: 99.95%

Epoch 5:


100%|██████████| 1448/1448 [00:07<00:00, 189.51it/s]


Training Loss: 0.0000 | Test loss: 0.0040
Test Accuracy: 99.95%

=== Fold 13 – leaving out group 'powerpc' ===

Epoch 1:


100%|██████████| 1450/1450 [00:07<00:00, 191.09it/s]


Training Loss: 0.0397 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 2:


100%|██████████| 1450/1450 [00:07<00:00, 187.93it/s]


Training Loss: 0.0004 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 3:


100%|██████████| 1450/1450 [00:07<00:00, 192.55it/s]


Training Loss: 0.0012 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1450/1450 [00:07<00:00, 192.57it/s]


Training Loss: 0.0005 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1450/1450 [00:07<00:00, 189.70it/s]


Training Loss: 0.0003 | Test loss: 0.0000
Test Accuracy: 100.00%

=== Fold 14 – leaving out group 'powerpcspe' ===

Epoch 1:


100%|██████████| 1445/1445 [00:07<00:00, 192.37it/s]


Training Loss: 0.0390 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 2:


100%|██████████| 1445/1445 [00:07<00:00, 192.30it/s]


Training Loss: 0.0008 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 3:


100%|██████████| 1445/1445 [00:07<00:00, 192.50it/s]


Training Loss: 0.0004 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1445/1445 [00:07<00:00, 192.53it/s]


Training Loss: 0.0006 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1445/1445 [00:07<00:00, 192.85it/s]


Training Loss: 0.0003 | Test loss: 0.0000
Test Accuracy: 100.00%

=== Fold 15 – leaving out group 'ppc64' ===

Epoch 1:


100%|██████████| 1462/1462 [00:07<00:00, 190.18it/s]


Training Loss: 0.0400 | Test loss: 0.0163
Test Accuracy: 99.82%

Epoch 2:


100%|██████████| 1462/1462 [00:07<00:00, 187.23it/s]


Training Loss: 0.0006 | Test loss: 0.6692
Test Accuracy: 74.66%

Epoch 3:


100%|██████████| 1462/1462 [00:07<00:00, 185.43it/s]


Training Loss: 0.0004 | Test loss: 0.2501
Test Accuracy: 87.33%

Epoch 4:


100%|██████████| 1462/1462 [00:07<00:00, 187.57it/s]


Training Loss: 0.0004 | Test loss: 0.1948
Test Accuracy: 88.95%

Epoch 5:


100%|██████████| 1462/1462 [00:07<00:00, 188.86it/s]


Training Loss: 0.0002 | Test loss: 0.0231
Test Accuracy: 98.98%

=== Fold 16 – leaving out group 'ppc64el' ===

Epoch 1:


100%|██████████| 1452/1452 [00:07<00:00, 190.00it/s]


Training Loss: 0.0402 | Test loss: 0.0016
Test Accuracy: 99.97%

Epoch 2:


100%|██████████| 1452/1452 [00:07<00:00, 187.58it/s]


Training Loss: 0.0007 | Test loss: 0.0024
Test Accuracy: 99.97%

Epoch 3:


100%|██████████| 1452/1452 [00:07<00:00, 186.79it/s]


Training Loss: 0.0004 | Test loss: 0.0017
Test Accuracy: 99.97%

Epoch 4:


100%|██████████| 1452/1452 [00:07<00:00, 187.58it/s]


Training Loss: 0.0003 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1452/1452 [00:07<00:00, 184.99it/s]


Training Loss: 0.0012 | Test loss: 0.0000
Test Accuracy: 100.00%

=== Fold 17 – leaving out group 'riscv64' ===

Epoch 1:


100%|██████████| 1437/1437 [00:07<00:00, 190.36it/s]


Training Loss: 0.0416 | Test loss: 0.0259
Test Accuracy: 99.35%

Epoch 2:


100%|██████████| 1437/1437 [00:07<00:00, 185.05it/s]


Training Loss: 0.0009 | Test loss: 0.0947
Test Accuracy: 97.23%

Epoch 3:


100%|██████████| 1437/1437 [00:07<00:00, 185.84it/s]


Training Loss: 0.0006 | Test loss: 0.0804
Test Accuracy: 97.97%

Epoch 4:


100%|██████████| 1437/1437 [00:07<00:00, 185.86it/s]


Training Loss: 0.0002 | Test loss: 0.0165
Test Accuracy: 99.59%

Epoch 5:


100%|██████████| 1437/1437 [00:07<00:00, 186.41it/s]


Training Loss: 0.0004 | Test loss: 0.1019
Test Accuracy: 97.82%

=== Fold 18 – leaving out group 's390' ===

Epoch 1:


100%|██████████| 1426/1426 [00:07<00:00, 187.76it/s]


Training Loss: 0.0392 | Test loss: 0.0001
Test Accuracy: 100.00%

Epoch 2:


100%|██████████| 1426/1426 [00:07<00:00, 188.64it/s]


Training Loss: 0.0008 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 3:


100%|██████████| 1426/1426 [00:07<00:00, 187.07it/s]


Training Loss: 0.0010 | Test loss: 0.0002
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1426/1426 [00:07<00:00, 184.82it/s]


Training Loss: 0.0005 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1426/1426 [00:07<00:00, 186.55it/s]


Training Loss: 0.0006 | Test loss: 0.0016
Test Accuracy: 99.92%

=== Fold 19 – leaving out group 's390x' ===

Epoch 1:


100%|██████████| 1451/1451 [00:07<00:00, 190.33it/s]


Training Loss: 0.0395 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 2:


100%|██████████| 1451/1451 [00:07<00:00, 186.79it/s]


Training Loss: 0.0009 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 3:


100%|██████████| 1451/1451 [00:07<00:00, 187.23it/s]


Training Loss: 0.0005 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1451/1451 [00:07<00:00, 188.01it/s]


Training Loss: 0.0004 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1451/1451 [00:07<00:00, 189.15it/s]


Training Loss: 0.0005 | Test loss: 0.0000
Test Accuracy: 100.00%

=== Fold 20 – leaving out group 'sh4' ===

Epoch 1:


100%|██████████| 1414/1414 [00:07<00:00, 188.46it/s]


Training Loss: 0.0426 | Test loss: 0.6202
Test Accuracy: 79.07%

Epoch 2:


100%|██████████| 1414/1414 [00:07<00:00, 186.32it/s]


Training Loss: 0.0010 | Test loss: 0.4718
Test Accuracy: 87.05%

Epoch 3:


100%|██████████| 1414/1414 [00:07<00:00, 186.05it/s]


Training Loss: 0.0006 | Test loss: 0.5148
Test Accuracy: 87.03%

Epoch 4:


100%|██████████| 1414/1414 [00:07<00:00, 187.40it/s]


Training Loss: 0.0001 | Test loss: 0.4208
Test Accuracy: 89.75%

Epoch 5:


100%|██████████| 1414/1414 [00:07<00:00, 188.50it/s]


Training Loss: 0.0002 | Test loss: 0.0103
Test Accuracy: 99.81%

=== Fold 21 – leaving out group 'sparc' ===

Epoch 1:


100%|██████████| 1429/1429 [00:07<00:00, 186.41it/s]


Training Loss: 0.0378 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 2:


100%|██████████| 1429/1429 [00:07<00:00, 188.92it/s]


Training Loss: 0.0007 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 3:


100%|██████████| 1429/1429 [00:07<00:00, 186.98it/s]


Training Loss: 0.0006 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1429/1429 [00:07<00:00, 187.55it/s]


Training Loss: 0.0003 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1429/1429 [00:07<00:00, 188.34it/s]


Training Loss: 0.0000 | Test loss: 0.0000
Test Accuracy: 100.00%

=== Fold 22 – leaving out group 'sparc64' ===

Epoch 1:


100%|██████████| 1455/1455 [00:07<00:00, 185.41it/s]


Training Loss: 0.0371 | Test loss: 0.0049
Test Accuracy: 99.91%

Epoch 2:


100%|██████████| 1455/1455 [00:07<00:00, 190.48it/s]


Training Loss: 0.0010 | Test loss: 0.0023
Test Accuracy: 99.94%

Epoch 3:


100%|██████████| 1455/1455 [00:07<00:00, 184.99it/s]


Training Loss: 0.0004 | Test loss: 0.0022
Test Accuracy: 99.94%

Epoch 4:


100%|██████████| 1455/1455 [00:07<00:00, 186.52it/s]


Training Loss: 0.0004 | Test loss: 0.0228
Test Accuracy: 99.82%

Epoch 5:


100%|██████████| 1455/1455 [00:07<00:00, 184.43it/s]


Training Loss: 0.0005 | Test loss: 0.0042
Test Accuracy: 99.94%

=== Fold 23 – leaving out group 'x32' ===

Epoch 1:


100%|██████████| 1441/1441 [00:07<00:00, 183.61it/s]


Training Loss: 0.0410 | Test loss: 0.0089
Test Accuracy: 99.76%

Epoch 2:


100%|██████████| 1441/1441 [00:07<00:00, 183.89it/s]


Training Loss: 0.0006 | Test loss: 0.0092
Test Accuracy: 99.52%

Epoch 3:


100%|██████████| 1441/1441 [00:07<00:00, 184.82it/s]


Training Loss: 0.0005 | Test loss: 0.0000
Test Accuracy: 100.00%

Epoch 4:


100%|██████████| 1441/1441 [00:07<00:00, 187.14it/s]


Training Loss: 0.0007 | Test loss: 0.0001
Test Accuracy: 100.00%

Epoch 5:


100%|██████████| 1441/1441 [00:07<00:00, 188.25it/s]


Training Loss: 0.0006 | Test loss: 0.0007
Test Accuracy: 100.00%


### Evaluate


In [12]:
print("Test accuracies for each fold/group:")
for group, acc in accuracies.items():
    print(f"{group}: {100*acc:.2f}%")


# Print overall performance across folds
mean_acc = np.mean(list(accuracies.values()))
std_acc = np.std(list(accuracies.values()))
print(f"\nAverage LOGO cross-validated test accuracy: {mean_acc:.4f} ± {std_acc:.4f}")

Test accuracies for each fold/group:
alpha: 77.54%
amd64: 99.93%
arm64: 81.27%
armel: 99.42%
armhf: 99.80%
hppa: 78.10%
i386: 99.59%
ia64: 21.01%
m68k: 1.36%
mips: 100.00%
mips64el: 95.96%
mipsel: 99.95%
powerpc: 100.00%
powerpcspe: 100.00%
ppc64: 98.98%
ppc64el: 100.00%
riscv64: 97.82%
s390: 99.92%
s390x: 100.00%
sh4: 99.81%
sparc: 100.00%
sparc64: 99.94%
x32: 100.00%

Average LOGO cross-validated test accuracy: 0.8915 ± 0.2521
