### Imports


In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

from src.dataset_loaders import ISAdetectDataset
from src.models import EmbeddingAndCNNModel
from src.transforms import Vector1D

### Setup


In [None]:
# --- Model selection -------------------------------------------------------
# MODEL is the model class instantiated for every fold; TARGET_FEATURE is the
# metadata key whose value is used as the classification label.
MODEL = EmbeddingAndCNNModel
TARGET_FEATURE = "endianness"

# --- Model hyperparameters -------------------------------------------------
INPUT_LENGTH = 512
MAX_FILE_SPLITS = 1
DROPOUT_RATE = 0.3

# --- Training hyperparameters ----------------------------------------------
# A new seed is drawn each time this cell runs; it is printed below so that a
# run can be reproduced by hard-coding the reported value here.
SEED = random.randint(0, 1_000_000_000)
BATCH_SIZE = 64
NUM_EPOCHS = 2
LEARNING_RATE = 1e-4

# --- Validation groups -----------------------------------------------------
# None validates every group (full leave-one-group-out). To restrict the run,
# assign a list of group names instead, for example:
#   VALIDATION_GROUPS = ["arm64", "hppa", "ia64", "riscv64", "sh4"]
VALIDATION_GROUPS = None

# --- Dataset size limit ----------------------------------------------------
# Integer caps the number of files per ISA; None disables the cap.
MAX_FILES_PER_ISA = None

# --- Echo the configuration for debugging ----------------------------------
# Each tuple is one output line; its fields are joined by a single space,
# exactly as print() would do with the same positional arguments.
print(
    "\n".join(
        " ".join(str(field) for field in row)
        for row in (
            ("",),
            ("Model:", MODEL.__name__),
            ("Target feature:", TARGET_FEATURE),
            ("Validation groups:", VALIDATION_GROUPS),
            ("",),
            ("Input length:", INPUT_LENGTH),
            ("Max file splits:", MAX_FILE_SPLITS),
            ("Dropout rate:", DROPOUT_RATE),
            ("",),
            ("Seed: ", SEED),
            ("Batch size:", BATCH_SIZE),
            ("Number of epochs:", NUM_EPOCHS),
            ("Learning rate:", LEARNING_RATE),
            ("",),
        )
    )
)

Target feature: endianness
Validation groups: ['arm64', 'hppa', 'ia64', 'riscv64', 'sh4']

Input length: 1024
Max file splits: 1
Dropout rate: 0.3

Batch size: 64
Number of epochs: 2
Learning rate: 0.0001



### Helper functions


In [None]:
def set_seed(seed: "int | None" = None):
    """
    Seed every random number generator used by the notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy's legacy global RNG and
    Python's `random` module, and switches cuDNN to deterministic mode with
    benchmarking disabled.

    Args:
        seed: The seed to apply. When None (the default), the *current* value
            of the module-level `SEED` is used. The lookup happens at call
            time, so re-running the setup cell (which draws a new `SEED`)
            takes effect without having to re-run this definition.
    """
    if seed is None:
        # Resolve late: a `seed=SEED` default would be frozen at def time.
        seed = SEED

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def get_device():
    """
    Pick the compute device for this session and report it.

    Preference order: CUDA when `torch.cuda.is_available()`, otherwise MPS
    when `torch.backends.mps.is_available()`, otherwise the CPU.

    Returns:
        The selected `torch.device`. The choice is also printed.
    """
    if torch.cuda.is_available():
        backend = "cuda"
    elif torch.backends.mps.is_available():
        backend = "mps"
    else:
        backend = "cpu"

    selected = torch.device(backend)
    print(f"Using device: {selected}")
    return selected

### Prepare


In [None]:
# Select the compute device and seed all RNGs before anything stochastic runs.
device = get_device()
set_seed()

# Gradient scaling belongs to CUDA mixed-precision training. get_device() may
# also return "mps" or "cpu"; there the scaler is created disabled, so its
# scale()/step()/update() calls pass straight through and no
# "CUDA is not available" warning is emitted.
scaler = torch.cuda.amp.GradScaler(enabled=device.type == "cuda")

# Load the dataset. Both the transform and the per-file read limit are driven
# by INPUT_LENGTH; the remaining limits come from the setup cell.
dataset = ISAdetectDataset(
    dataset_path="../../dataset/ISAdetect/ISAdetect_full_dataset",
    feature_csv_path="../../dataset/ISAdetect-features.csv",
    transform=Vector1D(INPUT_LENGTH),
    file_byte_read_limit=INPUT_LENGTH,
    per_architecture_limit=MAX_FILES_PER_ISA,
    max_file_splits=MAX_FILE_SPLITS,
)

# One entry per item in dataset.metadata: the architecture is used as the
# cross-validation group, the TARGET_FEATURE value as the label.
groups = [entry["architecture"] for entry in dataset.metadata]
target_feature = [entry[TARGET_FEATURE] for entry in dataset.metadata]

Using device: cuda


### Train and evaluate


In [5]:
# Leave-one-group-out cross-validation: each architecture is held out once as
# the test set while a fresh model is trained on all remaining architectures.
logo = LeaveOneGroupOut()
label_encoder = LabelEncoder()

# Mixed precision (autocast + gradient scaling) is only applied on CUDA. On
# "mps"/"cpu" both are created disabled, which keeps training in full
# precision and avoids the "CUDA is not available" warnings.
use_amp = device.type == "cuda"


def encode_labels(batch_labels):
    """
    Convert a batch's raw labels into a class-index tensor on `device`.

    `batch_labels` is the label object yielded by the DataLoader; it is
    indexed by TARGET_FEATURE and the result is passed through the
    currently fitted `label_encoder`.
    """
    return torch.from_numpy(
        label_encoder.transform(batch_labels[TARGET_FEATURE])
    ).to(device)


fold = 1
accuracies = {}
for train_idx, test_idx in logo.split(
    X=range(len(dataset)), y=target_feature, groups=groups
):
    # Re-seed so every fold starts from the same RNG state.
    set_seed()

    # All test indices of a fold belong to one group; read it from the first.
    group_left_out = groups[test_idx[0]]

    if VALIDATION_GROUPS is not None and group_left_out not in VALIDATION_GROUPS:
        continue

    print(f"\n=== Fold {fold} – leaving out group '{group_left_out}' ===")
    fold += 1

    # Fit the encoder on the training labels of this fold only.
    all_train_labels = [dataset.metadata[i][TARGET_FEATURE] for i in train_idx]
    label_encoder.fit(all_train_labels)

    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        prefetch_factor=2,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        prefetch_factor=2,
    )

    model = MODEL(input_length=INPUT_LENGTH, num_classes=2, dropout_rate=DROPOUT_RATE)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # A fresh scaler per fold: its loss-scale state belongs to one model's
    # training run and must not carry over to the next fold (or survive a
    # re-run of this cell).
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Train model
    for epoch in range(NUM_EPOCHS):
        model.train()
        print(f"\nEpoch {epoch+1}:")

        total_training_loss = 0
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            encoded_labels = encode_labels(labels)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                predictions = model(images)
                loss = criterion(predictions, encoded_labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_training_loss += loss.item()

        # Mean of the per-batch losses.
        avg_training_loss = total_training_loss / len(train_loader)

        # Evaluate model on the held-out group after every epoch
        model.eval()
        correct = 0
        total = 0
        total_test_loss = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                encoded_labels = encode_labels(labels)

                outputs = model(images)
                loss = criterion(outputs, encoded_labels)
                total_test_loss += loss.item()

                # Predicted class = index of the largest output per sample.
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == encoded_labels).sum().item()
                total += encoded_labels.size(0)

        avg_test_loss = total_test_loss / len(test_loader)
        accuracy = correct / total

        print(
            f"Training Loss: {avg_training_loss:.4f} | Test loss: {avg_test_loss:.4f}"
        )
        print(f"Test Accuracy: {100*accuracy:.2f}%")

    # Record the accuracy measured after the final epoch for this group.
    accuracies[group_left_out] = accuracy


=== Fold 1 – leaving out group 'arm64' ===

Epoch 1:


100%|██████████| 1414/1414 [00:08<00:00, 166.70it/s]


Training Loss: 0.0387 | Test loss: 1.8902
Test Accuracy: 62.39%

Epoch 2:


100%|██████████| 1414/1414 [00:07<00:00, 201.17it/s]


Training Loss: 0.0068 | Test loss: 0.8549
Test Accuracy: 85.90%

=== Fold 2 – leaving out group 'hppa' ===

Epoch 1:


100%|██████████| 1394/1394 [00:06<00:00, 205.77it/s]


Training Loss: 0.0389 | Test loss: 0.4742
Test Accuracy: 90.62%

Epoch 2:


100%|██████████| 1394/1394 [00:06<00:00, 200.25it/s]


Training Loss: 0.0065 | Test loss: 0.6981
Test Accuracy: 89.59%

=== Fold 3 – leaving out group 'ia64' ===

Epoch 1:


100%|██████████| 1391/1391 [00:06<00:00, 210.64it/s]


Training Loss: 0.0399 | Test loss: 1.9158
Test Accuracy: 61.95%

Epoch 2:


100%|██████████| 1391/1391 [00:07<00:00, 192.72it/s]


Training Loss: 0.0080 | Test loss: 5.9408
Test Accuracy: 1.02%

=== Fold 4 – leaving out group 'riscv64' ===

Epoch 1:


100%|██████████| 1402/1402 [00:06<00:00, 204.61it/s]


Training Loss: 0.0417 | Test loss: 0.1690
Test Accuracy: 92.91%

Epoch 2:


100%|██████████| 1402/1402 [00:06<00:00, 217.21it/s]


Training Loss: 0.0071 | Test loss: 0.5524
Test Accuracy: 79.79%

=== Fold 5 – leaving out group 'sh4' ===

Epoch 1:


100%|██████████| 1378/1378 [00:06<00:00, 208.73it/s]


Training Loss: 0.0437 | Test loss: 4.1993
Test Accuracy: 22.63%

Epoch 2:


100%|██████████| 1378/1378 [00:07<00:00, 176.54it/s]


Training Loss: 0.0075 | Test loss: 0.4865
Test Accuracy: 86.68%


### Evaluate


In [6]:
# Per-group results, in the order the folds were run.
print("Test accuracies for each fold/group:")
for group, acc in accuracies.items():
    print(f"{group}: {100*acc:.2f}%")


# Aggregate over folds: mean and (population) standard deviation of the
# final-epoch accuracies recorded for each held-out group.
fold_scores = list(accuracies.values())
mean_acc = np.mean(fold_scores)
std_acc = np.std(fold_scores)
print(f"\nAverage LOGO cross-validated test accuracy: {mean_acc:.4f} ± {std_acc:.4f}")

Test accuracies for each fold/group:
arm64: 85.90%
hppa: 89.59%
ia64: 1.02%
riscv64: 79.79%
sh4: 86.68%

Average LOGO cross-validated test accuracy: 0.6860 ± 0.3394
