#### Set up hyperparams


In [1]:
import random
from torchvision import models
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Lambda, ToTensor
from tqdm import tqdm
from src.dataset_loaders import (
    ISAdetectDataset,
    random_train_test_split,
)
from src.transforms import GrayScaleImage

device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)
print(f"Using device: {device}")


num_epochs = 1
learning_rate = 0.001
batch_size = 16
SEED = random.randint(0, 1000)

transform = Compose(
    [
        GrayScaleImage(255, 255),
        Lambda(lambda x: x.float() / 255.0),
        # 2. Add a dimension to make it a single-channel image (1, 255, 255)
        # Lambda(lambda x: x.unsqueeze(0)),
        # 3. Duplicate the grayscale channel to create 3 channels (3, 255, 255)
        Lambda(lambda x: x.repeat(3, 1, 1)),
    ]
)

dataset = ISAdetectDataset(
    "../../dataset/ISAdetect/ISAdetect_full_dataset",
    transform=transform,
    file_byte_read_limit=255 * 255,
)
train_set, test_set = random_train_test_split(
    dataset=dataset, test_split=0.2, seed=SEED
)

train_dataloader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

Using device: mps


#### Set up model


In [2]:
model = models.efficientnet_v2_s(weights=models.EfficientNet_V2_S_Weights.DEFAULT)

# Freeze all layers in the model
for param in model.parameters():
    param.requires_grad = False

num_inputs = model.classifier[1].in_features

model.classifier[1] = nn.Linear(num_inputs, 1)

for param in model.classifier[1].parameters():
    param.requires_grad = True

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /Users/stiansulebak/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:09<00:00, 9.28MB/s]


#### Criterions and optim


In [3]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.classifier[1].parameters(), lr=learning_rate)

with torch.no_grad():
    it = iter(train_dataloader)
    first_x, first_y = next(it)
    print(first_x[0])
    print(first_x.shape)
    out = model(first_x)
    print(out)

tensor([[[0.9922, 0.4824, 0.7490,  ..., 0.0667, 0.6039, 0.2745],
         [0.9765, 0.0627, 0.7608,  ..., 0.8157, 0.0667, 0.8549],
         [0.2745, 0.9765, 0.0627,  ..., 0.0000, 0.8157, 0.0667],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.9922, 0.4824, 0.7490,  ..., 0.0667, 0.6039, 0.2745],
         [0.9765, 0.0627, 0.7608,  ..., 0.8157, 0.0667, 0.8549],
         [0.2745, 0.9765, 0.0627,  ..., 0.0000, 0.8157, 0.0667],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.9922, 0.4824, 0.7490,  ..., 0.0667, 0.6039, 0.2745],
         [0.9765, 0.0627, 0.7608,  ..., 0.8157, 0.0667, 0.8549],
         [0.2745, 0.9765, 0.0627,  ..., 0.0000, 0.8157, 0.

#### Training loop


In [4]:
model.to(device)
dataset.use_code_only = True
endianness_map = {"little": 0, "big": 1}
for epoch in range(num_epochs):
    for i, (batch_x, batch_y) in enumerate(tqdm(train_dataloader)):
        batch_x = batch_x.to(device)
        optimizer.zero_grad()
        output = model(batch_x)
        targets = torch.tensor(
            [endianness_map[e] for e in batch_y["endianness"]],
            dtype=torch.float32,
        ).to(device)
        loss = criterion(output, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()
        if (i + 1) % 400 == 0:
            print(f"Loss {loss.item()}")

  8%|▊         | 400/4820 [01:42<18:39,  3.95it/s] 

Loss 0.5565081834793091


 17%|█▋        | 800/4820 [03:22<16:33,  4.05it/s]

Loss 0.3812137246131897


 25%|██▍       | 1200/4820 [05:07<14:54,  4.05it/s]

Loss 0.5449435710906982


 33%|███▎      | 1600/4820 [06:52<15:06,  3.55it/s]

Loss 0.6166874766349792


 41%|████▏     | 2000/4820 [08:46<14:18,  3.29it/s]

Loss 0.4251658320426941


 50%|████▉     | 2400/4820 [10:41<12:11,  3.31it/s]

Loss 0.5342374444007874


 58%|█████▊    | 2800/4820 [12:40<09:40,  3.48it/s]

Loss 0.4453434944152832


 66%|██████▋   | 3200/4820 [14:36<07:43,  3.50it/s]

Loss 0.427639365196228


 75%|███████▍  | 3600/4820 [16:34<06:47,  2.99it/s]

Loss 0.3840476870536804


 83%|████████▎ | 4000/4820 [18:39<04:01,  3.39it/s]

Loss 0.45626047253608704


 91%|█████████▏| 4400/4820 [20:42<02:08,  3.27it/s]

Loss 0.456272155046463


100%|█████████▉| 4800/4820 [22:53<00:06,  3.04it/s]

Loss 0.6341553926467896


100%|██████████| 4820/4820 [23:06<00:00,  3.48it/s]


In [5]:
dataset.use_code_only = True
print(f"{dataset.use_code_only=}")
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    arch_stats = {}
    for batch_x, batch_y in tqdm(test_dataloader):
        batch_x = batch_x.to(device)
        output = model(batch_x)
        batch_y_endian = [endianness_map[e] for e in batch_y["endianness"]]
        batch_y_arch = batch_y["architecture"]
        for i in range(len(output)):
            if output[i] >= 0.5:
                pred = 1
            else:
                pred = 0

            current_arch = batch_y_arch[i]
            if current_arch not in arch_stats:
                arch_stats[current_arch] = {"correct": 0, "total": 0}

            if pred == batch_y_endian[i]:
                correct += 1
                arch_stats[current_arch]["correct"] += 1

            arch_stats[current_arch]["total"] += 1
            total += 1

    overall_accuracy = correct / total
    print("\nPer-Architecture Accuracies:")
    for arch in sorted(arch_stats.keys()):
        arch_correct = arch_stats[arch]["correct"]
        arch_total = arch_stats[arch]["total"]
        arch_accuracy = arch_correct / arch_total
        arch_stats[arch]["accuracy"] = arch_accuracy
        print(f"{arch:10s}: {arch_accuracy:.4f} ({arch_correct}/{arch_total})")

dataset.use_code_only=True


100%|██████████| 1205/1205 [05:06<00:00,  3.93it/s]


Per-Architecture Accuracies:
alpha     : 0.8627 (691/801)
amd64     : 0.9524 (840/882)
arm64     : 0.8319 (594/714)
armel     : 0.9963 (815/818)
armhf     : 0.9209 (733/796)
hppa      : 0.7149 (697/975)
i386      : 0.9430 (960/1018)
ia64      : 0.9640 (991/1028)
m68k      : 0.3360 (294/875)
mips      : 0.6114 (409/669)
mips64el  : 0.6803 (630/926)
mipsel    : 0.5560 (417/750)
powerpc   : 0.8463 (644/761)
powerpcspe: 0.6922 (515/744)
ppc64     : 0.6330 (357/564)
ppc64el   : 0.8069 (539/668)
riscv64   : 0.8771 (778/887)
s390      : 0.6482 (645/995)
s390x     : 0.8117 (569/701)
sh4       : 0.9284 (1089/1173)
sparc     : 0.8982 (918/1022)
sparc64   : 0.8544 (528/618)
x32       : 0.9463 (846/894)





In [6]:
print(f"Overall Accuracy: {overall_accuracy:.4f} ({correct}/{total})")

Overall Accuracy: 0.8039 (15499/19279)
