In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from src.models import EndiannessModel
from src.dataset_loaders import ISAdetectCodeOnlyDataset, random_train_test_split
from src.transforms import EndiannessCount

num_epochs = 2
learning_rate = 0.001
batch_size = 4
SEED = 42

# Create dataloaders
mips_dir = "../../dataset/ISAdetect/ISAdetect_full_dataset/mips"
mipsel_dir = "../../dataset/ISAdetect/ISAdetect_full_dataset/mipsel"

dataset = ISAdetectCodeOnlyDataset(
    dataset_path="../../dataset/ISAdetect/ISAdetect_full_dataset",
    transform=EndiannessCount(),
    file_byte_read_limit=2 * (2**10),
)
train_set, test_set = random_train_test_split(dataset, test_split=0.2, seed=SEED)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=1)

In [6]:
model = EndiannessModel(with_sigmoid=False)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

endianness_map = {"little": 0, "big": 1}

print("Training model...")
# Training loop
for epoch in range(num_epochs):
    print(f"before enumerate {epoch}")
    for i, (batch_x, batch_y) in enumerate(train_loader):
        if i == 0:
            print(f"starting_epoch {epoch}")
        optimizer.zero_grad()
        output = model(batch_x)
        targets = torch.tensor(
            [endianness_map[e] for e in batch_y["endianness"]], dtype=torch.float32
        )
        loss = criterion(output, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()

        # if epoch == 0 and i < 10:
        #     print(f"Initial loss: {loss.item()}")

        if (i + 1) % (len(train_loader) // 10) == 0:
            print(f"Step {i+1}, Loss: {loss.item()}")
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

Training model...
before enumerate 0
starting_epoch 0
Step 1927, Loss: 0.3149182200431824
Step 3854, Loss: 1.3296594619750977
Step 5781, Loss: 0.3655053973197937
Step 7708, Loss: 0.004180599004030228
Step 9635, Loss: 0.42354816198349
Step 11562, Loss: 0.15268521010875702
Step 13489, Loss: 0.057835064828395844
Step 15416, Loss: 0.24987410008907318
Step 17343, Loss: 0.41581445932388306
Step 19270, Loss: 0.16576813161373138
Epoch 1/2, Loss: 0.0038063274696469307
before enumerate 1
starting_epoch 1
Step 1927, Loss: 0.0015092723770067096
Step 3854, Loss: 0.14278145134449005
Step 5781, Loss: 0.0849192813038826
Step 7708, Loss: 0.023575587198138237
Step 9635, Loss: 0.09041085839271545
Step 11562, Loss: 0.30729275941848755
Step 13489, Loss: 0.005693867802619934
Step 15416, Loss: 2.672973871231079
Step 17343, Loss: 0.2027324140071869
Step 19270, Loss: 0.0023603406734764576
Epoch 2/2, Loss: 0.01830695942044258


In [9]:
# Test model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    class_1 = 0
    for batch_x, batch_y in test_loader:
        output = model(batch_x)
        batch_y = [endianness_map[e] for e in batch_y["endianness"]]
        for i in range(len(output)):
            if output[i] >= 0.5:
                pred = 1
            else:
                pred = 0
            if pred == batch_y[i]:
                correct += 1
            if batch_y[i] == 1:
                class_1 += 1
            total += 1
    print(f"Accuracy: {correct/total}")
    print(f"Class 1: {class_1}, Total: {total}, Percentage: {class_1/total}")

Accuracy: 0.8990092847139375
Class 1: 8092, Total: 19279, Percentage: 0.41973131386482704
