In [2]:
# Import necessary libraries
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from timm import create_model
import pickle
import random
from data_utils import permute_elements

  from .autonotebook import tqdm as notebook_tqdm


# Utility functions:

## Function: load_and_permute_data
 This function loads the train, test, and validation data from the source folder, applies a random or custom
 permutation to the data, and saves the permuted data together with the (unchanged) labels to the destination folder.

In [3]:
def load_and_permute_data(src_folder, dst_folder, perm_path=None, num_features=1024):
    """Permute every data split found in ``src_folder`` and write it to ``dst_folder``.

    The same permutation is applied to ``train_data.pt``, ``test_data.pt`` and
    ``val_data.pt`` via ``permute_elements``; the matching ``*_labels.pt`` files
    are loaded and re-saved unchanged.

    Args:
        src_folder: Folder containing the six ``*.pt`` files listed above.
        dst_folder: Folder the permuted data and the labels are written to.
            It is created if it does not exist yet.
        perm_path: Optional path to a pickled permutation. When falsy, a fresh
            random permutation of ``range(num_features)`` is drawn with
            ``random.sample``.
        num_features: Length of the randomly drawn permutation (ignored when
            ``perm_path`` is given). Defaults to 1024, the value that was
            previously hard-coded; it must match what ``permute_elements``
            expects for these tensors.
    """
    if perm_path:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # permutation files that you produced yourself or otherwise trust.
        with open(perm_path, 'rb') as f:
            permutation = pickle.load(f)
        print(f"Loaded permutation from {perm_path}")
    else:
        permutation = random.sample(range(num_features), num_features)

    # Do not rely on the caller having created the destination beforehand.
    os.makedirs(dst_folder, exist_ok=True)

    # Load, permute and save the data files
    for data_file in ("train_data.pt", "test_data.pt", "val_data.pt"):
        data_tensor = torch.load(os.path.join(src_folder, data_file))
        permuted_data_tensor = permute_elements(data_tensor, permutation)
        torch.save(permuted_data_tensor, os.path.join(dst_folder, data_file))

    # Labels do not depend on the feature order: copy them over as they are
    for label_file in ("train_labels.pt", "test_labels.pt", "val_labels.pt"):
        label_tensor = torch.load(os.path.join(src_folder, label_file))
        torch.save(label_tensor, os.path.join(dst_folder, label_file))

## Function: load_data_and_labels
This function loads the train, test, and validation data and labels from the source folder and returns them as two lists of tensors (data, labels), each in train/test/val order.


In [4]:
def load_data_and_labels(src_folder):
    """Read the train/test/val tensors and their labels from ``src_folder``.

    Returns:
        A ``(data_tensors, label_tensors)`` pair of lists. Both lists hold
        three entries ordered train, test, val.
    """
    file_pairs = (
        ("train_data.pt", "train_labels.pt"),
        ("test_data.pt", "test_labels.pt"),
        ("val_data.pt", "val_labels.pt"),
    )

    # Each split's data file is read first, immediately followed by its labels.
    loaded_splits = [
        (
            torch.load(os.path.join(src_folder, data_name)),
            torch.load(os.path.join(src_folder, label_name)),
        )
        for data_name, label_name in file_pairs
    ]

    data_tensors = [split[0] for split in loaded_splits]
    label_tensors = [split[1] for split in loaded_splits]
    return data_tensors, label_tensors

## Function: create_data_loaders
This function creates DataLoader objects for the train and test datasets.

In [5]:
def create_data_loaders(train_data, train_labels, test_data, test_labels, batch_size=180, num_workers=2):
    """Wrap the train and test tensors in ``DataLoader`` objects.

    Args:
        train_data, train_labels: Tensors for the training split; paired
            sample-by-sample through ``TensorDataset``.
        test_data, test_labels: Tensors for the test split.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Worker processes used by each loader. Defaults to 2 (the
            value that used to be hard-coded); pass 0 to load in the main
            process.

    Returns:
        ``(train_loader, test_loader)``. The training loader reshuffles every
        epoch; the test loader keeps the original sample order.
    """
    # The data is already stored as tensors, so no torchvision transform is
    # applied here (the previously constructed ToTensor pipelines were unused).
    train_dataset = TensorDataset(train_data, train_labels)
    test_dataset = TensorDataset(test_data, test_labels)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader


## Function: train_model
This function trains the model for one pass (epoch) over the provided training data and returns the average loss per batch.

In [6]:
def train_model(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Every batch is moved to ``device``, pushed through ``model``, scored with
    ``criterion`` and used for a single ``optimizer`` step.

    Returns:
        The sum of the per-batch loss values divided by the number of batches.
    """
    model.train()

    batch_losses = []
    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses, 0.0) / len(train_loader)


 ## Function: test_model
 This function tests the model using the provided test data and returns the accuracy.

In [7]:
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

## Function: test_train
This function trains the model and evaluates it on the test set after every epoch.

In [8]:
def test_train(src_folder, num_epochs=100, device=None):
    """Train an EfficientNet-B0 on the dataset in ``src_folder``, testing after every epoch.

    Args:
        src_folder: Folder holding the ``*_data.pt`` / ``*_labels.pt`` files
            read by ``load_data_and_labels``. Only the train and test splits
            are used here; the validation split is ignored.
        num_epochs: Number of training epochs.
        device: Optional device (string or ``torch.device``) to train on. When
            omitted, ``cuda:3`` is used if the machine has at least four GPUs
            (the original setup), otherwise the default CUDA device, otherwise
            the CPU.

    Prints the training loss and test accuracy once per epoch; returns nothing.
    """
    if device is None:
        if not torch.cuda.is_available():
            device = "cpu"
        elif torch.cuda.device_count() > 3:
            device = "cuda:3"
        else:
            # Fewer than four GPUs: "cuda:3" would not exist on this host.
            device = "cuda"
    device = torch.device(device)

    # Load data and labels once (each call reads all six files from disk)
    data_tensors, label_tensors = load_data_and_labels(src_folder)
    train_data, test_data, _ = data_tensors
    train_labels, test_labels, _ = label_tensors

    # Create data loaders
    train_loader, test_loader = create_data_loaders(train_data, train_labels, test_data, test_labels)

    # Load a pretrained EfficientNet-B0 with a 2-class head and 1 input channel
    model = create_model("efficientnet_b0", pretrained=True, num_classes=2, in_chans=1)
    model = model.to(device)

    # Set up the criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=5e-4)

    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader, criterion, optimizer, device)
        test_accuracy = test_model(model, test_loader, device)

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Test accuracy: {test_accuracy:.2f}%")

        # Step decay: divide the learning rate by 10 after every 30 epochs
        if (epoch + 1) % 30 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1


# Step 1: Create a randomly permuted version of the dataset cifar10_binary

In [59]:
src_folder = "user_datasets/cifar10_binary"
dst_folder = "user_datasets/cifar10_binary_rand_perm"
perm_path = None  # None -> load_and_permute_data draws a random permutation

# load_and_permute_data draws the permutation with Python's `random` module;
# seed it so that re-running this cell yields the same permuted dataset.
PERMUTATION_SEED = 0
random.seed(PERMUTATION_SEED)

# Start from a clean destination so no files from an earlier run survive
if os.path.exists(dst_folder):
    shutil.rmtree(dst_folder)  # Removes the folder and everything in it

os.makedirs(dst_folder)  # Re-creates the (now empty) folder

load_and_permute_data(src_folder, dst_folder, perm_path=perm_path)

# Step 2: Rearrange the permuted features using Feature Rearrangement algorithm & IGTD

To do this, first set the `data_dir_name` field in the YAML config files
`configs/cifar10_binary_restruct_feature_rearrangement_algorithm_config.yaml` and `configs/cifar10_binary_restruct_igtd_config.yaml`
to `cifar10_binary_rand_perm`. Then run the following commands:


In [61]:
# Run the restructuring script once per method; each `!` line is executed as a
# separate shell command by IPython.
# NOTE(review): presumably each run writes the rearrangement_*.pkl file that
# Step 3 loads from the permuted dataset folder - confirm in restructure_data.py.
!python3 restructure_data.py configs/cifar10_binary_restruct_feature_rearrangement_algorithm_config.yaml
!python3 restructure_data.py configs/cifar10_binary_restruct_igtd_config.yaml

## Testing: Feature Rearrangement algorithm rearranged features

# Step 3: Create versions of the dataset with features rearranged using Feature Rearrangement algorithm and IGTD


In [62]:
# Create a version of the randomly permuted dataset with features rearranged using Feature Rearrangement algorithm
# Both rearranged datasets are derived from the randomly permuted dataset
src_folder = "user_datasets/cifar10_binary_rand_perm"

# (destination folder, pickled rearrangement) for each method, processed in order:
#   1. Feature Rearrangement algorithm
#   2. IGTD
for dst_folder, perm_path in [
    (
        "user_datasets/cifar10_binary_feature_rearrangement_algorithm_rearr",
        "user_datasets/cifar10_binary_rand_perm/rearrangement_feature_rearrangement_algorithm.pkl",
    ),
    (
        "user_datasets/cifar10_binary_igtd_rearr",
        "user_datasets/cifar10_binary_rand_perm/rearrangement_igtd.pkl",
    ),
]:
    # Wipe any previous output, then recreate the folder empty
    if os.path.exists(dst_folder):
        shutil.rmtree(dst_folder)
    os.makedirs(dst_folder)

    load_and_permute_data(src_folder, dst_folder, perm_path=perm_path)

Loaded permutation from user_datasets/cifar10_binary_rand_perm/rearrangement_alg3.pkl
Loaded permutation from user_datasets/cifar10_binary_rand_perm/rearrangement_igtd.pkl


# Step 4: Test the utility of the different rearrangements

## Testing: Randomly permuted features

In [None]:
# Baseline: train and test on the randomly permuted dataset written in Step 1
# (no rearrangement applied).
src_folder = "user_datasets/cifar10_binary_rand_perm"
test_train(src_folder, num_epochs=100)

Epoch 1/100, Loss: 3.9407, Test accuracy: 54.32%
Epoch 2/100, Loss: 1.2332, Test accuracy: 58.67%
Epoch 3/100, Loss: 0.8278, Test accuracy: 63.22%
Epoch 4/100, Loss: 0.6545, Test accuracy: 65.42%
Epoch 5/100, Loss: 0.6118, Test accuracy: 66.62%
Epoch 6/100, Loss: 0.5900, Test accuracy: 65.87%
Epoch 7/100, Loss: 0.5700, Test accuracy: 67.42%
Epoch 8/100, Loss: 0.5397, Test accuracy: 66.22%
Epoch 9/100, Loss: 0.4953, Test accuracy: 64.47%
Epoch 10/100, Loss: 0.4384, Test accuracy: 63.97%
Epoch 11/100, Loss: 0.3579, Test accuracy: 63.47%
Epoch 12/100, Loss: 0.3124, Test accuracy: 64.42%
Epoch 13/100, Loss: 0.2473, Test accuracy: 63.77%
Epoch 14/100, Loss: 0.2163, Test accuracy: 64.12%
Epoch 15/100, Loss: 0.2198, Test accuracy: 66.37%
Epoch 16/100, Loss: 0.1848, Test accuracy: 64.37%
Epoch 17/100, Loss: 0.1746, Test accuracy: 64.42%
Epoch 18/100, Loss: 0.1508, Test accuracy: 65.47%
Epoch 19/100, Loss: 0.1474, Test accuracy: 66.47%
Epoch 20/100, Loss: 0.1056, Test accuracy: 64.97%
Epoch 21/

In [9]:
# Train and test on the dataset written in Step 3 using the Feature
# Rearrangement algorithm's permutation.
src_folder = "user_datasets/cifar10_binary_feature_rearrangement_algorithm_rearr"
test_train(src_folder, num_epochs=100)

Epoch 1/100, Loss: 3.6906, Test accuracy: 54.92%
Epoch 2/100, Loss: 1.2459, Test accuracy: 66.77%
Epoch 3/100, Loss: 0.7683, Test accuracy: 67.72%
Epoch 4/100, Loss: 0.5928, Test accuracy: 70.36%
Epoch 5/100, Loss: 0.5501, Test accuracy: 71.11%
Epoch 6/100, Loss: 0.5229, Test accuracy: 72.36%
Epoch 7/100, Loss: 0.4849, Test accuracy: 72.01%
Epoch 8/100, Loss: 0.4386, Test accuracy: 72.06%
Epoch 9/100, Loss: 0.3822, Test accuracy: 71.26%
Epoch 10/100, Loss: 0.3333, Test accuracy: 71.16%
Epoch 11/100, Loss: 0.3112, Test accuracy: 71.26%
Epoch 12/100, Loss: 0.4136, Test accuracy: 72.91%
Epoch 13/100, Loss: 0.3597, Test accuracy: 72.56%
Epoch 14/100, Loss: 0.2437, Test accuracy: 71.31%
Epoch 15/100, Loss: 0.1969, Test accuracy: 72.06%
Epoch 16/100, Loss: 0.1632, Test accuracy: 71.76%
Epoch 17/100, Loss: 0.1232, Test accuracy: 72.46%
Epoch 18/100, Loss: 0.1542, Test accuracy: 71.61%
Epoch 19/100, Loss: 0.1249, Test accuracy: 73.26%
Epoch 20/100, Loss: 0.0720, Test accuracy: 71.46%
Epoch 21/

## Testing: IGTD rearranged features

In [None]:
# Train and test on the dataset written in Step 3 using the IGTD permutation.
src_folder = "user_datasets/cifar10_binary_igtd_rearr"
test_train(src_folder, num_epochs=100)

Epoch 1/100, Loss: 3.5318, Test accuracy: 58.07%
Epoch 2/100, Loss: 1.2353, Test accuracy: 65.92%
Epoch 3/100, Loss: 0.7157, Test accuracy: 67.57%
Epoch 4/100, Loss: 0.5944, Test accuracy: 69.12%
Epoch 5/100, Loss: 0.5369, Test accuracy: 68.12%
Epoch 6/100, Loss: 0.5420, Test accuracy: 68.82%
Epoch 7/100, Loss: 0.5125, Test accuracy: 69.02%
Epoch 8/100, Loss: 0.5743, Test accuracy: 67.57%
Epoch 9/100, Loss: 0.5396, Test accuracy: 68.42%
Epoch 10/100, Loss: 0.5774, Test accuracy: 69.72%
Epoch 11/100, Loss: 0.5807, Test accuracy: 69.87%
Epoch 12/100, Loss: 0.5268, Test accuracy: 69.27%
Epoch 13/100, Loss: 0.5145, Test accuracy: 69.92%
Epoch 14/100, Loss: 0.4464, Test accuracy: 68.77%
Epoch 15/100, Loss: 0.3587, Test accuracy: 68.32%
Epoch 16/100, Loss: 0.2870, Test accuracy: 68.72%
Epoch 17/100, Loss: 0.2344, Test accuracy: 66.87%
Epoch 18/100, Loss: 0.2247, Test accuracy: 66.97%
Epoch 19/100, Loss: 0.1595, Test accuracy: 68.37%
Epoch 20/100, Loss: 0.1490, Test accuracy: 66.92%
Epoch 21/