In [None]:
"""libraries required"""
import locale
locale.getpreferredencoding = lambda: "UTF-8"

!pip install timm
!pip install torchattacks

Collecting timm
  Downloading timm-0.9.16-py3-none-any.whl (2.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/2.2 MB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hColle

In [None]:
"""connect to google drive to store and retrieve data"""
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Dataset Loader**

In [None]:
from torch.utils.data import Dataset
import torch
import torchaudio
import torchvision
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

class FSCDataset(Dataset):
    """FSC22 audio clips exposed as normalised log-mel spectrograms.

    Every item is read from disk, mixed down to a single channel, resampled
    to ``sr``, cut or zero-padded on the right to exactly ``num_samples``
    samples, and then converted by ``self.transform`` (mel spectrogram ->
    decibel scale -> resize to 224x224 -> min-max scaling).

    Args:
        annotations_file: Path of the metadata CSV. Column 1 is read as the
            audio file name and column 2 as the class id (1 is subtracted
            from it to obtain the returned label).
        audio_dir: Directory containing the audio files named in the CSV.
        device: Torch device the signal and the transforms are moved to.
        num_samples: Fixed clip length, in samples, after cut/pad.
        sr: Sample rate every clip is resampled to.
    """

    def __init__(self, annotations_file, audio_dir, device, num_samples=80000, sr=16000):
        self.device = device
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.num_samples = num_samples
        self.sample_rate = sr
        self.transform = torchvision.transforms.Compose([
            # convert signal to mel spectrogram
            torchaudio.transforms.MelSpectrogram(sample_rate=self.sample_rate, n_fft=2048, hop_length=512).to(self.device),
            # convert to log scale
            torchaudio.transforms.AmplitudeToDB().to(self.device),
            # resize to 224x224
            torchvision.transforms.Resize((224, 224), antialias=True).to(self.device),
            # change to [0, 1] range using min max scaling
            torchvision.transforms.Lambda(self._min_max_scale)
        ])

        # Not used by any code visible in this class; kept so external
        # readers of the attribute keep working.
        self.data = {}
        # One Resample module per source sample rate, built lazily, so the
        # resampling kernel is not recomputed for every item.
        self._resamplers = {}

    @staticmethod
    def _min_max_scale(x, eps=1e-8):
        """Scale ``x`` to [0, 1] by its own min and max.

        The range is clamped to at least ``eps`` so a constant input (for
        example an all-silent clip) maps to zeros instead of NaN from 0/0.
        Inputs whose range exceeds ``eps`` are scaled exactly as before.
        """
        lowest = x.min()
        value_range = (x.max() - lowest).clamp_min(eps)
        return (x - lowest) / value_range

    def __len__(self):
        """Number of rows in the annotations table."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(mel_spectrogram, label)`` for the clip at ``index``."""
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)
        signal, sr = torchaudio.load(audio_sample_path)
        signal = signal.to(self.device)

        # make number of channels = 1
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)

        # resample from the file's rate to self.sample_rate
        resample_transform = self._resamplers.get(sr)
        if resample_transform is None:
            resample_transform = torchaudio.transforms.Resample(sr, self.sample_rate).to(self.device)
            self._resamplers[sr] = resample_transform
        signal = resample_transform(signal)

        # cut if there are more than num_samples samples
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]

        # right pad 0s if less than num_samples
        if signal.shape[1] < self.num_samples:
            num_missing = self.num_samples - signal.shape[1]
            signal = torch.nn.functional.pad(signal, (0, num_missing))

        mel_spectrogram = self.transform(signal)

        return mel_spectrogram, label

    def _get_audio_sample_path(self, index):
        """Join ``audio_dir`` with the file name stored in column 1 of the row."""
        filename = self.annotations.iloc[index, 1]
        return os.path.join(self.audio_dir, filename)

    def _get_audio_sample_label(self, index):
        """Return column 2 of the row minus one."""
        return self.annotations.iloc[index, 2] - 1

In [None]:
# Locations of the FSC22 metadata CSV and audio files on the mounted Drive.
ANNOTATIONS_FILE = "/content/drive/MyDrive/data/FSC22dataset/Metadata/MetadataV1.0FSC22.csv"
AUDIO_DIR = "/content/drive/MyDrive/data/FSC22dataset/AudioWiseV1.0"
# Sample rate every clip is resampled to by FSCDataset.
SAMPLE_RATE = 16000

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'Using device: {device}')

# Pass SAMPLE_RATE explicitly so the constant above actually controls the
# dataset instead of silently relying on FSCDataset's matching default.
fsc = FSCDataset(ANNOTATIONS_FILE, AUDIO_DIR, device, sr=SAMPLE_RATE)
print(f"Total number of samples present in dataset: {len(fsc)}")

Using device: cpu
Total number of samples present in dataset: 2025


In [None]:
import h5py

# Cache the preprocessed spectrograms in one HDF5 file so later runs can skip
# audio decoding. Mode 'w' truncates any existing file at this path.
h5_file_path = '/content/drive/MyDrive/data/AdvData/FSC22.h5'
with h5py.File(h5_file_path, 'w') as hf:
    for i in range(len(fsc)):
        # Indexing the dataset runs the full load/resample/spectrogram pipeline.
        data, label = fsc[i]

        # One group per sample, named by its dataset index, holding the
        # spectrogram ('data') and its label ('label').
        grp = hf.create_group(f'sample_{i}')
        grp.create_dataset('data', data=data.cpu().numpy())
        grp.create_dataset('label', data=label)

print("Dataset saved to", h5_file_path)

Dataset saved to /content/drive/MyDrive/data/AdvData/FSC22.h5


# Directly load data from h5 file

In [None]:
import h5py
import torch
from torch.utils.data import TensorDataset

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Parallel lists: data_list[k] and label_list[k] belong to the same sample.
data_list = []
label_list = []

# Open the HDF5 file
h5_file_path = '/content/drive/MyDrive/data/AdvData/FSC22.h5'
with h5py.File(h5_file_path, 'r') as hf:
    # Iterate over the samples in the HDF5 file
    # NOTE(review): h5py presumably yields keys in name order, not numeric
    # order (sample_10 before sample_2) - confirm. Pairs stay aligned either
    # way because both lists are filled in the same iteration.
    for sample_name in hf.keys():
        data = torch.tensor(hf[sample_name]['data'][:])
        label = hf[sample_name]['label'][()]

        # Append data and label to lists
        data_list.append(data)
        label_list.append(label)

# Rebinds `fsc` to an in-memory TensorDataset of (spectrogram, label) pairs.
fsc = TensorDataset(torch.stack(data_list), torch.tensor(label_list))

**Function to train the model and calculate metrics**

In [None]:
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import random_split
import torch.nn.functional as F
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from sklearn.manifold import TSNE
import seaborn as sns

# function to train the model
def train_model(model, dataset, learning_rate=2e-4, num_epochs=10, batch_size=128, stepSize=10, gamma=0.1, validation_split=0.2, patience=3):
    """Train ``model`` on ``dataset`` with Adam, StepLR decay and early stopping.

    The dataset is randomly split into training and validation subsets. After
    every epoch the mean training loss and the validation loss/accuracy are
    printed. Training stops early once the validation loss has failed to
    improve for ``patience`` consecutive epochs. The model is trained in place
    and keeps the weights of the last epoch run (the best epoch is not
    restored). Uses the module-level ``device``.

    Args:
        model: ``nn.Module`` to train; moved to ``device``.
        dataset: Dataset yielding ``(input, target)`` pairs.
        learning_rate: Adam learning rate.
        num_epochs: Maximum number of epochs.
        batch_size: Batch size for both loaders.
        stepSize: ``step_size`` of the StepLR scheduler (epochs between decays).
        gamma: Multiplicative decay factor of the StepLR scheduler.
        validation_split: Fraction of the dataset held out for validation.
            When the resulting validation set is empty, validation and early
            stopping are skipped instead of raising ZeroDivisionError.
        patience: Non-improving epochs tolerated before stopping.

    Returns:
        None.
    """
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=stepSize, gamma=gamma)
    # scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Split dataset into training and validation sets
    num_samples = len(dataset)
    num_validation_samples = int(validation_split * num_samples)
    num_training_samples = num_samples - num_validation_samples
    training_dataset, validation_dataset = random_split(dataset, [num_training_samples, num_validation_samples])

    train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
    has_validation = num_validation_samples > 0

    best_valid_loss = float('inf')
    patience_counter = 0
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        model.train()
        # Accumulate a sample-weighted sum so the reported figure is the
        # epoch mean rather than the loss of whichever batch came last.
        running_loss = 0.0
        samples_seen = 0
        for batch_input, batch_target in train_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            optimizer.zero_grad()

            predictions = model(batch_input)

            loss = loss_fn(predictions, batch_target)
            loss.backward()

            optimizer.step()

            batch_count = batch_target.size(0)
            running_loss += loss.item() * batch_count
            samples_seen += batch_count

        if samples_seen:
            print(f"  Training Loss: {running_loss / samples_seen}")

        # Validation phase
        if has_validation:
            model.eval()
            with torch.no_grad():
                valid_loss = 0
                correct = 0
                total = 0
                for batch_input, batch_target in valid_loader:
                    batch_input, batch_target = batch_input.to(device), batch_target.to(device)
                    predictions = model(batch_input)
                    _, predicted = torch.max(predictions, 1)
                    total += batch_target.size(0)
                    correct += (predicted == batch_target).sum().item()
                    valid_loss += loss_fn(predictions, batch_target).item()

                accuracy = correct / total
                valid_loss /= len(valid_loader)
                print(f"  Validation Loss: {valid_loss}, Accuracy: {accuracy * 100:.2f}%")

            # Check for early stopping
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping after {patience} epochs of no improvement.")
                    return

        scheduler.step()

    print("Training finished.")

# function to calculate metrics
def calculate_metrics(model, dataset):
    """Print and return the accuracy of ``model`` on ``dataset``.

    The model is moved to the module-level ``device`` and put in eval mode;
    predictions are the argmax of the model output over dim 1.

    Args:
        model: Classifier to evaluate.
        dataset: Dataset yielding ``(input, target)`` pairs.

    Returns:
        The accuracy as a float in [0, 1], or ``None`` when the dataset is
        empty (previously this case raised inside ``torch.cat``).
    """
    # Evaluation does not depend on sample order, so no shuffling is needed.
    data_loader = DataLoader(dataset, batch_size=128, shuffle=False)
    model = model.to(device)
    model.eval()
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for batch_input, batch_target in data_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            predictions = model(batch_input)
            all_predictions.append(predictions.argmax(dim=1))
            all_targets.append(batch_target)

    if not all_predictions:
        print("Accuracy: n/a (empty dataset)")
        return None

    all_predictions = torch.cat(all_predictions).cpu().numpy()
    all_targets = torch.cat(all_targets).cpu().numpy()

    accuracy = accuracy_score(all_targets, all_predictions)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # precision, recall, f1, _ = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')
    # print(f"Precision: {precision:.4f}")
    # print(f"Recall: {recall:.4f}")
    # print(f"F1 Score: {f1:.4f}")

    # cm = confusion_matrix(all_targets, all_predictions)
    # # Plot the confusion matrix using Seaborn
    # sns.heatmap(cm, annot=True)
    # plt.title("Confusion Matrix")
    # plt.xlabel("Predicted Label")
    # plt.ylabel("True Label")
    # plt.show()

    return accuracy

In [None]:
import torchattacks
import h5py
import time

def generate_adversarial(dataset, model, model_name, target_label=19):
    """Craft targeted adversarial examples with four attacks and save them.

    For each of FGSM, BIM, PGD and CW the whole ``dataset`` is attacked in
    batches of 32 with every sample pushed towards ``target_label``. Results
    are written to
    ``/content/drive/MyDrive/data/Target/FSC22/{model_name}/{attack}.h5`` as
    one group per sample (``sample_{i}``) holding the adversarial input
    (``data``) and the ORIGINAL label (``label``). Existing groups with the
    same name are replaced. Uses the module-level ``device``.

    Args:
        dataset: Dataset yielding ``(input, label)`` pairs.
        model: Model the attacks are computed against; moved to ``device``.
        model_name: Sub-directory name used in the output path.
        target_label: Class index every sample is steered towards. Defaults
            to 19, the value that was previously hard-coded (annotated as
            "clapping" in the original code).
    """
    data_loader = DataLoader(dataset, batch_size=32)
    model = model.to(device)

    attack_dict = {
        'fgsm': torchattacks.FGSM(model, eps=8/255),
        'bim': torchattacks.BIM(model, eps=8/255, alpha=2/255, steps=4),
        'pgd': torchattacks.PGD(model, eps=8/255, alpha=1/255, steps=4, random_start=True),
        'cw': torchattacks.CW(model, c=1, kappa=0, steps=20, lr=0.01),
    }
    # Targeted mode: the labels passed to the attack are treated as targets.
    for attk in attack_dict.values():
        attk.set_mode_targeted_by_label()

    for name, attk in attack_dict.items():
        st = time.time()
        i = 0
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{model_name}/{name}.h5'
        # Opening in 'a' mode fails if the directory is missing; create it.
        os.makedirs(os.path.dirname(h5_file_path), exist_ok=True)
        with h5py.File(h5_file_path, 'a') as hf:
            print(f"started {name}")
            for current_batch, (batch_input, batch_target) in enumerate(data_loader):

                # generate adv image for image
                batch_input, batch_target = batch_input.to(device), batch_target.to(device)
                new_labels = torch.full(batch_target.shape, target_label, dtype=torch.long, device=device)

                adv_images = attk(batch_input, new_labels)

                # Move the whole batch to host memory once instead of per sample.
                adv_batch = adv_images.detach().cpu().numpy()
                true_labels = batch_target.cpu().numpy()

                # write in h5 file
                for j in range(adv_batch.shape[0]):
                    grp_name = f'sample_{i}'
                    if grp_name in hf:
                        del hf[grp_name]  # Delete existing group
                    grp = hf.create_group(grp_name)
                    grp.create_dataset('data', data=adv_batch[j])
                    grp.create_dataset('label', data=true_labels[j])
                    i += 1
                if current_batch % 100 == 0:
                    print(f"{current_batch} - ", end="")
            print("\n")

        elapsed = time.time() - st
        print(f'{name} adv dataset for {model_name}')
        # The duration was measured before but never reported.
        print(f'  elapsed: {elapsed:.1f}s')
def calculate_accuracy_for_adv_data(model, model_name, adv_dir):
    """Run the given model on all adversarial datasets of the `adv_dir` directory.

    For each attack (fgsm, bim, pgd, cw) the file
    ``/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attack}.h5`` is
    loaded into memory and passed to ``calculate_metrics``.

    Args:
        model: Classifier to evaluate.
        model_name: Name used only in the printed header.
        adv_dir: Sub-directory holding the adversarial HDF5 files.
    """
    # Local import so this function does not depend on which notebook cell
    # imported TensorDataset.
    from torch.utils.data import TensorDataset

    attk_list = ['fgsm', 'bim', 'pgd', 'cw']

    for attk in attk_list:
        # The f-prefix was missing, so the literal text "{adv_dir}/{attk}"
        # was used as the path.
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5'
        data_list = []
        label_list = []

        with h5py.File(h5_file_path, 'r') as hf:
            # Iterate over the samples in the HDF5 file
            for sample_name in hf.keys():
                data = torch.tensor(hf[sample_name]['data'][:])
                label = hf[sample_name]['label'][()]

                # Append data and label to lists
                data_list.append(data)
                label_list.append(label)

        # Built the same way as the other loaders in this notebook; the
        # previously referenced SpectrogramDataset is not defined in the
        # visible code.
        dataset_class = TensorDataset(torch.stack(data_list), torch.tensor(label_list))
        print(f'Stats for {model_name} on {adv_dir}/{attk} dataset')
        calculate_metrics(model, dataset_class)

def for_all(model, model_name):
    """Evaluate `model` on the adversarial sets of every listed source directory."""
    for source_dir in ('resnet18', 'resnet50', 'vit_base', 'vit_large', 'mixer'):
        calculate_accuracy_for_adv_data(model, model_name, source_dir)

# ResNet 18

In [None]:
import timm

# Start from the pretrained timm ResNet-18 checkpoint.
resnet18 = timm.create_model('resnet18.a1_in1k', pretrained=True)
# Replace the stem with a 1-input-channel conv. This is a new layer, so it
# starts from fresh initialisation rather than the pretrained weights.
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# Replace the classifier head with a new 27-output linear layer.
resnet18.fc = nn.Linear(in_features=512, out_features=27, bias=True)

In [None]:
# Release cached GPU memory before training.
torch.cuda.empty_cache()
train_model(resnet18, fsc, learning_rate=1e-3, num_epochs=12, batch_size=64)

# Save the trained weights; later cells reload this file.
torch.save(resnet18.state_dict(), '/content/drive/MyDrive/data/Models/FSC22/resnet18.pth')

Epoch 1/12
  Training Loss: 2.710721969604492
  Validation Loss: 3.2435966559818814, Accuracy: 6.67%
Epoch 2/12
  Training Loss: 2.0344345569610596
  Validation Loss: 3.0930284091404507, Accuracy: 9.38%
Epoch 3/12
  Training Loss: 1.4377520084381104
  Validation Loss: 2.5995363167354038, Accuracy: 19.26%
Epoch 4/12
  Training Loss: 1.374700665473938
  Validation Loss: 2.0250676529748097, Accuracy: 40.00%
Epoch 5/12
  Training Loss: 0.4959043860435486
  Validation Loss: 2.191277861595154, Accuracy: 33.58%
Epoch 6/12
  Training Loss: 0.7159674763679504
  Validation Loss: 1.7947085074016027, Accuracy: 49.14%
Epoch 7/12
  Training Loss: 0.39392462372779846
  Validation Loss: 2.6157002108437672, Accuracy: 29.14%
Epoch 8/12
  Training Loss: 0.36386561393737793
  Validation Loss: 1.3515149354934692, Accuracy: 59.01%
Epoch 9/12
  Training Loss: 0.35319042205810547
  Validation Loss: 1.359156506402152, Accuracy: 59.75%
Epoch 10/12
  Training Loss: 0.45977360010147095
  Validation Loss: 1.367829

In [None]:
torch.cuda.empty_cache()
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only runtime
# (same pattern as the vit_large cell).
checkpoint = torch.load('/content/drive/MyDrive/data/Models/FSC22/resnet18.pth', map_location=torch.device(device))
resnet18.load_state_dict(checkpoint)

generate_adversarial(fsc, resnet18, 'resnet18')

Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
started fgsm
0 - 

fgsm adv dataset for resnet18
started bim
0 - 

bim adv dataset for resnet18
started pgd
0 - 

pgd adv dataset for resnet18
started cw
0 - 

cw adv dataset for resnet18


In [None]:
import timm

def ret_pred(model, dataset):
    """Return the predicted class index of every sample in `dataset`.

    The model is moved to the module-level `device` and switched to eval
    mode; batches of 128 are scored without gradients and the per-batch
    argmax over dim 1 is concatenated in dataset order.
    """
    loader = DataLoader(dataset, batch_size=128)
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        per_batch = [model(inputs.to(device)).argmax(dim=1) for inputs, _ in loader]
    return torch.cat(per_batch)

def cal_ratio():
    """Print transferability ratios for adversarial sets crafted on ResNet-18.

    For each attack file under ``.../Target/FSC22/resnet18/`` three fractions
    are printed per model: ``unfooled`` (prediction equals the stored label),
    ``same`` (wrong, and equal to the source model's prediction) and ``diff``
    (wrong, and different from the source model's prediction). The source
    model (ResNet-18) is printed first, followed by the
    ``Ratio for ...`` header, then ResNet-50, ViT-Base and Mixer. For the
    source model every wrong prediction counts as ``same`` by definition.

    Changes from the earlier version: the four models are built and loaded
    once instead of once per attack, fractions are taken over the actual
    number of samples instead of a hard-coded 2025, and checkpoints are
    loaded with ``map_location`` so they also load on a CPU-only runtime.
    Uses the module-level ``device`` and ``ret_pred``.
    """
    attk_list = ['fgsm', 'bim', 'pgd', 'cw']
    adv_dir = 'resnet18'
    checkpoint_dir = '/content/drive/MyDrive/data/Models/FSC22'

    def make_resnet(timm_name, fc_in):
        # ResNet with a 1-channel stem and a 27-class head.
        net = timm.create_model(timm_name, pretrained=True)
        net.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        net.fc = nn.Linear(in_features=fc_in, out_features=27, bias=True)
        return net

    def make_vit_base():
        # ViT-Base with a 1-channel patch embedding and a 27-class head.
        net = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
        net.patch_embed.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
        net.head = nn.Linear(in_features=768, out_features=27, bias=True)
        return net

    def make_mixer():
        # MLP-Mixer with a 1-channel stem and a 27-class head.
        net = timm.create_model('mixer_b16_224.goog_in21k_ft_in1k', pretrained=True)
        net.stem.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
        net.head = nn.Linear(in_features=768, out_features=27, bias=True)
        return net

    def restore(net, checkpoint_name):
        # Load the fine-tuned weights saved by the training cells.
        torch.cuda.empty_cache()
        state = torch.load(f'{checkpoint_dir}/{checkpoint_name}.pth', map_location=torch.device(device))
        net.load_state_dict(state)
        return net

    def tally(pred, source_pred, labels):
        # Fractions of (unfooled, same-as-source, different-from-source).
        total = labels.numel()
        correct = pred == labels
        unfooled = int(correct.sum())
        same = int((~correct & (pred == source_pred)).sum())
        diff = total - unfooled - same
        return unfooled / total, same / total, diff / total

    source_model = restore(make_resnet('resnet18.a1_in1k', 512), 'resnet18')
    transfer_models = [
        restore(make_resnet('resnet50.a1_in1k', 2048), 'resnet50'),
        restore(make_vit_base(), 'vit_base'),
        restore(make_mixer(), 'mixer'),
    ]

    for attk in attk_list:
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5'
        data_list = []
        label_list = []

        with h5py.File(h5_file_path, 'r') as hf:
            # Iterate over the samples in the HDF5 file
            for sample_name in hf.keys():
                data_list.append(torch.tensor(hf[sample_name]['data'][:]))
                label_list.append(hf[sample_name]['label'][()])

        labels = torch.tensor(label_list)
        dataset_class = TensorDataset(torch.stack(data_list), labels)

        # Predictions are moved to the CPU so they can be compared with labels.
        source_pred = ret_pred(source_model, dataset_class).cpu()
        print(*tally(source_pred, source_pred, labels))

        print(f'Ratio for {adv_dir}/{attk} dataset')

        for net in transfer_models:
            pred = ret_pred(net, dataset_class).cpu()
            print(*tally(pred, source_pred, labels))

In [None]:
cal_ratio()

0.22666666666666666 0.7733333333333333 0.0
Ratio for resnet18/fgsm dataset
0.7318518518518519 0.06814814814814815 0.2
0.6834567901234568 0.03259259259259259 0.2839506172839506
0.9279012345679012 0.006419753086419753 0.06567901234567901
0.057777777777777775 0.9422222222222222 0.0
Ratio for resnet18/bim dataset
0.9293827160493827 0.0009876543209876543 0.06962962962962962
0.6893827160493827 0.008888888888888889 0.3017283950617284
0.9288888888888889 0.0014814814814814814 0.06962962962962962
0.08049382716049383 0.9195061728395062 0.0
Ratio for resnet18/pgd dataset
0.9160493827160494 0.0004938271604938272 0.08345679012345679
0.6869135802469136 0.008888888888888889 0.30419753086419754
0.928395061728395 0.0014814814814814814 0.07012345679012345
0.7619753086419753 0.2380246913580247 0.0
Ratio for resnet18/cw dataset
0.9501234567901234 0.0049382716049382715 0.044938271604938275
0.688395061728395 0.0049382716049382715 0.30666666666666664
0.928395061728395 0.0034567901234567903 0.06814814814814815

# ResNet 50

In [None]:
import timm

# Start from the pretrained timm ResNet-50 checkpoint.
resnet50 = timm.create_model('resnet50.a1_in1k', pretrained=True)
# Replace the stem with a 1-input-channel conv. This is a new layer, so it
# starts from fresh initialisation rather than the pretrained weights.
resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# Replace the classifier head with a new 27-output linear layer.
resnet50.fc = nn.Linear(in_features=2048, out_features=27, bias=True)

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

In [None]:
# Release cached GPU memory before training.
torch.cuda.empty_cache()
# stepSize=8 is passed to the StepLR scheduler inside train_model.
train_model(resnet50, fsc, learning_rate=1e-3, num_epochs=12, batch_size=64, stepSize=8)

# Save the trained weights; later cells reload this file.
torch.save(resnet50.state_dict(), '/content/drive/MyDrive/data/Models/FSC22/resnet50.pth')

Epoch 1/12
  Training Loss: 2.4980111122131348
  Validation Loss: 3.47109545980181, Accuracy: 2.72%
Epoch 2/12
  Training Loss: 1.5741358995437622
  Validation Loss: 3.3762620857783725, Accuracy: 9.63%
Epoch 3/12
  Training Loss: 1.252087950706482
  Validation Loss: 2.4790454592023576, Accuracy: 27.90%
Epoch 4/12
  Training Loss: 0.8580464124679565
  Validation Loss: 1.9976906776428223, Accuracy: 39.51%
Epoch 5/12
  Training Loss: 0.6934912800788879
  Validation Loss: 1.8577135971614294, Accuracy: 50.12%
Epoch 6/12
  Training Loss: 1.0673478841781616
  Validation Loss: 1.3955492292131697, Accuracy: 54.07%
Epoch 7/12
  Training Loss: 0.7631818056106567
  Validation Loss: 1.303287148475647, Accuracy: 63.21%
Epoch 8/12
  Training Loss: 0.27572065591812134
  Validation Loss: 1.6422770534242903, Accuracy: 56.30%
Epoch 9/12
  Training Loss: 0.16625110805034637
  Validation Loss: 0.9815296104976109, Accuracy: 73.83%
Epoch 10/12
  Training Loss: 0.394150048494339
  Validation Loss: 0.939119756

In [None]:
import timm

def ret_pred(model, dataset):
    """Return the predicted class index of every sample in `dataset`.

    The model is moved to the module-level `device` and switched to eval
    mode; batches of 128 are scored without gradients and the per-batch
    argmax over dim 1 is concatenated in dataset order.
    """
    loader = DataLoader(dataset, batch_size=128)
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        per_batch = [model(inputs.to(device)).argmax(dim=1) for inputs, _ in loader]
    return torch.cat(per_batch)

def cal_ratio():
    """Print transferability ratios for adversarial sets crafted on ResNet-50.

    For each attack file under ``.../Target/FSC22/resnet50/`` three fractions
    are printed per model: ``unfooled`` (prediction equals the stored label),
    ``same`` (wrong, and equal to the source model's prediction) and ``diff``
    (wrong, and different from the source model's prediction). The source
    model (ResNet-50) is printed first, followed by the
    ``Ratio for ...`` header, then ResNet-18, ViT-Base and Mixer. For the
    source model every wrong prediction counts as ``same`` by definition.

    Changes from the earlier version: the four models are built and loaded
    once instead of once per attack, fractions are taken over the actual
    number of samples instead of a hard-coded 2025, checkpoints are loaded
    with ``map_location`` so they also load on a CPU-only runtime, and the
    misleading local names (``r18`` holding ResNet-50 predictions and vice
    versa) are gone. Uses the module-level ``device`` and ``ret_pred``.
    """
    attk_list = ['fgsm', 'bim', 'pgd', 'cw']
    adv_dir = 'resnet50'
    checkpoint_dir = '/content/drive/MyDrive/data/Models/FSC22'

    def make_resnet(timm_name, fc_in):
        # ResNet with a 1-channel stem and a 27-class head.
        net = timm.create_model(timm_name, pretrained=True)
        net.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        net.fc = nn.Linear(in_features=fc_in, out_features=27, bias=True)
        return net

    def make_vit_base():
        # ViT-Base with a 1-channel patch embedding and a 27-class head.
        net = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
        net.patch_embed.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
        net.head = nn.Linear(in_features=768, out_features=27, bias=True)
        return net

    def make_mixer():
        # MLP-Mixer with a 1-channel stem and a 27-class head.
        net = timm.create_model('mixer_b16_224.goog_in21k_ft_in1k', pretrained=True)
        net.stem.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
        net.head = nn.Linear(in_features=768, out_features=27, bias=True)
        return net

    def restore(net, checkpoint_name):
        # Load the fine-tuned weights saved by the training cells.
        torch.cuda.empty_cache()
        state = torch.load(f'{checkpoint_dir}/{checkpoint_name}.pth', map_location=torch.device(device))
        net.load_state_dict(state)
        return net

    def tally(pred, source_pred, labels):
        # Fractions of (unfooled, same-as-source, different-from-source).
        total = labels.numel()
        correct = pred == labels
        unfooled = int(correct.sum())
        same = int((~correct & (pred == source_pred)).sum())
        diff = total - unfooled - same
        return unfooled / total, same / total, diff / total

    source_model = restore(make_resnet('resnet50.a1_in1k', 2048), 'resnet50')
    transfer_models = [
        restore(make_resnet('resnet18.a1_in1k', 512), 'resnet18'),
        restore(make_vit_base(), 'vit_base'),
        restore(make_mixer(), 'mixer'),
    ]

    for attk in attk_list:
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5'
        data_list = []
        label_list = []

        with h5py.File(h5_file_path, 'r') as hf:
            # Iterate over the samples in the HDF5 file
            for sample_name in hf.keys():
                data_list.append(torch.tensor(hf[sample_name]['data'][:]))
                label_list.append(hf[sample_name]['label'][()])

        labels = torch.tensor(label_list)
        dataset_class = TensorDataset(torch.stack(data_list), labels)

        # Predictions are moved to the CPU so they can be compared with labels.
        source_pred = ret_pred(source_model, dataset_class).cpu()
        print(*tally(source_pred, source_pred, labels))

        print(f'Ratio for {adv_dir}/{attk} dataset')

        for net in transfer_models:
            pred = ret_pred(net, dataset_class).cpu()
            print(*tally(pred, source_pred, labels))

cal_ratio()

0.3530864197530864 0.6469135802469136 0.0
Ratio for resnet50/fgsm dataset
0.6266666666666667 0.10765432098765432 0.26567901234567903
0.6879012345679012 0.037037037037037035 0.2750617283950617
0.9264197530864198 0.011358024691358024 0.06222222222222222
0.10469135802469136 0.8953086419753087 0.0
Ratio for resnet50/bim dataset
0.9101234567901234 0.003950617283950617 0.08592592592592592
0.6859259259259259 0.013333333333333334 0.30074074074074075
0.9274074074074075 0.0019753086419753087 0.07061728395061728
0.1145679012345679 0.885432098765432 0.0
Ratio for resnet50/pgd dataset
0.8780246913580247 0.0049382716049382715 0.11703703703703704
0.6859259259259259 0.011851851851851851 0.3022222222222222
0.9279012345679012 0.0049382716049382715 0.0671604938271605
0.8409876543209877 0.15901234567901235 0.0
Ratio for resnet50/cw dataset
0.9432098765432099 0.005925925925925926 0.0508641975308642
0.6879012345679012 0.010864197530864197 0.3012345679012346
0.9279012345679012 0.0024691358024691358 0.0696296

In [None]:
torch.cuda.empty_cache()
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only runtime
# (same pattern as the vit_large cell).
checkpoint = torch.load('/content/drive/MyDrive/data/Models/FSC22/resnet50.pth', map_location=torch.device(device))
resnet50.load_state_dict(checkpoint)

generate_adversarial(fsc, resnet50, 'resnet50')

Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
started fgsm
0 - 

fgsm adv dataset for resnet50
started bim
0 - 

bim adv dataset for resnet50
started pgd
0 - 

pgd adv dataset for resnet50
started cw
0 - 

cw adv dataset for resnet50


# ViT Base

In [None]:
import timm

# Start from the pretrained timm ViT-Base (patch 16, 224) checkpoint.
vit_base = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
# Replace the patch projection with a 1-input-channel conv. This is a new
# layer, so it starts from fresh initialisation.
vit_base.patch_embed.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
# Replace the classifier head with a new 27-output linear layer.
vit_base.head = nn.Linear(in_features=768, out_features=27, bias=True)

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [None]:
# Release cached GPU memory before training.
torch.cuda.empty_cache()
train_model(vit_base, fsc, learning_rate=1e-4, num_epochs=12, batch_size=32, patience=3)

# Save the trained weights; later cells reload this file.
torch.save(vit_base.state_dict(), '/content/drive/MyDrive/data/Models/FSC22/vit_base.pth')

Epoch 1/12
  Training Loss: 3.4033825397491455
  Validation Loss: 3.3511867706592264, Accuracy: 3.70%
Epoch 2/12
  Training Loss: 3.2229294776916504
  Validation Loss: 3.2032161309168887, Accuracy: 6.91%
Epoch 3/12
  Training Loss: 2.8648111820220947
  Validation Loss: 2.9743086924919715, Accuracy: 11.36%
Epoch 4/12
  Training Loss: 2.601235866546631
  Validation Loss: 2.750975113648635, Accuracy: 18.52%
Epoch 5/12
  Training Loss: 2.6651289463043213
  Validation Loss: 2.707651064946101, Accuracy: 20.00%
Epoch 6/12
  Training Loss: 2.401378631591797
  Validation Loss: 2.3539011845221887, Accuracy: 29.38%
Epoch 7/12
  Training Loss: 2.397432804107666
  Validation Loss: 2.1694906766598043, Accuracy: 34.81%
Epoch 8/12
  Training Loss: 1.7917802333831787
  Validation Loss: 2.1182315349578857, Accuracy: 34.81%
Epoch 9/12
  Training Loss: 2.3528709411621094
  Validation Loss: 2.0192835881159854, Accuracy: 38.02%
Epoch 10/12
  Training Loss: 1.8000625371932983
  Validation Loss: 2.04457506766

In [None]:
torch.cuda.empty_cache()
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only runtime
# (same pattern as the vit_large cell).
checkpoint = torch.load('/content/drive/MyDrive/data/Models/FSC22/vit_base.pth', map_location=torch.device(device))
vit_base.load_state_dict(checkpoint)

generate_adversarial(fsc, vit_base, 'vit_base')

Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
started fgsm
0 - 

fgsm adv dataset for vit_base
started bim
0 - 

bim adv dataset for vit_base
started pgd
0 - 

pgd adv dataset for vit_base
started cw
0 - 

cw adv dataset for vit_base


# ViT Large

In [None]:
import timm

# Start from the pretrained timm ViT-Large (patch 16, 224) checkpoint.
vit_large = timm.create_model('vit_large_patch16_224.augreg_in21k_ft_in1k', pretrained=True)
# Replace the patch projection with a 1-input-channel conv. This is a new
# layer, so it starts from fresh initialisation.
vit_large.patch_embed.proj = nn.Conv2d(1, 1024, kernel_size=(16, 16), stride=(16, 16))
# Replace the classifier head with a new 27-output linear layer.
vit_large.head = nn.Linear(in_features=1024, out_features=27, bias=True)

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

In [None]:
# Release cached GPU memory before training.
torch.cuda.empty_cache()
# stepSize=8 is passed to the StepLR scheduler inside train_model;
# patience=5 is its early-stopping tolerance.
train_model(vit_large, fsc, learning_rate=1e-5, num_epochs=15, batch_size=32, patience=5, stepSize=8)

# Save the trained weights; later cells reload this file.
torch.save(vit_large.state_dict(), '/content/drive/MyDrive/data/Models/FSC22/vit_large.pth')

Epoch 1/15
  Training Loss: 2.803943157196045
  Validation Loss: 2.9927961092728834, Accuracy: 14.57%
Epoch 2/15
  Training Loss: 2.1639981269836426
  Validation Loss: 2.3334745443784275, Accuracy: 32.10%
Epoch 3/15
  Training Loss: 1.9786603450775146
  Validation Loss: 1.7968014753781831, Accuracy: 42.22%
Epoch 4/15
  Training Loss: 1.0768325328826904
  Validation Loss: 1.5473997684625478, Accuracy: 52.35%
Epoch 5/15
  Training Loss: 1.2476134300231934
  Validation Loss: 1.5908198540027325, Accuracy: 55.06%
Epoch 6/15
  Training Loss: 0.9780671000480652
  Validation Loss: 1.5359266446186945, Accuracy: 57.04%
Epoch 7/15
  Training Loss: 0.5545846223831177
  Validation Loss: 1.5265280375113854, Accuracy: 57.78%
Epoch 8/15
  Training Loss: 0.3354269862174988
  Validation Loss: 1.4757359348810637, Accuracy: 62.22%
Epoch 9/15
  Training Loss: 0.0322713740170002
  Validation Loss: 1.3847225216718821, Accuracy: 64.44%
Epoch 10/15
  Training Loss: 0.03290538117289543
  Validation Loss: 1.3946

In [None]:
torch.cuda.empty_cache()
# map_location remaps the saved tensors onto the current device.
checkpoint = torch.load('/content/drive/MyDrive/data/Models/FSC22/vit_large.pth', map_location=torch.device(device))
vit_large.load_state_dict(checkpoint)

# `gen` is not defined anywhere in this notebook; the function used by the
# other model sections is generate_adversarial.
generate_adversarial(fsc, vit_large, 'vit_large')

In [None]:
import timm

def ret_pred(model, dataset):
    """Predict a class index for every sample in `dataset`.

    The model is moved to the notebook-level `device`, switched to eval mode and
    run without gradient tracking over the dataset in batches of 128.

    Args:
        model: network to evaluate; its output is arg-maxed over dim 1.
        dataset: dataset yielding ``(input, target)`` pairs; targets are ignored.

    Returns:
        1-D tensor of predicted class indices, concatenated in dataset order.
    """
    net = model.to(device)
    net.eval()
    loader = DataLoader(dataset, batch_size=128)

    with torch.no_grad():
        per_batch = [net(inputs.to(device)).argmax(dim=1) for inputs, _ in loader]

    return torch.cat(per_batch)

def _build_finetuned_models():
    """Recreate the four fine-tuned classifiers and load their saved FSC22 weights.

    Returns:
        dict mapping model name ('resnet18', 'resnet50', 'vit_base', 'mixer') to the
        model with its checkpoint loaded. The dict order fixes the evaluation order.
    """
    model_dir = '/content/drive/MyDrive/data/Models/FSC22'

    resnet18 = timm.create_model('resnet18.a1_in1k', pretrained=True)
    resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet18.fc = nn.Linear(in_features=512, out_features=27, bias=True)

    resnet50 = timm.create_model('resnet50.a1_in1k', pretrained=True)
    resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet50.fc = nn.Linear(in_features=2048, out_features=27, bias=True)

    vit_base = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
    vit_base.patch_embed.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
    vit_base.head = nn.Linear(in_features=768, out_features=27, bias=True)

    mixer = timm.create_model('mixer_b16_224.goog_in21k_ft_in1k', pretrained=True)
    mixer.stem.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
    mixer.head = nn.Linear(in_features=768, out_features=27, bias=True)

    models = {'resnet18': resnet18, 'resnet50': resnet50, 'vit_base': vit_base, 'mixer': mixer}
    for name, net in models.items():
        torch.cuda.empty_cache()
        # map_location keeps the load working when the checkpoint was saved on another device
        state = torch.load(f'{model_dir}/{name}.pth', map_location=torch.device(device))
        net.load_state_dict(state)
    return models


def _load_adversarial_set(h5_file_path):
    """Read every sample group of an HDF5 file into a TensorDataset and a label tensor."""
    data_list = []
    label_list = []
    with h5py.File(h5_file_path, 'r') as hf:
        for sample_name in hf.keys():
            data_list.append(torch.tensor(hf[sample_name]['data'][:]))
            label_list.append(hf[sample_name]['label'][()])

    if not data_list:
        raise ValueError(f'no samples found in {h5_file_path}')

    labels = torch.tensor(label_list)
    return TensorDataset(torch.stack(data_list), labels), labels


def _outcome_ratios(preds, labels, source_preds=None):
    """Return the (unfooled, same, diff) fractions for one model's predictions.

    unfooled: prediction equals the stored label.
    same:     prediction differs from the label but equals the source model's prediction.
    diff:     prediction differs from both.
    With ``source_preds=None`` (the source model itself) every miss is counted as
    ``same`` and ``diff`` is 0.0, matching the original first-row output.
    """
    preds = preds.cpu().reshape(-1)
    labels = labels.cpu().reshape(-1)
    total = labels.numel()

    correct = preds == labels
    if source_preds is None:
        same = ~correct
    else:
        same = ~correct & (preds == source_preds.cpu().reshape(-1))
    diff = ~correct & ~same

    return (int(correct.sum()) / total, int(same.sum()) / total, int(diff.sum()) / total)


def cal_ratio(adv_dir='vit_base'):
    """Print how adversarial examples crafted on one model transfer to the others.

    For each attack in fgsm/bim/pgd/cw, loads
    ``/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5`` and prints one
    line of three fractions for the source model (the model named ``adv_dir``),
    then a header line, then one line per remaining model in the order
    resnet18, resnet50, vit_base, mixer (source excluded).

    Fractions are computed over the actual number of samples in the file instead
    of a hard-coded 2025.

    Args:
        adv_dir: name of the model the adversarial sets were generated on; also
            the sub-directory the .h5 files are read from.

    Raises:
        ValueError: if ``adv_dir`` is not one of the known models, or an .h5
            file contains no samples.
    """
    attk_list = ['fgsm', 'bim', 'pgd', 'cw']

    models = _build_finetuned_models()
    if adv_dir not in models:
        raise ValueError(f'unknown adv_dir {adv_dir!r}; expected one of {sorted(models)}')
    source_model = models[adv_dir]
    transfer_models = [net for name, net in models.items() if name != adv_dir]

    for attk in attk_list:
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5'
        dataset_class, labels = _load_adversarial_set(h5_file_path)

        source_preds = ret_pred(source_model, dataset_class)
        # The source-model row is printed before the header to keep the recorded output layout.
        print(*_outcome_ratios(source_preds, labels))

        print(f'Ratio for {adv_dir}/{attk} dataset')

        for net in transfer_models:
            print(*_outcome_ratios(ret_pred(net, dataset_class), labels, source_preds))

        # Drop the references so the tensors can be freed before the next attack is loaded.
        del dataset_class
        del labels

# Evaluate the adversarial sets stored under the 'vit_base' directory (cal_ratio's adv_dir).
cal_ratio()

0.2325925925925926 0.7674074074074074 0.0
Ratio for vit_base/fgsm dataset
0.7091358024691358 0.017777777777777778 0.2730864197530864
0.7901234567901234 0.012345679012345678 0.19753086419753085
0.9279012345679012 0.01728395061728395 0.054814814814814816
0.1708641975308642 0.8291358024691358 0.0
Ratio for vit_base/bim dataset
0.8306172839506173 0.012345679012345678 0.15703703703703703
0.8874074074074074 0.005432098765432099 0.10716049382716049
0.9274074074074075 0.008888888888888889 0.0637037037037037
0.4380246913580247 0.5619753086419753 0.0
Ratio for vit_base/pgd dataset
0.8661728395061729 0.013333333333333334 0.12049382716049382
0.9130864197530865 0.007901234567901235 0.07901234567901234
0.9288888888888889 0.015308641975308642 0.05580246913580247
0.5032098765432099 0.49679012345679013 0.0
Ratio for vit_base/cw dataset
0.9432098765432099 0.0034567901234567903 0.05333333333333334
0.9496296296296296 0.006419753086419753 0.04395061728395062
0.9274074074074075 0.010864197530864197 0.061728

# Mixer

In [None]:
import timm
# Build the Mixer-B/16 backbone from timm with its pretrained weights.
mixer = timm.create_model('mixer_b16_224.goog_in21k_ft_in1k', pretrained=True)
# Swap the stem projection for a freshly initialised 1-input-channel conv
# (presumably because the spectrogram inputs are single-channel — confirm against FSCDataset).
mixer.stem.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
# Replace the classification head with a new 27-way linear layer.
mixer.head = nn.Linear(in_features=768, out_features=27, bias=True)

model.safetensors:   0%|          | 0.00/240M [00:00<?, ?B/s]

In [None]:
# Release cached, currently unused GPU memory before starting the training run.
torch.cuda.empty_cache()
# Fine-tune mixer on `fsc`; train_model is defined elsewhere in the notebook.
train_model(mixer, fsc, learning_rate=1e-4, num_epochs=12, batch_size=64)

# Persist the trained weights (state_dict only) to the mounted Google Drive.
torch.save(mixer.state_dict(), '/content/drive/MyDrive/data/Models/FSC22/mixer.pth')

Epoch 1/12
  Training Loss: 2.8369762897491455
  Validation Loss: 3.023735216685704, Accuracy: 11.11%
Epoch 2/12
  Training Loss: 2.3608925342559814
  Validation Loss: 2.4845817429678783, Accuracy: 24.69%
Epoch 3/12
  Training Loss: 1.6835899353027344
  Validation Loss: 1.994707499231611, Accuracy: 40.49%
Epoch 4/12
  Training Loss: 1.4145755767822266
  Validation Loss: 1.7589199032102312, Accuracy: 45.93%
Epoch 5/12
  Training Loss: 1.4111098051071167
  Validation Loss: 1.5368074178695679, Accuracy: 54.32%
Epoch 6/12
  Training Loss: 0.71235191822052
  Validation Loss: 1.5113468851361955, Accuracy: 55.31%
Epoch 7/12
  Training Loss: 0.4558401107788086
  Validation Loss: 1.5051241431917464, Accuracy: 58.52%
Epoch 8/12
  Training Loss: 0.23333513736724854
  Validation Loss: 1.4426105533327376, Accuracy: 61.73%
Epoch 9/12
  Training Loss: 0.05255194753408432
  Validation Loss: 1.5103943347930908, Accuracy: 58.27%
Epoch 10/12
  Training Loss: 0.017816785722970963
  Validation Loss: 1.5024

In [None]:
# Release cached, currently unused GPU memory before reloading the model.
torch.cuda.empty_cache()
# Restore the fine-tuned mixer weights. map_location remaps the stored tensors onto
# `device`, so a checkpoint saved on GPU also loads on a CPU-only runtime
# (same pattern as the vit_large reload cell).
checkpoint = torch.load('/content/drive/MyDrive/data/Models/FSC22/mixer.pth', map_location=torch.device(device))
mixer.load_state_dict(checkpoint)

# Generate the adversarial datasets for this model; per the recorded cell output this
# runs the fgsm, bim, pgd and cw attacks in turn.
generate_adversarial(fsc, mixer, 'mixer')

Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
Attack mode is changed to 'targeted(label)'.
started fgsm
0 - 

fgsm adv dataset for mixer
started bim
0 - 

bim adv dataset for mixer
started pgd
0 - 

pgd adv dataset for mixer
started cw
0 - 

cw adv dataset for mixer


In [None]:
import timm

def ret_pred(model, dataset):
    """Predict a class index for every sample in `dataset`.

    The model is moved to the notebook-level `device`, switched to eval mode and
    run without gradient tracking over the dataset in batches of 128.

    Args:
        model: network to evaluate; its output is arg-maxed over dim 1.
        dataset: dataset yielding ``(input, target)`` pairs; targets are ignored.

    Returns:
        1-D tensor of predicted class indices, concatenated in dataset order.
    """
    net = model.to(device)
    net.eval()
    loader = DataLoader(dataset, batch_size=128)

    with torch.no_grad():
        per_batch = [net(inputs.to(device)).argmax(dim=1) for inputs, _ in loader]

    return torch.cat(per_batch)

def _build_finetuned_models():
    """Recreate the four fine-tuned classifiers and load their saved FSC22 weights.

    Returns:
        dict mapping model name ('resnet18', 'resnet50', 'vit_base', 'mixer') to the
        model with its checkpoint loaded. The dict order fixes the evaluation order.
    """
    model_dir = '/content/drive/MyDrive/data/Models/FSC22'

    resnet18 = timm.create_model('resnet18.a1_in1k', pretrained=True)
    resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet18.fc = nn.Linear(in_features=512, out_features=27, bias=True)

    resnet50 = timm.create_model('resnet50.a1_in1k', pretrained=True)
    resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet50.fc = nn.Linear(in_features=2048, out_features=27, bias=True)

    vit_base = timm.create_model('vit_base_patch16_224.augreg2_in21k_ft_in1k', pretrained=True)
    vit_base.patch_embed.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
    vit_base.head = nn.Linear(in_features=768, out_features=27, bias=True)

    mixer = timm.create_model('mixer_b16_224.goog_in21k_ft_in1k', pretrained=True)
    mixer.stem.proj = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
    mixer.head = nn.Linear(in_features=768, out_features=27, bias=True)

    models = {'resnet18': resnet18, 'resnet50': resnet50, 'vit_base': vit_base, 'mixer': mixer}
    for name, net in models.items():
        torch.cuda.empty_cache()
        # map_location keeps the load working when the checkpoint was saved on another device
        state = torch.load(f'{model_dir}/{name}.pth', map_location=torch.device(device))
        net.load_state_dict(state)
    return models


def _load_adversarial_set(h5_file_path):
    """Read every sample group of an HDF5 file into a TensorDataset and a label tensor."""
    data_list = []
    label_list = []
    with h5py.File(h5_file_path, 'r') as hf:
        for sample_name in hf.keys():
            data_list.append(torch.tensor(hf[sample_name]['data'][:]))
            label_list.append(hf[sample_name]['label'][()])

    if not data_list:
        raise ValueError(f'no samples found in {h5_file_path}')

    labels = torch.tensor(label_list)
    return TensorDataset(torch.stack(data_list), labels), labels


def _outcome_ratios(preds, labels, source_preds=None):
    """Return the (unfooled, same, diff) fractions for one model's predictions.

    unfooled: prediction equals the stored label.
    same:     prediction differs from the label but equals the source model's prediction.
    diff:     prediction differs from both.
    With ``source_preds=None`` (the source model itself) every miss is counted as
    ``same`` and ``diff`` is 0.0, matching the original first-row output.
    """
    preds = preds.cpu().reshape(-1)
    labels = labels.cpu().reshape(-1)
    total = labels.numel()

    correct = preds == labels
    if source_preds is None:
        same = ~correct
    else:
        same = ~correct & (preds == source_preds.cpu().reshape(-1))
    diff = ~correct & ~same

    return (int(correct.sum()) / total, int(same.sum()) / total, int(diff.sum()) / total)


def cal_ratio(adv_dir='mixer'):
    """Print how adversarial examples crafted on one model transfer to the others.

    For each attack in fgsm/bim/pgd/cw, loads
    ``/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5`` and prints one
    line of three fractions for the source model (the model named ``adv_dir``),
    then a header line, then one line per remaining model in the order
    resnet18, resnet50, vit_base, mixer (source excluded).

    Fractions are computed over the actual number of samples in the file instead
    of a hard-coded 2025.

    Args:
        adv_dir: name of the model the adversarial sets were generated on; also
            the sub-directory the .h5 files are read from.

    Raises:
        ValueError: if ``adv_dir`` is not one of the known models, or an .h5
            file contains no samples.
    """
    attk_list = ['fgsm', 'bim', 'pgd', 'cw']

    models = _build_finetuned_models()
    if adv_dir not in models:
        raise ValueError(f'unknown adv_dir {adv_dir!r}; expected one of {sorted(models)}')
    source_model = models[adv_dir]
    transfer_models = [net for name, net in models.items() if name != adv_dir]

    for attk in attk_list:
        h5_file_path = f'/content/drive/MyDrive/data/Target/FSC22/{adv_dir}/{attk}.h5'
        dataset_class, labels = _load_adversarial_set(h5_file_path)

        source_preds = ret_pred(source_model, dataset_class)
        # The source-model row is printed before the header to keep the recorded output layout.
        print(*_outcome_ratios(source_preds, labels))

        print(f'Ratio for {adv_dir}/{attk} dataset')

        for net in transfer_models:
            print(*_outcome_ratios(ret_pred(net, dataset_class), labels, source_preds))

        # Drop the references so the tensors can be freed before the next attack is loaded.
        del dataset_class
        del labels

# Evaluate the adversarial sets stored under the 'mixer' directory (cal_ratio's adv_dir).
cal_ratio()

0.08395061728395062 0.9160493827160494 0.0
Ratio for mixer/fgsm dataset
0.6474074074074074 0.023703703703703703 0.3288888888888889
0.7550617283950617 0.013333333333333334 0.23160493827160494
0.6785185185185185 0.034074074074074076 0.2874074074074074
0.037530864197530864 0.9624691358024692 0.0
Ratio for mixer/bim dataset
0.8967901234567901 0.0019753086419753087 0.10123456790123457
0.9244444444444444 0.0004938271604938272 0.07506172839506173
0.6854320987654321 0.007901234567901235 0.30666666666666664
0.08296296296296296 0.917037037037037 0.0
Ratio for mixer/pgd dataset
0.8711111111111111 0.0044444444444444444 0.12444444444444444
0.9145679012345679 0.0024691358024691358 0.08296296296296296
0.6879012345679012 0.01037037037037037 0.3017283950617284
0.4059259259259259 0.5940740740740741 0.0
Ratio for mixer/cw dataset
0.9422222222222222 0.0034567901234567903 0.05432098765432099
0.948641975308642 0.0024691358024691358 0.04888888888888889
0.6874074074074074 0.014814814814814815 0.29777777777777