In [1]:
import os
import glob
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set device
device = torch.device("cpu")

In [3]:
# Load dataset
imagePatches = glob.glob('archive/8863/*/*')
imagePatches = [imagePatches[i] for i in range(len(imagePatches)) if 'IDC' not in imagePatches[i]]
y = []
for img in imagePatches:
    if img.endswith('class0.png'):
        y.append(0)
    elif img.endswith('class1.png'):
        y.append(1)

In [4]:
# Create custom dataset
class MyDataset(Dataset):
    def __init__(self, df_data, transform=None):
        super().__init__()
        self.df = df_data.values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path, label = self.df[index]
        image = cv2.imread(img_path)
        image = cv2.resize(image, (50, 50))
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [5]:
# Prepare data
images_df = pd.DataFrame()
images_df["images"] = imagePatches
images_df["labels"] = y
train, test = train_test_split(images_df, stratify=images_df.labels, test_size=0.2, random_state=42)
train, val = train_test_split(train, stratify=train.labels, test_size=0.2, random_state=42)

In [6]:
print(train)

                                              images  labels
751   archive/8863/0/8863_idx5_x851_y1651_class0.png       0
34   archive/8863/1/8863_idx5_x1301_y1451_class1.png       1
132   archive/8863/1/8863_idx5_x1151_y751_class1.png       1
76   archive/8863/1/8863_idx5_x1501_y1051_class1.png       1
266  archive/8863/0/8863_idx5_x1651_y2301_class0.png       0
..                                               ...     ...
320  archive/8863/0/8863_idx5_x1901_y1851_class0.png       0
488   archive/8863/0/8863_idx5_x651_y1301_class0.png       0
701  archive/8863/0/8863_idx5_x1251_y1401_class0.png       0
525   archive/8863/0/8863_idx5_x501_y1451_class0.png       0
268   archive/8863/0/8863_idx5_x201_y1051_class0.png       0

[626 rows x 2 columns]


In [7]:
# Hyperparameters
num_epochs = 10
num_classes = 2
batch_size = 128
learning_rate = 0.001

In [8]:
# Data transformations
trans_train = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor()])
trans_valid = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor()])

In [9]:
# Create data loaders
dataset_train = MyDataset(df_data=train, transform=trans_train)
dataset_valid = MyDataset(df_data=val, transform=trans_valid)
dataset_test = MyDataset(df_data=test, transform=trans_valid)
loader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
loader_valid = DataLoader(dataset=dataset_valid, batch_size=batch_size//2, shuffle=True, num_workers=0)
loader_test = DataLoader(dataset=dataset_test, batch_size=batch_size//2, shuffle=False, num_workers=0)

In [10]:
# Load pre-trained VGG16 model
vggmodel = models.vgg16(weights='IMAGENET1K_V1')
vggmodel.classifier[6] = nn.Linear(4096, num_classes)

In [11]:
# Freeze pre-trained layers
for n, p in vggmodel.named_parameters():
    if 'classifier' in n:
        pass
    else:
        p.requires_grad = False

In [12]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vggmodel.classifier.parameters(), lr=learning_rate, momentum=0.9)

In [13]:
# Move model to device
vggmodel.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [14]:
# Train the model
vgg_best_accuracy = 0
vgg_best_weights = None
trl, trac, vall, valac = [], [], [], []

In [15]:
for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    vggmodel.train()
    for i, (inputs, targets) in enumerate(loader_train):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = vggmodel(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.data, 1)
        train_loss += loss.item() * inputs.size(0)
        train_correct += (predicted == targets).sum().item()
        train_total += targets.size(0)
    train_loss /= len(train)
    train_acc = train_correct / train_total
    trl.append(train_loss)
    trac.append(train_acc)

    val_loss = 0.0
    val_correct = 0
    val_total = 0
    vggmodel.eval()
    with torch.no_grad():
        for inputs, targets in loader_valid:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = vggmodel(inputs)
            loss = criterion(outputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            val_loss += loss.item() * inputs.size(0)
            val_correct += (predicted == targets).sum().item()
            val_total += targets.size(0) 
    val_loss /= len(val)
    val_acc = val_correct / val_total
    vall.append(val_loss)
    valac.append(val_acc)

    if val_acc > vgg_best_accuracy:
        vgg_best_accuracy = val_acc
        vgg_best_weights = vggmodel.state_dict()

    print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

Epoch 1 - Train Loss: 0.5824, Train Acc: 0.7188, Val Loss: 0.4819, Val Acc: 0.8025
Epoch 2 - Train Loss: 0.4302, Train Acc: 0.8323, Val Loss: 0.5405, Val Acc: 0.7962
Epoch 3 - Train Loss: 0.4451, Train Acc: 0.8482, Val Loss: 0.4367, Val Acc: 0.8280
Epoch 4 - Train Loss: 0.3536, Train Acc: 0.8626, Val Loss: 0.3613, Val Acc: 0.8471
Epoch 5 - Train Loss: 0.3145, Train Acc: 0.8706, Val Loss: 0.3761, Val Acc: 0.8408
Epoch 6 - Train Loss: 0.3086, Train Acc: 0.8738, Val Loss: 0.3374, Val Acc: 0.8726
Epoch 7 - Train Loss: 0.2842, Train Acc: 0.8722, Val Loss: 0.3160, Val Acc: 0.8726
Epoch 8 - Train Loss: 0.2763, Train Acc: 0.8818, Val Loss: 0.3330, Val Acc: 0.8790
Epoch 9 - Train Loss: 0.2759, Train Acc: 0.8834, Val Loss: 0.3406, Val Acc: 0.8790
Epoch 10 - Train Loss: 0.2584, Train Acc: 0.8946, Val Loss: 0.3274, Val Acc: 0.8599


In [16]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_test:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

Test Accuracy of the model on the test images: 90.3061224489796 %
[[146   9]
 [ 10  31]]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       155
           1       0.78      0.76      0.77        41

    accuracy                           0.90       196
   macro avg       0.86      0.85      0.85       196
weighted avg       0.90      0.90      0.90       196



In [17]:
# Adversarial attack functions
def fgsm_attack(model, criterion, images, labels, epsilon):
    model.eval()
    images = images.to(device)
    labels = labels.to(device)
    images.requires_grad = True
    outputs = model(images)
    loss = criterion(outputs, labels)
    model.zero_grad()
    loss.backward()
    gradient_sign = torch.sign(images.grad)
    perturbed_images = images + epsilon * gradient_sign
    perturbed_images = torch.clamp(perturbed_images, 0, 1)
    return perturbed_images

def pgd_attack(model, criterion, images, labels, epsilon, alpha, iters):
    model.eval()
    images = images.to(device)
    labels = labels.to(device)
    perturbed_images = images.clone().detach().requires_grad_(True)
    for i in range(iters):
        outputs = model(perturbed_images)
        loss = criterion(outputs, labels)
        model.zero_grad()
        gradients = autograd.grad(loss, perturbed_images)[0]
        gradient_sign = torch.sign(gradients)
        perturbed_images = perturbed_images + alpha * gradient_sign
        perturbed_images = torch.max(torch.min(perturbed_images, images + epsilon), images - epsilon)
        perturbed_images = torch.clamp(perturbed_images, 0, 1)
    return perturbed_images

def ifgsm_attack(model, criterion, images, labels, epsilon, alpha, iters):
    model.eval()
    images = images.to(device)
    labels = labels.to(device)
    perturbed_images = images.clone().detach().requires_grad_(True)
    for i in range(iters):
        outputs = model(perturbed_images)
        loss = criterion(outputs, labels)
        model.zero_grad()
        gradients = autograd.grad(loss, perturbed_images)[0]
        gradient_sign = torch.sign(gradients)
        perturbed_images = perturbed_images + alpha * gradient_sign
        perturbed_images = torch.max(torch.min(perturbed_images, images + epsilon), images - epsilon)
        perturbed_images = torch.clamp(perturbed_images, 0, 1)
    return perturbed_images

In [18]:
# Create a directory to save the adversarial examples
adv_examples_dir = 'adv_examples'
os.makedirs(adv_examples_dir, exist_ok=True)

In [19]:
# Function to save adversarial examples as image files
def save_adv_examples(adv_images, labels, attack_name):
    attack_dir = os.path.join(adv_examples_dir, attack_name)
    os.makedirs(attack_dir, exist_ok=True)

    for i, (adv_img, label) in enumerate(zip(adv_images, labels)):
        # Convert the adversarial image tensor to a PIL Image
        adv_img = adv_img.permute(1, 2, 0).cpu().detach().numpy()
        adv_img = (adv_img * 255).astype('uint8')
        adv_img = Image.fromarray(adv_img)

        # Save the adversarial image as a PNG file
        file_name = f"{label}_{i}.png"
        adv_img.save(os.path.join(attack_dir, file_name))

    print(f'Adversarial examples from {attack_name} attack saved to {attack_dir}')

In [20]:
# Save FGSM adversarial examples
fgsm_images, fgsm_labels = next(iter(loader_train))
fgsm_adv_images = fgsm_attack(vggmodel, criterion, fgsm_images, fgsm_labels, epsilon=0.05)

# Evaluate the model's predictions on the perturbed images
vggmodel.eval()
with torch.no_grad():
    outputs = vggmodel(fgsm_adv_images)
    _, predicted = torch.max(outputs.data, 1)
    
# Move fgsm_labels tensor to the same device as predicted tensor
fgsm_labels = fgsm_labels.to(predicted.device)

# Calculate accuracy
accuracy = (predicted == fgsm_labels).sum().item() / fgsm_labels.size(0)
print(f"Accuracy on FGSM perturbed images: {accuracy:.2%}")

save_adv_examples(fgsm_adv_images, fgsm_labels, 'fgsm')

Accuracy on FGSM perturbed images: 50.78%
Adversarial examples from fgsm attack saved to adv_examples/fgsm


In [21]:
# Save PGD adversarial examples
pgd_images, pgd_labels = next(iter(loader_train))
pgd_adv_images = pgd_attack(vggmodel, criterion, pgd_images, pgd_labels, epsilon=0.05, alpha=0.01, iters=10)

# Evaluate the model's predictions on the perturbed images
vggmodel.eval()
with torch.no_grad():
    outputs = vggmodel(pgd_adv_images)
    _, predicted = torch.max(outputs.data, 1)
    
# Move fgsm_labels tensor to the same device as predicted tensor
pgd_labels = pgd_labels.to(predicted.device)

# Calculate accuracy
accuracy = (predicted == pgd_labels).sum().item() / pgd_labels.size(0)
print(f"Accuracy on PGD perturbed images: {accuracy:.2%}")

save_adv_examples(pgd_adv_images, pgd_labels, 'pgd')

Accuracy on PGD perturbed images: 1.56%
Adversarial examples from pgd attack saved to adv_examples/pgd


In [22]:
# Save I-FGSM adversarial examples
ifgsm_images, ifgsm_labels = next(iter(loader_train))
ifgsm_adv_images = ifgsm_attack(vggmodel, criterion, ifgsm_images, ifgsm_labels, epsilon=0.05, alpha=0.01, iters=10)

# Evaluate the model's predictions on the perturbed images
vggmodel.eval()
with torch.no_grad():
    outputs = vggmodel(ifgsm_adv_images)
    _, predicted = torch.max(outputs.data, 1)
    
# Move fgsm_labels tensor to the same device as predicted tensor
ifgsm_labels = ifgsm_labels.to(predicted.device)

# Calculate accuracy
accuracy = (predicted == ifgsm_labels).sum().item() / ifgsm_labels.size(0)
print(f"Accuracy on I-FGSM perturbed images: {accuracy:.2%}")


save_adv_examples(ifgsm_adv_images, ifgsm_labels, 'ifgsm')

Accuracy on I-FGSM perturbed images: 0.78%
Adversarial examples from ifgsm attack saved to adv_examples/ifgsm


In [23]:
all_image_patches = glob.glob('adv_examples/*/*')
new_y = []
for img in all_image_patches:
    imgName = img.split("/")[-1]
    if imgName.startswith('0_'):
        new_y.append(0)
    elif imgName.startswith('1_'):
        new_y.append(1)

In [24]:
# Load dataset
imagePatches = glob.glob('dataset/8863/*/*')
imagePatches = [imagePatches[i] for i in range(len(imagePatches)) if 'IDC' not in imagePatches[i]]
for img in imagePatches:
    all_image_patches.append(img)
    if img.endswith('class0.png'):
        new_y.append(0)
    elif img.endswith('class1.png'):
        new_y.append(1)

In [25]:
print(len(all_image_patches))

516


In [26]:
print(len(new_y))

516


In [27]:
# Prepare data
images_df = pd.DataFrame()
images_df["images"] = all_image_patches
images_df["labels"] = new_y
train, test = train_test_split(images_df, stratify=images_df.labels, test_size=0.2, random_state=42)
train, val = train_test_split(train, stratify=train.labels, test_size=0.2, random_state=42)

In [28]:
# Create data loaders
dataset_train = MyDataset(df_data=train, transform=trans_train)
dataset_valid = MyDataset(df_data=val, transform=trans_valid)
dataset_test = MyDataset(df_data=test, transform=trans_valid)
loader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
loader_valid = DataLoader(dataset=dataset_valid, batch_size=batch_size//2, shuffle=True, num_workers=0)
loader_test = DataLoader(dataset=dataset_test, batch_size=batch_size//2, shuffle=False, num_workers=0)

In [29]:
# Reset the model and optimizer
vggmodel = models.vgg16(weights='IMAGENET1K_V1')
vggmodel.classifier[6] = nn.Linear(4096, num_classes)
for n, p in vggmodel.named_parameters():
    if 'classifier' in n:
        pass
    else:
        p.requires_grad = False
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vggmodel.classifier.parameters(), lr=learning_rate, momentum=0.9)
vggmodel.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [30]:
# Train the model with the combined data
vgg_best_accuracy = 0
vgg_best_weights = None
trl, trac, vall, valac = [], [], [], []

for epoch in range(num_epochs):
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    vggmodel.train()
    for i, (inputs, targets) in enumerate(loader_train):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = vggmodel(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.data, 1)
        train_loss += loss.item() * inputs.size(0)
        train_correct += (predicted == targets).sum().item()
        train_total += targets.size(0)
    train_loss /= len(dataset_train)
    train_acc = train_correct / train_total
    trl.append(train_loss)
    trac.append(train_acc)

    val_loss = 0.0
    val_correct = 0
    val_total = 0
    vggmodel.eval()
    with torch.no_grad():
        for inputs, targets in loader_valid:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = vggmodel(inputs)
            loss = criterion(outputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            val_loss += loss.item() * inputs.size(0)
            val_correct += (predicted == targets).sum().item()
            val_total += targets.size(0)
    val_loss /= len(val)
    val_acc = val_correct / val_total
    vall.append(val_loss)
    valac.append(val_acc)

    if val_acc > vgg_best_accuracy:
        vgg_best_accuracy = val_acc
        vgg_best_weights = vggmodel.state_dict()

    print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

Epoch 1 - Train Loss: 0.7442, Train Acc: 0.5836, Val Loss: 0.2897, Val Acc: 0.8795
Epoch 2 - Train Loss: 0.1983, Train Acc: 0.9362, Val Loss: 0.1698, Val Acc: 0.9157
Epoch 3 - Train Loss: 0.1451, Train Acc: 0.9392, Val Loss: 0.1312, Val Acc: 0.9398
Epoch 4 - Train Loss: 0.1380, Train Acc: 0.9422, Val Loss: 0.1174, Val Acc: 0.9398
Epoch 5 - Train Loss: 0.1455, Train Acc: 0.9392, Val Loss: 0.1100, Val Acc: 0.9398
Epoch 6 - Train Loss: 0.1126, Train Acc: 0.9574, Val Loss: 0.1050, Val Acc: 0.9518
Epoch 7 - Train Loss: 0.1143, Train Acc: 0.9574, Val Loss: 0.0945, Val Acc: 0.9518
Epoch 8 - Train Loss: 0.1246, Train Acc: 0.9514, Val Loss: 0.0897, Val Acc: 0.9518
Epoch 9 - Train Loss: 0.1163, Train Acc: 0.9544, Val Loss: 0.0829, Val Acc: 0.9518
Epoch 10 - Train Loss: 0.1202, Train Acc: 0.9544, Val Loss: 0.0814, Val Acc: 0.9518


In [31]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_test:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

Test Accuracy of the model on the test images: 98.07692307692308 %
[[73  1]
 [ 1 29]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99        74
           1       0.97      0.97      0.97        30

    accuracy                           0.98       104
   macro avg       0.98      0.98      0.98       104
weighted avg       0.98      0.98      0.98       104



In [32]:
FGSM_IMAGES = glob.glob("adv_examples/fgsm/0_0.png")
fgsm_y = []
for img in FGSM_IMAGES:
    imgName = img.split("/")[-1]
    if imgName.startswith('0_'):
        fgsm_y.append(0)
    elif imgName.startswith('1_'):
        fgsm_y.append(1)

In [33]:
images_df = pd.DataFrame()
images_df["images"] = FGSM_IMAGES
images_df["labels"] = fgsm_y
fgsm_test = MyDataset(df_data=images_df, transform=trans_train)
loader_fgsm_test = DataLoader(dataset=fgsm_test, batch_size=batch_size//2, shuffle=False, num_workers=0)

In [34]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_fgsm_test:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

Test Accuracy of the model on the test images: 100.0 %
[[1]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1





In [35]:
PGD_IMAGES = glob.glob("adv_examples/pgd/*")
pgd_y = []
for img in PGD_IMAGES:
    imgName = img.split("/")[-1]
    if imgName.startswith('0_'):
        pgd_y.append(0)
    elif imgName.startswith('1_'):
        pgd_y.append(1)

In [36]:
images_df = pd.DataFrame()
images_df["images"] = PGD_IMAGES
images_df["labels"] = pgd_y
pgd_test = MyDataset(df_data=images_df, transform=trans_train)
loader_pgd_test = DataLoader(dataset=pgd_test, batch_size=batch_size//2, shuffle=False, num_workers=0)

In [37]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_pgd_test:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

Test Accuracy of the model on the test images: 98.80952380952381 %
[[123   0]
 [  2  43]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       123
           1       1.00      0.96      0.98        45

    accuracy                           0.99       168
   macro avg       0.99      0.98      0.98       168
weighted avg       0.99      0.99      0.99       168



In [38]:
I_FGSM_IMAGES = glob.glob("adv_examples/ifgsm/*")
ifgsm_y = []
for img in I_FGSM_IMAGES:
    imgName = img.split("/")[-1]
    if imgName.startswith('0_'):
        ifgsm_y.append(0)
    elif imgName.startswith('1_'):
        ifgsm_y.append(1)

In [39]:
images_df = pd.DataFrame()
images_df["images"] = I_FGSM_IMAGES
images_df["labels"] = ifgsm_y
ifgsm_test = MyDataset(df_data=images_df, transform=trans_train)
loader_i_fgsm_test = DataLoader(dataset=ifgsm_test, batch_size=batch_size//2, shuffle=False, num_workers=0)

In [40]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_i_fgsm_test:
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

Test Accuracy of the model on the test images: 100.0 %
[[121   0]
 [  0  58]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       121
           1       1.00      1.00      1.00        58

    accuracy                           1.00       179
   macro avg       1.00      1.00      1.00       179
weighted avg       1.00      1.00      1.00       179



In [41]:
from diffusers import StableDiffusionInpaintPipeline
import torchvision.transforms as transforms
from PIL import Image
import torch

# Load the pre-trained diffusion model
diffusion_model = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
diffusion_model.to(device)

# Define a function to denoise adversarial examples
def denoise_adv_examples(adv_examples, labels):
    denoised_examples = []
    for adv_img_path, label in zip(adv_examples, labels):
        # Load the adversarial image
        adv_img = Image.open(adv_img_path)

        # Convert the adversarial image to a tensor
        transform = transforms.Compose([transforms.ToTensor()])
        adv_img_tensor = transform(adv_img)

        # Create a mask image (assuming a simple mask for demonstration purposes)
        mask_img = Image.new('L', adv_img.size, 255)  # Create a white mask image
        mask_img_tensor = transform(mask_img)

        # Denoise the adversarial image using the diffusion model
        denoised_img = diffusion_model("", adv_img_tensor, mask_image=mask_img_tensor, guidance_scale=7.5)["images"][0]

        # Convert the denoised image back to a tensor
        denoised_tensor = transform(denoised_img)

        denoised_examples.append(denoised_tensor)

    return denoised_examples


safety_checker/model.safetensors not found
Fetching 16 files: 100%|██████████| 16/16 [05:26<00:00, 20.43s/it]
Loading pipeline components...:  43%|████▎     | 3/7 [00:00<00:01,  3.93it/s]

In [None]:
# Denoise the FGSM adversarial examples
fgsm_denoised_examples = denoise_adv_examples(FGSM_IMAGES, fgsm_y)
print(fgsm_denoised_examples)

In [None]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in enumerate(fgsm_denoised_examples, fgsm_y):
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

In [None]:
# Denoise the PGD adversarial examples
pgd_denoised_examples = denoise_adv_examples(PGD_IMAGES, pgd_y)
print(pgd_denoised_examples)

In [None]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in enumerate(pgd_denoised_examples, pgd_y):
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')

In [None]:
# Denoise the I-FGSM adversarial examples
ifgsm_denoised_examples = denoise_adv_examples(I_FGSM_IMAGES, ifgsm_y)
print(ifgsm_denoised_examples)

In [None]:
cuda_tensor = torch.tensor(vall)
vls = cuda_tensor.cpu()
cuda_tensor = torch.tensor(trl)
tls = cuda_tensor.cpu()

vggmodel.load_state_dict(vgg_best_weights)
vggmodel.to(device)

vggpredict = []
vgglabel = []

vggmodel.eval()
confusion_matrix = torch.zeros(2, 2)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in enumerate(ifgsm_denoised_examples, ifgsm_y):
        images = images.to(device)
        labels = labels.to(device)
        outputs = vggmodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        vggpredict.extend(predicted)
        vgglabel.extend(labels)

        for t, p in zip(labels.view(-1), predicted.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

label_vgg = [tensor.cpu().numpy() for tensor in vgglabel]
vgg_array = [tensor.cpu().numpy() for tensor in vggpredict]

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(label_vgg, vgg_array))
print(classification_report(label_vgg, vgg_array))

dfv = pd.DataFrame()
dfv["vgg"] = vgg_array
dfv["label"] = label_vgg
dfv.head()
dfv.to_csv('vgwithaug.csv')

torch.save({
    'model_state_dict': vggmodel.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}, 'checkpointvgg50withaug.pth')