## Get the data and process it

In [None]:
# Mount Google Drive on the Colab runtime (this asks for an authorization code).
# More details here: https://colab.research.google.com/notebooks/io.ipynb
from google.colab import drive
drive.mount('/content/drive')

# Some imports

In [None]:
# import library
#You will need to add yours
import os
import numpy as np
import pandas as pd
import random

import tensorflow as tf
from tensorflow import keras
import pickle
#import os
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Model
from art.attacks.poisoning.perturbations import add_pattern_bd, add_single_bd, insert_image

import cv2
from google.colab.patches import cv2_imshow

from imutils import paths

from sklearn.model_selection import train_test_split
from sklearn import preprocessing


# Path to the data

In [None]:
# Path to the directory containing your project files (change to your location)
PROJECT_ROOT_DIR = '/content/drive/MyDrive/CS5331_CS4331_Fa24/img/'

# Dataset directory, joined onto PROJECT_ROOT_DIR by load_image()
# Download the BUSI dataset here: https://scholar.cu.edu.eg/?q=afahmy/pages/dataset
DATA_DIR = 'Dataset_BUSI_with_GT/'

# Loading data

In [None]:
# Function for loading the dataset
# reference: https://www.pyimagesearch.com/2018/09/10/keras-tutorial-how-to-get-started-with-keras-deep-learning-and-python/
def load_image():
  """Load the benign, malignant and normal images and their class names.

  Files are read from PROJECT_ROOT_DIR/DATA_DIR/<class>/'<class> (<i>).png'
  for i = 1..count, one class after the other, so the returned lists are
  ordered by class (benign, malignant, normal) and then by file index.
  Every image is resized to 224 x 224 before it is stored.

  :return: tuple ``(data, labels)`` - a list of resized images as returned
           by cv2 and a parallel list holding the class name of each image.
  :raises FileNotFoundError: if an expected image file cannot be read.
  """
  # Number of image files loaded per class; files are numbered from 1 to count.
  class_counts = (('benign', 437), ('malignant', 210), ('normal', 133))

  data = []
  labels = []
  for class_name, count in class_counts:
    for i in range(1, count + 1):
      path = os.path.join(PROJECT_ROOT_DIR, DATA_DIR, class_name,
                          '{} ({}).png'.format(class_name, i))
      image = cv2.imread(path)
      # cv2.imread signals a missing/unreadable file by returning None; fail
      # here with the offending path instead of an opaque error in cv2.resize.
      if image is None:
        raise FileNotFoundError('Could not read image: ' + path)
      # resize image to 224 * 224 (channels are left unchanged)
      data.append(cv2.resize(image, (224, 224)))
      # the class label is the name of the image folder
      labels.append(class_name)

  return data, labels
# Function for image preprocessing
def preprocess(data,labels):
  """Turn raw images and class-name labels into numpy arrays for training.

  :param data: sequence of images; converted to one float32 array.
  :param labels: sequence of class-name strings, parallel to ``data``.
  :return: tuple ``(data, targets)`` - the float32 image array divided by
           255 and the integer codes produced by a LabelEncoder.
  """
  # Dividing by 255 brings 8-bit pixel values into [0, 1]
  scaled_images = np.array(data, dtype = np.float32) / 255.0

  # Encode class names as integer categories: 0 = 'benign', 1 = 'malignant', 2 = 'normal'
  label_array = np.array(labels)
  encoder = preprocessing.LabelEncoder()
  targets = encoder.fit(label_array).transform(label_array)

  return scaled_images, targets

In [None]:
# Load the BUSI images and labels
# This will take time (my time was around 12 min)
data, labels = load_image()
data, labels = preprocess(data,labels)

# Hold out 20% of the data as the test set; shuffled with a fixed seed (random_state=42)
(imgs_train, imgs_test, labels_train, labels_test) = train_test_split(data, labels, test_size = 0.2, random_state=42, shuffle = True)
# Split the remaining 80% again 80/20: about 64% train and 16% validation of the full dataset
(imgs_train, imgs_val, labels_train, labels_val) = train_test_split(imgs_train, labels_train, test_size = 0.2, random_state=42, shuffle = True)

In [None]:
# Report the image/label array shapes of the train, validation, and test splits
for split_name, split_imgs, split_labels in (
    ('train', imgs_train, labels_train),
    ('validation', imgs_val, labels_val),
    ('test', imgs_test, labels_test),
):
    print('Images {0} shape: {1} - Labels {0} shape: {2}'.format(split_name, split_imgs.shape, split_labels.shape))


## Set constants and convert labels

In [None]:
## Set constants
NUM_LABELS = 3                             # Number of class labels
BATCH_SIZE = 16                             # Batch size
HEIGHT = 224                                 # Height of input image
WIDTH = 224                                  # Width of input image
N_CHANNEL = 3                               # Number of image channels
OUTPUT_DIM = 3                             # Output dimension (same value as NUM_LABELS)

# Set training hyperparameters
NUM_EPOCH = 100                             # Number of epochs to train
LR = 0.0001                                 # Learning rate

INPUT_SHAPE = (HEIGHT, WIDTH, N_CHANNEL)  # Input shape of model
IMG_SHAPE = (HEIGHT, WIDTH, N_CHANNEL)    # Shape of one image (same value as INPUT_SHAPE)

In [None]:
# Convert the integer labels of each split to one-hot vectors
from tensorflow import keras
labels_train_cat = keras.utils.to_categorical(labels_train, NUM_LABELS)
labels_test_cat = keras.utils.to_categorical(labels_test, NUM_LABELS)
labels_val_cat = keras.utils.to_categorical(labels_val, NUM_LABELS)
all_labels = np.concatenate((labels_train, labels_test, labels_val), axis=0)
# Take the distinct label ids from every split, so a class that happens to be
# missing from one split (e.g. validation) is still listed.
label_names = np.unique(all_labels)
print(label_names)


In [None]:
# Show a 3 x 3 grid of randomly chosen training images, each titled with its label
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(9, 9))
plt.rcParams.update({'font.size': 8})
for cell in range(1, 10):
    # draw one random index into the training set
    pick = np.random.randint(0, len(imgs_train), 1)[0]
    ax = fig.add_subplot(3, 3, cell)
    ax.imshow(imgs_train[pick])
    ax.set_title('Label: ' + str(labels_train[pick]))
    ax.axis('off')


## Task 1
CS5331 your task 1 goes here
The first task will require you to train a deep-learning model to classify BUSI images. The training, validation, and testing datasets are already given to you. For full marks, the classification test accuracy is expected to be above 85%. Further, you should not have an overfitted model. This will look like a model you already built for a previous assignment, and if you recognize it and want to use it, feel free to do so.

In [None]:
## Implement your model for task 1, use whatever steps you want as long as you meet the requirements

In [None]:
#Evaluate your model

## Task 2

CS5331 here is your task 2

# Code given to you for analysis/modification
1.	Answer the following questions for the implemented attack.

    a.	What types of modifications to images are implemented? What does each one of them do?

    b.	What does the poison_dataset do? What does it return? Be sure to have details here.
2.	Implementation. Implement poison_dataset function that takes clean images, clean labels, percentage of poisoning, and the poisoning function. The function should return 4 arrays, including a Boolean if the sample is poisoned or not, the sample, the label for that sample, and the original dataset label for that sample. The provided notebook is your guide; however, there are changes that you need to make.



In [None]:
def add_modification(x):
  """Apply the backdoor trigger selected by the global BACKDOOR_TYPE to ``x``.

  :param x: image data handed unchanged to the selected perturbation function.
  :return: whatever the selected perturbation function returns.
  :raises ValueError: if BACKDOOR_TYPE is not 'pattern', 'pixel' or 'image'.
  """
  # NOTE(review): max_val is read from module scope and is not set inside this
  # function - make sure it is defined before calling with 'pattern' or 'pixel'.
  if BACKDOOR_TYPE == 'pattern':
      return add_pattern_bd(x, pixel_value=max_val)
  elif BACKDOOR_TYPE == 'pixel':
      return add_single_bd(x, pixel_value=max_val)
  elif BACKDOOR_TYPE == 'image':
      return insert_image(x, backdoor_path='../utils/data/backdoors/alert.png', size=(10,10))
  else:
      # Raising a plain string is itself a TypeError; raise a real exception.
      raise ValueError("Unknown backdoor type: " + repr(BACKDOOR_TYPE))

def add_pattern_bd(x: np.ndarray, distance: int = 2, pixel_value: int = 1) -> np.ndarray:
  """
  Stamp a checkerboard-like trigger near the bottom-right corner of an image or a batch of images.

  The input is copied first; the selected pixels, located `distance` away from the bottom-right edge,
  are overwritten with `pixel_value` and the copy is returned.
  :param x: N X W X H matrix or W X H matrix or N X W X H X C matrix; for the 4-D case the pixels are
            set on all channels.
  :param distance: Distance from bottom-right walls.
  :param pixel_value: Value written into the selected entries of the image matrix.
  :return: Backdoored image.
  :raises ValueError: if `x` does not have 2, 3 or 4 dimensions.
  """
  # (width offset, height offset) pairs, each subtracted from (width - distance, height - distance)
  large_pattern = (
    (1, 1), (1, 3), (1, 5), (1, 7),
    (3, 1), (5, 1), (7, 1),
    (2, 2), (4, 2), (6, 2),
    (2, 4), (2, 6),
    (3, 3), (3, 5), (5, 3),
    (4, 4),
  )
  small_pattern = ((0, 0), (1, 1), (0, 2), (2, 0))

  patched = np.array(x)
  dims = patched.shape
  rank = len(dims)

  if rank == 4:
    # batch of multi-channel images: the larger 16-pixel pattern on every channel
    width, height = dims[1:3]
    for d_w, d_h in large_pattern:
      patched[:, width - distance - d_w, height - distance - d_h, :] = pixel_value
  elif rank == 3:
    # batch of single-channel images: the 4-pixel pattern
    width, height = dims[1:]
    for d_w, d_h in small_pattern:
      patched[:, width - distance - d_w, height - distance - d_h] = pixel_value
  elif rank == 2:
    # one single-channel image: the 4-pixel pattern
    width, height = dims
    for d_w, d_h in small_pattern:
      patched[width - distance - d_w, height - distance - d_h] = pixel_value
  else:
    raise ValueError("Invalid array shape: " + str(dims))
  return patched

def poison_dataset(x_clean, y_clean, percent_poison, poison_func):
  """Template to be completed: build a poisoned version of a dataset.

  :param x_clean: clean images.
  :param y_clean: clean labels.
  :param percent_poison: percentage of poisoning.
  :param poison_func: the poisoning function to apply to images.
  :return: four arrays ``(is_poison, x_poison, y_poison, y_original)`` - a
           Boolean telling whether each sample is poisoned, the sample, the
           label for that sample, and its original dataset label.
  """
  # Your implementation here. Here is what you should return
  # NOTE: none of the names below exist until the implementation defines them.
  return is_poison, x_poison, y_poison, y_original

In [None]:
# Trigger type read by add_modification: "pattern", "pixel", or "image"
BACKDOOR_TYPE = "pattern"  # You may change it to another type and compare the performance if you wish

# Poison the dataset
3.	 Create poisoned training images using BUSI images’ training and validation sets. Select the percentage of poisoned images to be 20%. You may choose a different value if you wish to.
4.	Plot at least 9 images with the applied backdoor pattern and display the target label for the images and if they are poisoned or not.
5. Create a poisoned test dataset by adding poisoned images to the original test dataset of 156 images.


In [None]:
#poison the training

#Print the shape of the training to make sure you are correct


In [None]:
# Plot a few images


In [None]:
#poison the test images

# Train on poisoned images
6.	Implementation. Train a poisoned model on the poisoned set of images. You can try training for a few epochs (maybe around 15 epochs), but if the attack success rate is low, you can retrain the model for longer. Estimated time on GPU: between 3 and 10 minutes.

7.	Evaluate the poisoned model on clean test images and report the classification accuracy. Fill in Table 2. The classification accuracy on clean test images should be high and not significantly lower than the original accuracy of the model. For full marks, the accuracy should be at least 80%.


8.	Plot at least 9 clean images, and show the true, predicted class label, and if the image is poisoned or not. The figure below is an example without the poisoned image label. Make sure to add that.

10. Evaluate the model on poisoned test images. Report how many of the poisoned benign images were classified as malignant images. For full marks, the attack success rate should be above 70%.

11.	Plot at least 9 poisoned images, and show the target predicted class label, and if the image is poisoned or not.

13. Plot at least 12 poisoned random images from all, and show the target predicted class label, and if the image is poisoned or not. An example is shown below.



In [None]:
# Shuffle training data


In [None]:
# Fit your ART classifier with the correct training

In [None]:
#Evaluate on Clean images

In [None]:
#Plot clean images

In [None]:
#Evaluate on poisoned images

In [None]:
#plot poisoned images

In [None]:
# Plot a mix of poisoned and non-poisoned images