<a href="https://colab.research.google.com/github/ktcliff/KaggleCats-DogsDataSetPosoning/blob/main/KaggleCats%26DogsDataSetPosoning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Sequential Model Poisoning on 68MB Dataset**

In [None]:
# Mount Google Drive at /content/drive (Colab-only); a later cell copies kaggle.json from it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the Kaggle CLI; %pip (rather than !pip) installs into the running kernel's environment.
%pip install -q kaggle

In [None]:
# Create the Kaggle config directory; -p makes the cell safe to re-run when it already exists.
! mkdir -p ~/.kaggle

In [None]:
# Copy kaggle.json from the mounted Drive folder into ~/.kaggle/ (requires Drive to be mounted first).
! cp /content/drive/MyDrive/Kaggle_API/kaggle.json ~/.kaggle/

In [None]:
# Restrict the copied token file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the samuelcortinhas/cats-and-dogs-image-classification dataset with the kaggle CLI.
! kaggle datasets download -d samuelcortinhas/cats-and-dogs-image-classification

In [None]:
# Extract the archive into the current directory.
# -o overwrites without prompting (a re-run would otherwise wait for interactive input),
# -q suppresses the per-file listing that would flood the cell output.
! unzip -o -q cats-and-dogs-image-classification.zip

In [None]:
# Dependencies used by the notebook: TensorFlow/Keras for the model, matplotlib for the plots.
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
from matplotlib import pyplot as plt

# Record the TensorFlow version in the cell output.
print(tf.__version__)

In [None]:
# Root folder of the extracted training images (one sub-folder per class)
data_dir = '/content/train'

# Options shared by the training and validation iterators
flow_options = dict(
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# A single ImageDataGenerator rescales pixels by 1/255 and reserves 20% of the
# images as the validation subset
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

# Both iterators read the same directory; `subset` selects which side of the split
train_generator = train_datagen.flow_from_directory(
    data_dir, subset='training', **flow_options)

validation_generator = train_datagen.flow_from_directory(
    data_dir, subset='validation', **flow_options)

In [None]:
# Model factory used for both the clean and the poisoned training runs
def create_model(optimizer, activation_param):
    """Build and compile the binary image classifier.

    The network is three Conv2D + MaxPooling2D stages (32, 64, 128 filters)
    on 150x150x3 inputs, followed by Flatten, a 512-unit Dense layer,
    Dropout(0.5) and a single sigmoid output unit.

    Args:
        optimizer: Optimizer passed straight to ``model.compile``.
        activation_param: Activation used by every Conv2D layer and by the
            512-unit Dense layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy metric).
    """
    layers = keras.layers

    stack = []
    for filters in (32, 64, 128):
        conv_kwargs = {'activation': activation_param}
        if not stack:
            # Only the first layer declares the input shape
            conv_kwargs['input_shape'] = (150, 150, 3)
        stack.append(layers.Conv2D(filters, (3, 3), **conv_kwargs))
        stack.append(layers.MaxPooling2D(2, 2))

    # Classification head
    stack.append(layers.Flatten())
    stack.append(layers.Dense(512, activation=activation_param))
    stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(1, activation='sigmoid'))

    network = tf.keras.models.Sequential(stack)
    network.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Baseline run: build a fresh model (Adam optimizer, ReLU activations) and train it on train_generator
model = create_model('adam', 'relu')

# Train for 10 epochs; validation metrics are recorded in history_clean for the plots below
history_clean = model.fit(train_generator,
                          epochs=10,
                          validation_data=validation_generator)

# evaluate() yields loss and accuracy (the model is compiled with metrics=['accuracy']).
# Note: despite the name, test_score_clean is the loss on the validation split, not on a separate test set.
test_score_clean, accuracy_clean = model.evaluate(validation_generator)
print("Clean Data Model Accuracy: ", accuracy_clean)

In [None]:
# Visualize training & validation accuracy and loss for clean data
acc_clean = history_clean.history['accuracy']
val_acc_clean = history_clean.history['val_accuracy']
loss_clean = history_clean.history['loss']
val_loss_clean = history_clean.history['val_loss']
# Derive the x-axis from the recorded history instead of hard-coding the epoch
# count, so the plot stays valid if the number of epochs changes.
epochs_range = range(len(acc_clean))

plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc_clean, label='Training Accuracy')
plt.plot(epochs_range, val_acc_clean, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy (Clean Data)')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss_clean, label='Training Loss')
plt.plot(epochs_range, val_loss_clean, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss (Clean Data)')

# Render the figure explicitly so the cell does not end on a Text(...) repr
plt.show()

In [None]:
# Function to poison data
def poison_data(generator, target_label, replacement_label, number_to_replace):
    """Relabel up to ``number_to_replace`` samples from ``target_label`` to ``replacement_label``.

    Keras directory iterators rebuild each batch's labels from their ``classes``
    array, so editing the label array of a yielded batch has no lasting effect
    (and, with shuffling, the same sample may be counted more than once).
    When ``generator`` exposes a ``classes`` sequence, the labels are therefore
    rewritten there, so every batch produced afterwards carries the poisoned
    labels. Samples are relabelled in index order, each at most once.

    For plain iterables of ``(images, labels)`` batches without a ``classes``
    attribute, the yielded label containers are edited in place instead.

    Args:
        generator: Keras-style iterator with a mutable ``classes`` sequence, or
            an iterable of ``(images, labels)`` batches.
        target_label: Label value to look for.
        replacement_label: Label value written in its place.
        number_to_replace: Maximum number of labels to flip.

    Returns:
        The same ``generator`` object, which is modified in place.
    """
    count = 0

    classes = getattr(generator, 'classes', None)
    if classes is not None:
        # Persistent poisoning: mutate the label store the iterator reads from.
        for idx in range(len(classes)):
            if count >= number_to_replace:
                break
            if classes[idx] == target_label:
                classes[idx] = replacement_label
                count += 1
        return generator

    # Fallback for batch iterables: edit the yielded labels directly.
    # NOTE: this never terminates on an endless iterator that contains fewer
    # than number_to_replace matching labels.
    for images, labels in generator:
        for j in range(len(labels)):
            if labels[j] == target_label and count < number_to_replace:
                labels[j] = replacement_label
                count += 1
        if count >= number_to_replace:
            break
    return generator

In [None]:
# Poison the data (example: ask poison_data to change up to 200 labels of class 1 to class 0).
# NOTE(review): reading class 1 as 'dog' and class 0 as 'cat' presumes the class folders are
# indexed alphabetically — confirm with train_generator.class_indices.
# poison_data returns the generator it was given, so poisoned_train_generator and
# train_generator refer to the same object.
poisoned_train_generator = poison_data(train_generator, 1, 0, 200)

# Train a fresh model with the same settings as the clean run so the two runs are comparable
model_poisoned = create_model('adam', 'relu')
history_poisoned = model_poisoned.fit(poisoned_train_generator,
                                      epochs=10,
                                      validation_data=validation_generator)

# evaluate() yields loss and accuracy; both are measured on the validation generator
test_score_poisoned, accuracy_poisoned = model_poisoned.evaluate(validation_generator)
print("Poisoned Data Model Accuracy: ", accuracy_poisoned)

In [None]:
# Visualize training & validation accuracy and loss for poisoned data
acc_poisoned = history_poisoned.history['accuracy']
val_acc_poisoned = history_poisoned.history['val_accuracy']
loss_poisoned = history_poisoned.history['loss']
val_loss_poisoned = history_poisoned.history['val_loss']
# Derive the x-axis from the recorded history instead of hard-coding the epoch
# count, so the plot stays valid if the number of epochs changes.
epochs_range = range(len(acc_poisoned))

plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc_poisoned, label='Training Accuracy')
plt.plot(epochs_range, val_acc_poisoned, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy (Poisoned Data)')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss_poisoned, label='Training Loss')
plt.plot(epochs_range, val_loss_poisoned, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss (Poisoned Data)')

# Render the figure explicitly so the cell does not end on a Text(...) repr
plt.show()