In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

# Step 1: Load and preprocess dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Step 2: Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))  # Train quickly with 1 epoch

# Step 3: Generate adversarial example using FGSM
def generate_fgsm(model, image, label, epsilon=0.01):
    image = tf.convert_to_tensor(image[None], dtype=tf.float32)
    label = tf.convert_to_tensor(label[None])
    with tf.GradientTape() as tape:
        tape.watch(image)
        prediction = model(image)
        loss = tf.keras.losses.sparse_categorical_crossentropy(label, prediction)
    gradient = tape.gradient(loss, image)
    signed_grad = tf.sign(gradient)
    adv_image = image + epsilon * signed_grad
    adv_image = tf.clip_by_value(adv_image, 0, 1)
    return adv_image.numpy()[0]

# Step 4: Extract statistical features
def extract_stats(img):
    return [np.mean(img), np.std(img), np.max(img), np.min(img)]

# Step 5: Build dataset with clean and adversarial features
features, true_labels = [], []
for i in range(100):
    clean_img = x_test[i]
    label = y_test[i]

    features.append(extract_stats(clean_img))
    true_labels.append(0)  # Clean

    adv_img = generate_fgsm(model, clean_img, label)
    features.append(extract_stats(adv_img))
    true_labels.append(1)  # Adversarial

# Step 6: Train anomaly detector
detector = IsolationForest()
detector.fit(features)
preds = detector.predict(features)
preds = [1 if p == -1 else 0 for p in preds]  # convert: -1 → adversarial (1)

# Step 7: Print classification results
print(classification_report(true_labels, preds, target_names=["Clean", "Adversarial"]))


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1227s[0m 7us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 15ms/step - accuracy: 0.3857 - loss: 1.6988 - val_accuracy: 0.5519 - val_loss: 1.2710
              precision    recall  f1-score   support

       Clean       0.50      0.79      0.61       100
 Adversarial       0.50      0.21      0.30       100

    accuracy                           0.50       200
   macro avg       0.50      0.50      0.45       200
weighted avg       0.50      0.50      0.45       200

