# Import Libraries

In [27]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10

from art.estimators.classification import KerasClassifier
from art.defences.detector.poison import ActivationDefence

In [2]:
tf.compat.v1.disable_eager_execution()

# Load Data

In [3]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [4]:
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

In [5]:
#y_train = to_categorical(y_train, num_classes=10)
#y_test = to_categorical(y_test, num_classes=10)

# Load Models

In [6]:
safe_model_path = "./models/safe_model.h5"
safe_model = load_model(safe_model_path)

In [7]:
modified_model_path = "./models/poisoned_model.h5"
modified_model = load_model(modified_model_path)

# Evaluate Safe Model

In [8]:
safe_classifier = KerasClassifier(model=safe_model)
safe_defence = ActivationDefence(classifier=safe_classifier, x_train=x_test, y_train=y_test)
safe_report, safe_clean_list = safe_defence.detect_poison(clustering_method="KMeans",
                                                          nb_clusters=2, 
                                                          nb_dims=10,
                                                          reduce="PCA")

In [9]:
print("Safe Report: \n", safe_report)

Safe Report: 
 {'cluster_analysis': 'smaller', 'suspicious_clusters': 10, 'Class_0': {'cluster_0': {'ptc_data_in_cluster': 0.62, 'suspicious_cluster': False}, 'cluster_1': {'ptc_data_in_cluster': 0.38, 'suspicious_cluster': True}}, 'Class_1': {'cluster_0': {'ptc_data_in_cluster': 0.35, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.65, 'suspicious_cluster': False}}, 'Class_2': {'cluster_0': {'ptc_data_in_cluster': 0.34, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.66, 'suspicious_cluster': False}}, 'Class_3': {'cluster_0': {'ptc_data_in_cluster': 0.43, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.57, 'suspicious_cluster': False}}, 'Class_4': {'cluster_0': {'ptc_data_in_cluster': 0.5, 'suspicious_cluster': False}, 'cluster_1': {'ptc_data_in_cluster': 0.5, 'suspicious_cluster': True}}, 'Class_5': {'cluster_0': {'ptc_data_in_cluster': 0.71, 'suspicious_cluster': False}, 'cluster_1': {'ptc_data_in_cluster': 0.29, 'suspiciou

In [19]:
def report_summary(report):
    for class_num, class_data in report.items():
        if class_num.startswith("Class_"):
            suspicious_percentage = sum(cluster["ptc_data_in_cluster"] * 100 for cluster in class_data.values() if cluster["suspicious_cluster"])
            print(f"{class_num} = {int(suspicious_percentage)}%")

In [20]:
report_summary(safe_report)

Class_0 = 38%
Class_1 = 35%
Class_2 = 34%
Class_3 = 43%
Class_4 = 50%
Class_5 = 28%
Class_6 = 42%
Class_7 = 41%
Class_8 = 40%
Class_9 = 40%


# Evaluate Malicious Model

In [21]:
modified_classifier = KerasClassifier(model=modified_model)
modified_defence = ActivationDefence(classifier=modified_classifier, x_train=x_test, y_train=y_test)
modified_report, modified_clean_list = modified_defence.detect_poison(clustering_method="KMeans",
                                                                      nb_clusters=2, 
                                                                      nb_dims=10,
                                                                      reduce="PCA")

In [22]:
print("Modified Report: \n", modified_report)

Modified Report: 
 {'cluster_analysis': 'smaller', 'suspicious_clusters': 10, 'Class_0': {'cluster_0': {'ptc_data_in_cluster': 0.71, 'suspicious_cluster': False}, 'cluster_1': {'ptc_data_in_cluster': 0.29, 'suspicious_cluster': True}}, 'Class_1': {'cluster_0': {'ptc_data_in_cluster': 0.44, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.56, 'suspicious_cluster': False}}, 'Class_2': {'cluster_0': {'ptc_data_in_cluster': 0.81, 'suspicious_cluster': False}, 'cluster_1': {'ptc_data_in_cluster': 0.19, 'suspicious_cluster': True}}, 'Class_3': {'cluster_0': {'ptc_data_in_cluster': 0.43, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.57, 'suspicious_cluster': False}}, 'Class_4': {'cluster_0': {'ptc_data_in_cluster': 0.45, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.55, 'suspicious_cluster': False}}, 'Class_5': {'cluster_0': {'ptc_data_in_cluster': 0.34, 'suspicious_cluster': True}, 'cluster_1': {'ptc_data_in_cluster': 0.66, 'susp

In [24]:
report_summary(modified_report)

Class_0 = 28%
Class_1 = 44%
Class_2 = 19%
Class_3 = 43%
Class_4 = 45%
Class_5 = 34%
Class_6 = 44%
Class_7 = 40%
Class_8 = 12%
Class_9 = 44%


# Comparison

In [34]:
def compare_reports(report1, report2):
    report1_percentages = []
    for class_num, class_data in report1.items():
        if class_num.startswith("Class_"):
            suspicious_percentage = sum(cluster["ptc_data_in_cluster"] * 100 for cluster in class_data.values() if cluster["suspicious_cluster"])
            report1_percentages.append(suspicious_percentage)

    report2_percentages = []
    for class_num, class_data in report2.items():
        if class_num.startswith("Class_"):
            suspicious_percentage = sum(cluster["ptc_data_in_cluster"] * 100 for cluster in class_data.values() if cluster["suspicious_cluster"])
            report2_percentages.append(suspicious_percentage)

    classes = [f"Class_{i}" for i in range(10)]
    diff = [report1 - report2 for report1, report2 in zip(report1_percentages, report2_percentages)]

    df = pd.DataFrame({
        "Class Name": classes,
        "Safe Model": report1_percentages,
        "Modified Model": report2_percentages,
        "Diff": diff
    })
    return df

In [35]:
df = compare_reports(safe_report, modified_report)
print(df)

  Class Name  Safe Model  Modified Model  Diff
0    Class_0        38.0            29.0   9.0
1    Class_1        35.0            44.0  -9.0
2    Class_2        34.0            19.0  15.0
3    Class_3        43.0            43.0   0.0
4    Class_4        50.0            45.0   5.0
5    Class_5        29.0            34.0  -5.0
6    Class_6        42.0            44.0  -2.0
7    Class_7        41.0            40.0   1.0
8    Class_8        40.0            12.0  28.0
9    Class_9        40.0            44.0  -4.0
