In [1]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

import tensorflow as tf

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline

import tensorflow_hub as thub
import tensorflow.keras.backend as K

from lib.data_utils import *

In [2]:
from art.classifiers import KerasClassifier
from art.attacks import FastGradientMethod
from art.attacks.extraction import CopycatCNN, FunctionallyEquivalentExtraction, KnockoffNets
import art

In [3]:
def sample_by_class(data, labels, num_samples=100):
    """Collect up to ``num_samples`` examples of every class.

    Each column of ``labels`` is treated as one class; a row belongs to a
    class when that column equals 1. For every class the first
    ``num_samples`` matching rows are kept, in their original order.

    Args:
        data: array indexed along axis 0 in step with ``labels``.
        labels: 2-D array with one column per class.
        num_samples: maximum number of rows kept for each class.

    Returns:
        Tuple ``(sample_data, sample_labels)``: the per-class chunks
        concatenated in column order.
    """
    picked_data, picked_labels = [], []
    for class_idx in range(labels.shape[1]):
        members = labels[:, class_idx] == 1
        picked_data.append(data[members][:num_samples].copy())
        picked_labels.append(labels[members][:num_samples].copy())

    sample_data = np.concatenate(picked_data)
    sample_labels = np.concatenate(picked_labels)
    # Echo the resulting shapes so the notebook output shows what was sampled.
    print(sample_data.shape, sample_labels.shape)
    return sample_data, sample_labels

def subset_data(data, labels, fraction=5, seed=None):
    """Draw a random subset of ``data``/``labels`` without replacement.

    Args:
        data: array whose first axis indexes samples.
        labels: array indexed along axis 0 in step with ``data``.
        fraction: subset size as a percentage of ``data.shape[0]``
            (``5`` means 5%).
        seed: optional seed for a private ``np.random.RandomState``, making
            the draw reproducible. With the default ``None`` the global
            NumPy random state is used, exactly as before.

    Returns:
        Tuple ``(out_data, out_labels)`` holding copies of the selected rows.

    Raises:
        ValueError: if ``fraction`` is not within ``[0, 100]``.
    """
    # Fail early with a clear message instead of the opaque error that
    # np.random.choice raises for a negative or oversized sample.
    if not 0 <= fraction <= 100:
        raise ValueError(
            "fraction must be a percentage in [0, 100], got {!r}".format(fraction)
        )

    data_size = data.shape[0]
    out_size = int(data_size*fraction/100)

    # A dedicated RandomState keeps a seeded draw independent of global state.
    sampler = np.random if seed is None else np.random.RandomState(seed)
    idx = sampler.choice(data_size, out_size, replace=False)
    out_data = data[idx].copy()
    out_labels = labels[idx].copy()

    print(out_data.shape, out_labels.shape)
    return out_data, out_labels

In [4]:
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Input
# from tensorflow.keras import Sequential
# from tensorflow.keras.optimizers import SGD
# from tensorflow.keras.losses import CategoricalCrossentropy
# from tensorflow.keras.utils import to_categorical


In [5]:
# Load the raw CIFAR-10 data
# NOTE(review): load_cifar10 is presumably provided by the star import of
# lib.data_utils — confirm.
cifar10_dir = 'lib/datasets/cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar10_dir)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
# The second argument (10) is the number of classes, i.e. label columns.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [6]:
# Quick evaluation subset: the first 1000 training images and their labels.
# NOTE(review): despite the "test" prefix, these come from the training split.
testd = x_train[0:1000].copy()
testl = y_train[0:1000].copy()

In [8]:
def build_ganeval_model(enable_logits=True):
    """Wrap the TF-Hub ganeval CIFAR-10 convnet in a compiled Keras model.

    The Keras session is cleared first, then the hub module is loaded and
    placed behind a 32x32x3 input layer, followed by one activation layer.

    Args:
        enable_logits: when truthy the final activation is ``'linear'`` and
            the loss is built with ``from_logits=True``; otherwise the final
            activation is ``'softmax'`` and the loss expects probabilities.

    Returns:
        The compiled ``tf.keras.Sequential`` model (SGD optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    # Reset Keras global state before the hub module is instantiated.
    K.clear_session()
    hub_module = thub.Module("https://tfhub.dev/deepmind/ganeval-cifar10-convnet/1")

    # Identity output for logits, softmax for probabilities.
    output_activation = 'linear' if enable_logits else 'softmax'

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(32,32,3)))
    model.add(thub.KerasLayer(hub_module))
    model.add(tf.keras.layers.Activation(output_activation))

    sgd = tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=1e-4)
    # The loss must be told whether the model emits logits or probabilities.
    xent = tf.keras.losses.CategoricalCrossentropy(from_logits=enable_logits)
    model.compile(optimizer=sgd, loss=xent, metrics=['accuracy'])

    return model

In [10]:
# Build the hub-backed classifier with linear (logit) outputs and list its layers.
ge_cifar_clf = build_ganeval_model(enable_logits=True)
ge_cifar_clf.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 10)                7796426   
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 7,796,426
Trainable params: 0
Non-trainable params: 7,796,426
_________________________________________________________________


### Testing the retrieved hub model by evaluating on train data
This should give an accuracy of 100%, or something close to it.

In [11]:
# Evaluate on the 1000-sample training subset; the result is [loss, accuracy].
ge_cifar_clf.evaluate(testd, testl)



[0.001668029203079641, 1.0]

### Testing a sample attack

In [147]:
# Wrap the Keras model for ART and configure an FGSM attack with eps=0.4.
# use_logits=True matches the model built with enable_logits=True.
# NOTE(review): clip_values=(0, 1) assumes pixel values are scaled to [0, 1] —
# confirm against load_cifar10.
classifier = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
attack_fgsm = FastGradientMethod(classifier=classifier, eps=0.4)

In [148]:
# Generate FGSM examples from a copy of the 1000-sample subset.
# NOTE(review): a sample_by_class cell elsewhere in this notebook also assigns
# x_test_adv, so the name does not always hold adversarial examples.
x_test_adv = attack_fgsm.generate(testd.copy())

In [149]:
# Re-evaluate on the FGSM outputs against the original labels ([loss, accuracy]).
ge_cifar_clf.evaluate(x_test_adv, testl)



[6.933382461547851, 0.102]

### Define the substitute classifier for our model

In [12]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation
from tensorflow.keras.losses import CategoricalCrossentropy

In [13]:
def build_substitute_model(enable_logits=True):
    """Build and compile a small two-convolution substitute classifier.

    Architecture: Conv(32) -> Conv(64) -> MaxPool -> Flatten -> Dense(128)
    -> Dense(10), for 32x32x3 inputs.

    NOTE(review): a later cell defines ``build_substitute_model`` again with a
    deeper architecture; whichever definition ran last is the one in effect.

    Args:
        enable_logits: when truthy the output layer is linear and the loss is
            built with ``from_logits=True``; otherwise the output is softmax.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    output_activation = 'linear' if enable_logits else 'softmax'

    model = tf.keras.Sequential([
        Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32,32,3)),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation=output_activation),
    ])

    # The loss must be told whether the model emits logits or probabilities.
    xent = tf.keras.losses.CategoricalCrossentropy(from_logits=enable_logits)
    model.compile(loss=xent, optimizer='adam', metrics=['accuracy'])

    return model

In [29]:
def build_substitute_model(enable_logits=True):
    """Build and compile the deeper substitute classifier for 32x32x3 inputs.

    Two convolutional stages (32 then 64 filters, each followed by max
    pooling and dropout) feed a 512-unit dense layer and a 10-unit output.

    NOTE(review): an earlier cell defines a function with this same name;
    running this cell replaces that definition.

    Args:
        enable_logits: when truthy the final activation is ``"linear"`` and
            the loss is built with ``from_logits=True``; otherwise the final
            activation is ``"softmax"``.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    output_activation = "linear" if enable_logits else "softmax"

    layer_stack = [
        # Stage 1: two 32-filter convolutions.
        Conv2D(32, (3, 3), padding="same", input_shape=(32,32,3)),
        Activation("relu"),
        Conv2D(32, (3, 3)),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: two 64-filter convolutions.
        Conv2D(64, (3, 3), padding="same"),
        Activation("relu"),
        Conv2D(64, (3, 3)),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head.
        Flatten(),
        Dense(512),
        Activation("relu"),
        Dropout(0.5),
        Dense(10),
        Activation(output_activation),
    ]
    model = tf.keras.Sequential(layer_stack)

    # The loss must be told whether the model emits logits or probabilities.
    xent = CategoricalCrossentropy(from_logits=enable_logits)
    model.compile(loss=xent, optimizer='adam', metrics=['accuracy'])

    return model

In [14]:
# Class-balanced evaluation sets: up to 100 images per class from each split.
# NOTE(review): despite the `_adv` suffix these are unmodified samples, not
# adversarial examples; x_test_adv is also assigned by the FGSM cell.
x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)

(1000, 32, 32, 3) (1000, 10)
(1000, 32, 32, 3) (1000, 10)


In [None]:
# Candidate training-set percentages (same unit as subset_data's `fraction`).
# NOTE(review): not referenced by any other visible cell; the next cell
# hard-codes fraction=30.
available_data_fraction = [5,10,20,30]

In [15]:
# Random 30% of the training set, used as input to the extraction attacks.
# No random seed is set in the visible cells, so this subset differs between runs.
partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=30)

(15000, 32, 32, 3) (15000, 10)


In [30]:
# Rebuild the hub-backed model and create two fresh substitute models,
# one per extraction attack.
# build_ganeval_model() calls K.clear_session(); it runs before the
# substitutes are created.
ge_cifar_clf = build_ganeval_model()
sub_clf1 = build_substitute_model()
sub_clf2 = build_substitute_model()
# sub_clf1.summary()
# sub_clf2.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [31]:
# Wrap the three Keras models as ART classifiers; all were built with logit outputs.
# NOTE(review): clip_values=(0, 1) assumes inputs scaled to [0, 1] — confirm.
# stolen_clf1 / stolen_clf2 are rebound to the extract() results in later cells.
cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
stolen_clf1 = KerasClassifier(model=sub_clf1, clip_values=(0, 1), use_logits=True)
stolen_clf2 = KerasClassifier(model=sub_clf2, clip_values=(0, 1), use_logits=True)

In [32]:
# NOTE(review): presumably meant to collect CopycatCNN results; nothing in the
# visible cells appends to or reads this list.
CCC_results = []

In [33]:
# Configure the CopycatCNN extraction attack against the hub-backed classifier.
# NOTE(review): nb_stolen presumably caps the number of queries sent to the
# victim (10000 here; partial_x_train has 15000 rows) — confirm in the ART docs.
attack_CCC = CopycatCNN(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=20,
                        nb_stolen=10000)

In [34]:
# Run the CopycatCNN attack: only inputs are passed, no labels.
# The returned classifier replaces the untrained stolen_clf1 wrapper.
stolen_clf1 = attack_CCC.extract(partial_x_train, thieved_classifier=stolen_clf1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Configure and run the KnockoffNets extraction attack with the 'adaptive'
# sampling strategy. Unlike the CopycatCNN call, extract() is also given
# the labels (partial_y_train).
# The returned classifier replaces the untrained stolen_clf2 wrapper.
attack_KN = KnockoffNets(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=20,
                        nb_stolen=10000,
                        sampling_strategy='adaptive')
stolen_clf2 = attack_KN.extract(partial_x_train, partial_y_train, thieved_classifier=stolen_clf2)

In [None]:
# attack_FEN = FunctionallyEquivalentExtraction(classifier=cloud_art_clf, num_neurons=100)
# stolen_clf_FEN = attack_FEN.extract(partial_x_train, partial_y_train)

In [None]:
### Sanity check
# Evaluate both stolen models on the data subset used for extraction.
# A notebook cell only displays the value of its last expression, so each
# result is printed explicitly rather than being silently discarded.
print('CopycatCNN   (partial train):',
      stolen_clf1._model.evaluate(partial_x_train, partial_y_train))

print('KnockoffNets (partial train):',
      stolen_clf2._model.evaluate(partial_x_train, partial_y_train))

# stolen_clf_FEN._model.evaluate(partial_x_train, partial_y_train)

In [None]:
# Compare the victim and both stolen models on the class-balanced training sample.
# A notebook cell only displays the value of its last expression, so each
# result is printed explicitly rather than being silently discarded.
print('cloud model  (train sample):',
      cloud_art_clf._model.evaluate(x_train_adv, y_train_adv))
print('CopycatCNN   (train sample):',
      stolen_clf1._model.evaluate(x_train_adv, y_train_adv))
print('KnockoffNets (train sample):',
      stolen_clf2._model.evaluate(x_train_adv, y_train_adv))
# stolen_clf_FEN._model.evaluate(x_train_adv, y_train_adv)

In [None]:
# Compare the victim and both stolen models on the class-balanced test sample.
# A notebook cell only displays the value of its last expression, so each
# result is printed explicitly rather than being silently discarded.
print('cloud model  (test sample):',
      cloud_art_clf._model.evaluate(x_test_adv, y_test_adv))
print('CopycatCNN   (test sample):',
      stolen_clf1._model.evaluate(x_test_adv, y_test_adv))
print('KnockoffNets (test sample):',
      stolen_clf2._model.evaluate(x_test_adv, y_test_adv))
# stolen_clf_FEN._model.evaluate(x_test_adv, y_test_adv)