In [1]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

import tensorflow as tf

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline

import tensorflow_hub as thub
import tensorflow.keras.backend as K

from lib.data_utils import *

In [2]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation
from tensorflow.keras.losses import CategoricalCrossentropy

In [3]:
# from tensorflow.keras.models import Model
from tensorflow.keras.layers import InputLayer, Reshape
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical


In [4]:
from art.classifiers import KerasClassifier
from art.attacks import FastGradientMethod
from art.attacks.extraction import CopycatCNN, FunctionallyEquivalentExtraction, KnockoffNets
import art

In [5]:
def sample_by_class(data, labels, num_samples=100):
    sample_data = []
    sample_labels = []
    unq_labels = list(range(labels.shape[1]))
    for label in unq_labels:
        idx = labels[:,label]==1
        sample_set = data[idx][0:num_samples].copy()
        label_set = labels[idx][0:num_samples].copy()
        sample_data.append(sample_set)
        sample_labels.append(label_set)
    
    sample_data = np.concatenate(sample_data)
    sample_labels = np.concatenate(sample_labels)
    print(sample_data.shape, sample_labels.shape)
    return sample_data, sample_labels

def subset_data(data, labels, fraction=5):
    data_size = data.shape[0]
    out_size = int(data_size*fraction/100)
    idx = np.random.choice(data_size, out_size, replace=False)
    out_data = data[idx].copy()
    out_labels = labels[idx].copy()
    
    print(out_data.shape, out_labels.shape)
    return out_data, out_labels

In [39]:
# Load the raw CIFAR-10 data
cifar10_dir = 'lib/datasets/cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar10_dir)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [7]:
testd = x_train[0:1000].copy()
testl = y_train[0:1000].copy()

In [8]:
def build_ganeval_model(enable_logits=True):
    K.clear_session()
    model_url = "https://tfhub.dev/deepmind/ganeval-cifar10-convnet/1"
    ganeval_module = thub.Module(model_url)
    
    model = Sequential()
    model.add(InputLayer(input_shape=(32,32,3)))
    model.add(thub.KerasLayer(ganeval_module))
    if enable_logits:
        model.add(Activation('linear'))
    else: 
        model.add(Activation('softmax'))
        
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=1e-4),
                  loss=CategoricalCrossentropy(from_logits=enable_logits),
                  metrics=['accuracy'])
    
    return model

In [9]:
def build_ganeval_model_flat(enable_logits=True):
    K.clear_session()
    model_url = "https://tfhub.dev/deepmind/ganeval-cifar10-convnet/1"
    ganeval_module = thub.Module(model_url)
    
    model = Sequential()
#     model.add(InputLayer(input_shape=(32*32*3,)))
    model.add(Reshape((32,32,1),input_shape=(32*32*1,)))
    model.add(Conv2D(3, (3, 3), activation='relu', padding='same'))
    model.add(thub.KerasLayer(ganeval_module))
    if enable_logits:
        model.add(Activation('linear'))
    else: 
        model.add(Activation('softmax'))
        
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=1e-4),
                  loss=CategoricalCrossentropy(from_logits=enable_logits),
                  metrics=['accuracy'])
    
    return model

In [10]:
ge_flat_clf = build_ganeval_model_flat()
ge_flat_clf.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 32, 32, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 32, 3)         30        
_________________________________________________________________
keras_layer (KerasLayer)     (None, 10)                7796426   
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 7,796,456
Trainable params: 30
Non-trainable params: 7,796,426
_________________________________________________________________


In [12]:
ge_cifar_clf = build_ganeval_model(enable_logits=True)
ge_cifar_clf.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 10)                7796426   
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 7,796,426
Trainable params: 0
Non-trainable params: 7,796,426
_________________________________________________________________


### Testing the retrieved hub model by evaluating on train data
Should give us 100% or something close to that

In [13]:
ge_cifar_clf.evaluate(testd, testl)







[0.001668029203079641, 1.0]

### Testing a sample attack

In [21]:
classifier = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
attack_fgsm = FastGradientMethod(classifier=classifier, eps=0.4)

In [22]:
x_test_adv = attack_fgsm.generate(testd.copy())

In [23]:
ge_cifar_clf.evaluate(x_test_adv, testl)



[6.933242935180664, 0.102]

### Define the substitute classifier for our model

In [13]:
# def build_substitute_model(enable_logits=True):
#     model = tf.keras.Sequential( )
#     model.add( Conv2D( 32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32,32,3) ) )
#     model.add( Conv2D( 64, (3, 3), padding='same', activation='relu' ) )
#     model.add( MaxPooling2D( pool_size=(2, 2) ) )
#     model.add( Flatten( ) )
#     model.add( Dense( 128, activation='relu' ) )
#     if enable_logits:
#         model.add( Dense(10, activation='linear' ) )
#     else:
#         model.add( Dense(10, activation='softmax' ) )
        
#     model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=enable_logits),
#                   optimizer='adam',
#                   metrics=['accuracy'] )
    

    
#     return model

In [44]:
def build_substitute_model(enable_logits=True):
    model = tf.keras.Sequential( )
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=(32,32,3)))
    model.add(Activation("relu"))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    if enable_logits:
        model.add(Activation("linear"))
    else:
        model.add(Activation("softmax"))
        
    model.compile(loss=CategoricalCrossentropy(from_logits=enable_logits),
                  optimizer='adam',
                  metrics=['accuracy'] )
    
    return model

In [45]:
x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)

(1000, 32, 32, 3) (1000, 10)
(1000, 32, 32, 3) (1000, 10)


In [46]:
def extraction_attack(ART_attack, x_train, y_train, fractions, epoch=30, verbose=0):
    
    data_size = x_train.shape[0]
    epochs = epoch
    
    ### Train and Test sample data from each class to get final results
    print("Extract 100 Training samples from each class:")
    x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
    print("Extract 100 Test samples from each class:")
    x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)
    
    ge_cifar_clf = build_ganeval_model()
    cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
    
    loss1, acc1 = cloud_art_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
    loss2, acc2 = cloud_art_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)
    
    clf_results = []
    
    for each_frac in fractions:
        max_queries = int(data_size*each_frac/100)
        print("Attacking the victim with %d percent of training data: %d queries..."%(each_frac, max_queries))
#         partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=each_frac)
        
        sub_clf = build_substitute_model()
        stolen_clf = KerasClassifier(model=sub_clf, clip_values=(0, 1), use_logits=True)
        
        attack = ART_attack(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=epochs,
                        nb_stolen=max_queries)
        
        stolen_clf = attack.extract(x_train, thieved_classifier=stolen_clf)

        loss3, acc3 = stolen_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
        loss4, acc4 = stolen_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)
        
        clf_results.append((ART_attack.__name__, each_frac, max_queries, epochs, (round(loss1,4), acc1), (round(loss2,4), acc2), 
                            (round(loss3,4), acc3), (round(loss4,4), acc4)))
        
        results_df = pd.DataFrame(clf_results, columns=['Attack', 'Fraction of data', 'Max queries', 'Epochs trained', 'Victim clf on Train data',
                                                       'Victim clf on Test data', 'Stolen clf on Train data', 
                                                        'Stolen clf on Test data'])
    return results_df
        
        

In [96]:
KON_results_20e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=20)
KON_results_30e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=30)
KON_results_40e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=40)

Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Attacking the victim with 10 percent of training data: 5000 queries...
Attacking the victim with 20 percent of training data: 10000 queries...
Attacking the victim with 30 percent of training data: 15000 queries...
Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Attacking the victim with 10 percent of training data: 5000 queries...
Attacking the victim with 20 percent of training data: 10000 queries...
Attacking the victim with 30 percent of training data: 15000 queries...
Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Attacking the victim with 10 percent of training data: 5000 queries...
Attacking the victim with 20 percent of training data: 10000 queries...
Attacking the victim with 30 percent of training data: 15000 queries...


In [97]:
KON_results_20e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,KnockoffNets,5,2500,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.8743, 0.52)","(1.8492, 0.515)"
1,KnockoffNets,10,5000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.3711, 0.619)","(1.5513, 0.583)"
2,KnockoffNets,20,10000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.0375, 0.723)","(1.2095, 0.653)"
3,KnockoffNets,30,15000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7953, 0.75)","(1.0285, 0.697)"


In [98]:
KON_results_30e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,KnockoffNets,5,2500,30,"(0.0017, 1.0)","(0.2134, 0.944)","(2.1656, 0.545)","(2.3571, 0.525)"
1,KnockoffNets,10,5000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.378, 0.636)","(1.5988, 0.593)"
2,KnockoffNets,20,10000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.091, 0.726)","(1.4035, 0.66)"
3,KnockoffNets,30,15000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7677, 0.791)","(0.9552, 0.708)"


In [99]:
KON_results_40e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,KnockoffNets,5,2500,40,"(0.0017, 1.0)","(0.2134, 0.944)","(2.6595, 0.506)","(2.5057, 0.52)"
1,KnockoffNets,10,5000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.5156, 0.65)","(1.7004, 0.603)"
2,KnockoffNets,20,10000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.2484, 0.714)","(1.5209, 0.642)"
3,KnockoffNets,30,15000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(0.8399, 0.772)","(1.0338, 0.71)"


In [100]:
CCC_results_20e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=20)
CCC_results_30e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=30)
CCC_results_40e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=40)

Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Attacking the victim with 10 percent of training data: 5000 queries...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Attacking the victim with 20 percent of training data: 10000 queries...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Attacking the victim with 30 percent of training data: 15000 queries...
Epoch 1/20
Epoch 2/20


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Attacking the victim with 10 percent of training data: 5000 queries...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Attacking the victim with 20 percent of training data: 10000 queries...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Attacking the victim with 5 percent of training data: 2500 queries...
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Attacking the victim with 10 percent of training data: 5000 queries...
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
E

In [101]:
CCC_results_20e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,CopycatCNN,5,2500,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.566, 0.539)","(1.6998, 0.532)"
1,CopycatCNN,10,5000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.5256, 0.623)","(1.698, 0.584)"
2,CopycatCNN,20,10000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.0333, 0.726)","(1.1957, 0.665)"
3,CopycatCNN,30,15000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7759, 0.766)","(0.9894, 0.69)"


In [102]:
CCC_results_30e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,CopycatCNN,5,2500,30,"(0.0017, 1.0)","(0.2134, 0.944)","(2.0025, 0.535)","(1.9658, 0.524)"
1,CopycatCNN,10,5000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.4955, 0.646)","(1.7299, 0.595)"
2,CopycatCNN,20,10000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.048, 0.723)","(1.2566, 0.675)"
3,CopycatCNN,30,15000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(0.6689, 0.806)","(1.0855, 0.694)"


In [103]:
CCC_results_40e.head()

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,CopycatCNN,5,2500,40,"(0.0017, 1.0)","(0.2134, 0.944)","(2.3161, 0.557)","(2.2642, 0.522)"
1,CopycatCNN,10,5000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.7464, 0.639)","(2.0327, 0.55)"
2,CopycatCNN,20,10000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.2221, 0.721)","(1.4793, 0.668)"
3,CopycatCNN,30,15000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7929, 0.788)","(0.9659, 0.725)"


In [106]:
EA_results = pd.concat([KON_results_20e,KON_results_30e,KON_results_40e,CCC_results_20e,CCC_results_30e,CCC_results_40e])
EA_results.head(24)

Unnamed: 0,Attack,Fraction of data,Max queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,KnockoffNets,5,2500,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.8743, 0.52)","(1.8492, 0.515)"
1,KnockoffNets,10,5000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.3711, 0.619)","(1.5513, 0.583)"
2,KnockoffNets,20,10000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(1.0375, 0.723)","(1.2095, 0.653)"
3,KnockoffNets,30,15000,20,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7953, 0.75)","(1.0285, 0.697)"
0,KnockoffNets,5,2500,30,"(0.0017, 1.0)","(0.2134, 0.944)","(2.1656, 0.545)","(2.3571, 0.525)"
1,KnockoffNets,10,5000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.378, 0.636)","(1.5988, 0.593)"
2,KnockoffNets,20,10000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.091, 0.726)","(1.4035, 0.66)"
3,KnockoffNets,30,15000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7677, 0.791)","(0.9552, 0.708)"
0,KnockoffNets,5,2500,40,"(0.0017, 1.0)","(0.2134, 0.944)","(2.6595, 0.506)","(2.5057, 0.52)"
1,KnockoffNets,10,5000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.5156, 0.65)","(1.7004, 0.603)"


In [105]:
EA_results.to_csv("Extraction attacks results.csv")

In [65]:
partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=10)

(5000, 32, 32, 3) (5000, 10)


## Copycat CNN attack NPD-SL + PD-SL

In [98]:
def CCC_extraction_attack(npd_data, x_train, y_train, fractions, epoch=30, verbose=0):
    
    data_size = x_train.shape[0]
    epochs = epoch
    npd_size = npd_data.shape[0]
    
    ### Train and Test sample data from each class to get final results
    print("Extract 100 Training samples from each class:")
    x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
    print("Extract 100 Test samples from each class:")
    x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)
    
    ge_cifar_clf = build_ganeval_model()
    cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
    
    loss1, acc1 = cloud_art_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
    loss2, acc2 = cloud_art_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)
            
    sub_clf = build_substitute_model()
    stolen_clf = KerasClassifier(model=sub_clf, clip_values=(0, 1), use_logits=True)
    
    attack = CopycatCNN(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=epochs,
                        nb_stolen=npd_size)
    
    stolen_clf = attack.extract(npd_data, thieved_classifier=stolen_clf)
    
    stolen_clf._model.save_weights("CCC_temp.h5")
    
    clf_results = []
    
    for each_frac in fractions:

        max_queries = int(data_size*each_frac/100)
        print("Attacking the victim with %d percent of training data: %d queries..."%(each_frac, max_queries))
        
        stolen_clf._model.load_weights("CCC_temp.h5")
        partial_x_train, _ = subset_data(x_train, y_train, fraction=each_frac)

        stolen_clf = attack.extract(partial_x_train, thieved_classifier=stolen_clf)
        
        loss3, acc3 = stolen_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
        loss4, acc4 = stolen_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)
        
        clf_results.append(("Copycat CNN", npd_size, each_frac, max_queries, epochs, 
                            (round(loss1,4), acc1), 
                            (round(loss2,4), acc2), 
                            (round(loss3,4), acc3), 
                            (round(loss4,4), acc4)))
        
        results_df = pd.DataFrame(clf_results, columns=['Attack', "NPD Queries", 'Fraction of train data', 
                                                        'Train queries', 
                                                        'Epochs trained', 'Victim clf on Train data',
                                                       'Victim clf on Test data', 'Stolen clf on Train data', 
                                                        'Stolen clf on Test data'])
    return results_df
        
        

In [99]:
from tensorflow.keras.datasets import cifar100

In [100]:
(cifar100_x, cifar100_y), _ = cifar100.load_data(label_mode='fine')
print('cifar100 training data shape:', cifar100_x.shape)

# Normalize pixel values
cifar100_x = cifar100_x/255

# Sample 20,000 data points randomly from the data
idx = np.random.choice(cifar100_x.shape[0], 20000, replace=False)
cifar100_x = cifar100_x[idx].copy()
cifar100_y = cifar100_y[idx].copy()

print('Final NPD size of cifar100 training data:', cifar100_x.shape)

cifar100 training data shape: (50000, 32, 32, 3)
Final NPD size of cifar100 training data: (20000, 32, 32, 3)


In [101]:
CCC_results_30e = CCC_extraction_attack(cifar100_x, x_train, y_train, [5,10,20,30], epoch=30)
CCC_results_40e = CCC_extraction_attack(cifar100_x, x_train, y_train, [5,10,20,30], epoch=40)

Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore






Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Attacking the victim with 5 percent of training data: 2500 queries...




(2500, 32, 32, 3) (2500, 10)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




Attacking the victim with 10 percent of training data: 5000 queries...
(5000, 32, 32, 3) (5000, 10)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Attacking the victim with 20 percent of training data: 10000 queries...




(10000, 32, 32, 3) (10000, 10)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Attacking the victim with 30 percent of training data: 15000 queries...




(15000, 32, 32, 3) (15000, 10)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Extract 100 Training samples from each class:
(1000, 32, 32, 3) (1000, 10)
Extract 100 Test samples from each class:
(1000, 32, 32, 3) (1000, 10)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore






Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Attacking the victim with 5 percent of training data: 2500 queries...




(2500, 32, 32, 3) (2500, 10)
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Attacking the victim with 10 percent of training data: 5000 queries...




(5000, 32, 32, 3) (5000, 10)
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Attacking the victim with 20 percent of training data: 10000 queries...




(10000, 32, 32, 3) (10000, 10)
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Attacking the victim with 30 percent of training data: 15000 queries...




(15000, 32, 32, 3) (15000, 10)
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [102]:
CCC_results_30e.head()

Unnamed: 0,Attack,NPD Queries,Fraction of train data,Train queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,Copycat CNN,20000,5,2500,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.5548, 0.628)","(1.5579, 0.601)"
1,Copycat CNN,20000,10,5000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.3367, 0.664)","(1.3935, 0.652)"
2,Copycat CNN,20000,20,10000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.0165, 0.73)","(1.1472, 0.676)"
3,Copycat CNN,20000,30,15000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7559, 0.799)","(1.0688, 0.713)"


In [103]:
CCC_results_40e.head()

Unnamed: 0,Attack,NPD Queries,Fraction of train data,Train queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,Copycat CNN,20000,5,2500,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.679, 0.619)","(1.755, 0.599)"
1,Copycat CNN,20000,10,5000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.4419, 0.676)","(1.6044, 0.622)"
2,Copycat CNN,20000,20,10000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.1687, 0.707)","(1.3947, 0.656)"
3,Copycat CNN,20000,30,15000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(0.8209, 0.779)","(1.0484, 0.705)"


In [104]:
CCC_results = pd.concat([CCC_results_30e,CCC_results_40e])
CCC_results.head(24)

Unnamed: 0,Attack,NPD Queries,Fraction of train data,Train queries,Epochs trained,Victim clf on Train data,Victim clf on Test data,Stolen clf on Train data,Stolen clf on Test data
0,Copycat CNN,20000,5,2500,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.5548, 0.628)","(1.5579, 0.601)"
1,Copycat CNN,20000,10,5000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.3367, 0.664)","(1.3935, 0.652)"
2,Copycat CNN,20000,20,10000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(1.0165, 0.73)","(1.1472, 0.676)"
3,Copycat CNN,20000,30,15000,30,"(0.0017, 1.0)","(0.2134, 0.944)","(0.7559, 0.799)","(1.0688, 0.713)"
0,Copycat CNN,20000,5,2500,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.679, 0.619)","(1.755, 0.599)"
1,Copycat CNN,20000,10,5000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.4419, 0.676)","(1.6044, 0.622)"
2,Copycat CNN,20000,20,10000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(1.1687, 0.707)","(1.3947, 0.656)"
3,Copycat CNN,20000,30,15000,40,"(0.0017, 1.0)","(0.2134, 0.944)","(0.8209, 0.779)","(1.0484, 0.705)"


In [105]:
CCC_results.to_csv("Extraction attack - Copycat CNN results.csv")

In [68]:
ge_cifar_clf = build_ganeval_model()
sub_clf1 = build_substitute_model()
sub_clf2 = build_substitute_model()
# sub_clf1.summary()
# sub_clf2.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [69]:
cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
stolen_clf1 = KerasClassifier(model=sub_clf1, clip_values=(0, 1), use_logits=True)
stolen_clf2 = KerasClassifier(model=sub_clf2, clip_values=(0, 1), use_logits=True)

In [73]:
attack_CCC = CopycatCNN(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=20,
                        nb_stolen=20000)

In [None]:
stolen_clf1 = attack_CCC.extract(cifar100_x, thieved_classifier=stolen_clf1)

In [76]:
sub_clf1.save_weights("CCC_temp.h5")

In [None]:
stolen_clf1 = attack_CCC.extract(partial_x_train, thieved_classifier=stolen_clf1)

In [82]:
stolen_clf1._model.save_weights("CCC_temp1.h5")

In [None]:
stolen_clf1 = attack_CCC.extract(partial_x_train, thieved_classifier=stolen_clf1)

In [80]:
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)



[1.6621611442565918, 0.4418]

In [78]:
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)
cloud_art_clf._model.evaluate(x_train_adv, y_train_adv)
stolen_clf1._model.evaluate(x_train_adv, y_train_adv)
cloud_art_clf._model.evaluate(x_test_adv, y_test_adv)
stolen_clf1._model.evaluate(x_test_adv, y_test_adv)



[1.4328741931915283, 0.613]

In [52]:
%%time
attack_KN = KnockoffNets(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=2,
                        nb_stolen=2000,
                        sampling_strategy='random')
stolen_clf2 = attack_KN.extract(partial_x_train, partial_y_train, thieved_classifier=stolen_clf2)



CPU times: user 1min 19s, sys: 1.08 s, total: 1min 21s
Wall time: 12.8 s


In [None]:
ge_flat_clf = build_ganeval_model_flat()
ge_flat_clf.summary()

partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=0.2)
partial_x_train = partial_x_train[:,:,:,0]

x_mean = partial_x_train.mean()
x_std = partial_x_train.std()

partial_x_train = (partial_x_train.reshape(-1,32*32*1) - x_mean)/x_std
partial_x_train.shape

In [None]:
ge_flat_clf.evaluate(partial_x_train, partial_y_train)

flat_art_clf = KerasClassifier(model=ge_flat_clf, clip_values=(0, 1), use_logits=True)

attack_FEN = FunctionallyEquivalentExtraction(classifier=flat_art_clf, num_neurons=10)
stolen_clf_FEN = attack_FEN.extract(partial_x_train)

  t_hat = t_1 + np.divide(y_2 - y_1 - (t_2 - t_1) * m_2, m_1 - m_2)
  y_hat = y_1 + m_1 * np.divide(y_2 - y_1 - (t_2 - t_1) * m_2, m_1 - m_2)
  np.sum(np.abs((y_hat - y) / y) < rel_diff_value) > fraction_true * self.num_classes
  if np.sum(np.abs((m_1 - m_2) / m_1) < rel_diff_slope) > fraction_true * self.num_classes:
  if np.sum(np.abs((m_1 - m_2) / m_1) < rel_diff_slope) > fraction_true * self.num_classes:
  if np.sum(np.abs((m_1 - m_2) / m_1) < rel_diff_slope) > fraction_true * self.num_classes:
  y_hat = y_1 + m_1 * np.divide(y_2 - y_1 - (t_2 - t_1) * m_2, m_1 - m_2)
  t_hat = t_1 + np.divide(y_2 - y_1 - (t_2 - t_1) * m_2, m_1 - m_2)
  y_hat = y_1 + m_1 * np.divide(y_2 - y_1 - (t_2 - t_1) * m_2, m_1 - m_2)
  m_1 = (self._o_l(x_1_p) - self._o_l(x_1)) / epsilon
  m_2 = (self._o_l(x_2) - self._o_l(x_2_m)) / epsilon


KeyboardInterrupt: 

In [58]:
### Sanity check
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)

stolen_clf2._model.evaluate(partial_x_train, partial_y_train)

# stolen_clf_FEN._model.evaluate(partial_x_train, partial_y_train)



[1.8248753637313844, 0.32066667]

In [59]:
cloud_art_clf._model.evaluate(x_train_adv, y_train_adv)
stolen_clf1._model.evaluate(x_train_adv, y_train_adv)
stolen_clf2._model.evaluate(x_train_adv, y_train_adv)
# stolen_clf_FEN._model.evaluate(x_train_adv, y_train_adv)



[1.806689208984375, 0.32]

In [60]:
cloud_art_clf._model.evaluate(x_test_adv, y_test_adv)
stolen_clf1._model.evaluate(x_test_adv, y_test_adv)
stolen_clf2._model.evaluate(x_test_adv, y_test_adv)
# stolen_clf_FEN._model.evaluate(x_test_adv, y_test_adv)



[1.8274783573150635, 0.322]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


cifar100 training data shape: (50000, 32, 32, 3)
