In [1]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

import tensorflow as tf

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline

import tensorflow_hub as thub
import tensorflow.keras.backend as K

from lib.data_utils import *

In [2]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation
from tensorflow.keras.losses import CategoricalCrossentropy

In [3]:
# from tensorflow.keras.models import Model
from tensorflow.keras.layers import InputLayer, Reshape
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical


In [4]:
from art.classifiers import KerasClassifier
from art.attacks import FastGradientMethod
from art.attacks.extraction import CopycatCNN, FunctionallyEquivalentExtraction, KnockoffNets
import art

In [None]:
def sample_by_class(data, labels, num_samples=100):
    """Keep the first `num_samples` examples of every class, grouped by class.

    Args:
        data: array indexed by example along axis 0.
        labels: 2-D one-hot array; column ``c`` equals 1 for members of class ``c``.
        num_samples: maximum number of examples kept per class.

    Returns:
        Tuple ``(sample_data, sample_labels)`` with the per-class slices
        concatenated along axis 0 in class order. Both shapes are printed.
    """
    per_class_data, per_class_labels = [], []
    for class_id in range(labels.shape[1]):
        members = labels[:, class_id] == 1
        per_class_data.append(data[members][:num_samples].copy())
        per_class_labels.append(labels[members][:num_samples].copy())

    sample_data = np.concatenate(per_class_data)
    sample_labels = np.concatenate(per_class_labels)
    print(sample_data.shape, sample_labels.shape)
    return sample_data, sample_labels

def subset_data(data, labels, fraction=5):
    """Draw a random subset of examples without replacement.

    Args:
        data: array indexed by example along axis 0.
        labels: array aligned with `data` along axis 0.
        fraction: size of the subset as a PERCENTAGE of the input (5 means 5%).

    Returns:
        Tuple ``(out_data, out_labels)`` holding copies of the selected rows.
        Both shapes are printed. Uses NumPy's global random state.
    """
    total = data.shape[0]
    keep = int(total * fraction / 100)
    chosen = np.random.choice(total, keep, replace=False)
    out_data, out_labels = data[chosen].copy(), labels[chosen].copy()

    print(out_data.shape, out_labels.shape)
    return out_data, out_labels

In [None]:
# Load the raw CIFAR-10 data
cifar10_dir = 'lib/datasets/cifar-10-batches-py'
# load_cifar10 is not defined in this notebook; presumably it comes from the
# star-import of lib.data_utils. The attack cells below pass images with
# clip_values=(0, 1) -- TODO confirm the loader scales pixels to that range.
x_train, y_train, x_test, y_test = load_cifar10(cifar10_dir)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
# (one-hot with 10 columns; sample_by_class relies on this layout)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [None]:
# First 1000 training examples, copied so the quick evaluation and FGSM
# checks below cannot modify x_train / y_train.
testd = x_train[0:1000].copy()
testl = y_train[0:1000].copy()

In [None]:
def build_ganeval_model(enable_logits=True):
    """Wrap the TF-Hub GANEval CIFAR-10 convnet in a compiled Keras model.

    Resets the Keras backend session before building.

    Args:
        enable_logits: if truthy, the last layer is a 'linear' activation and
            the loss is built with ``from_logits=True``; otherwise a 'softmax'
            activation is appended and the loss expects probabilities.

    Returns:
        Compiled ``Sequential`` model taking 32x32x3 inputs.
    """
    K.clear_session()
    hub_module = thub.Module("https://tfhub.dev/deepmind/ganeval-cifar10-convnet/1")

    victim = Sequential()
    victim.add(InputLayer(input_shape=(32,32,3)))
    victim.add(thub.KerasLayer(hub_module))
    victim.add(Activation('linear' if enable_logits else 'softmax'))

    victim.compile(
        loss=CategoricalCrossentropy(from_logits=enable_logits),
        optimizer=tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=1e-4),
        metrics=['accuracy'],
    )
    return victim

In [None]:
def build_ganeval_model_flat(enable_logits=True):
    """Build the GANEval victim behind a flat, single-channel input front-end.

    The model takes vectors of length 32*32*1, reshapes them to 32x32x1 and
    expands them to three channels with a 3x3 ReLU convolution before the
    TF-Hub GANEval module. Resets the Keras backend session before building.

    Args:
        enable_logits: if truthy, the last layer is a 'linear' activation and
            the loss is built with ``from_logits=True``; otherwise 'softmax'.

    Returns:
        Compiled ``Sequential`` model.
    """
    K.clear_session()
    hub_module = thub.Module("https://tfhub.dev/deepmind/ganeval-cifar10-convnet/1")

    flat_victim = Sequential()
    flat_victim.add(Reshape((32,32,1),input_shape=(32*32*1,)))
    flat_victim.add(Conv2D(3, (3, 3), activation='relu', padding='same'))
    flat_victim.add(thub.KerasLayer(hub_module))
    flat_victim.add(Activation('linear' if enable_logits else 'softmax'))

    flat_victim.compile(
        loss=CategoricalCrossentropy(from_logits=enable_logits),
        optimizer=tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=1e-4),
        metrics=['accuracy'],
    )
    return flat_victim

In [None]:
# Build the flat-input victim variant and print its layer summary.
ge_flat_clf = build_ganeval_model_flat()
ge_flat_clf.summary()

In [None]:
# Build the logits-output victim. build_ganeval_model() calls K.clear_session() first.
# NOTE(review): that presumably invalidates ge_flat_clf from the previous cell -- confirm.
ge_cifar_clf = build_ganeval_model(enable_logits=True)
ge_cifar_clf.summary()

### Testing the retrieved hub model by evaluating on train data
Evaluating the model on a slice of the training data should give an accuracy of 100%, or something close to that.

In [None]:
# Loss and accuracy of the victim on the 1000-example training slice.
ge_cifar_clf.evaluate(testd, testl)

### Testing a sample attack

In [None]:
# Wrap the victim for ART (logits output, inputs clipped to [0, 1]) and set up
# a Fast Gradient Method attack with perturbation size eps=0.4.
classifier = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
attack_fgsm = FastGradientMethod(classifier=classifier, eps=0.4)

In [None]:
# Adversarial versions of the 1000-example slice (a copy is passed in).
# NOTE: the name x_test_adv is reassigned further down to a per-class test sample.
x_test_adv = attack_fgsm.generate(testd.copy())

In [None]:
# Victim loss and accuracy on the FGSM examples, scored against the original labels.
ge_cifar_clf.evaluate(x_test_adv, testl)

### Define the substitute classifier for our model

In [None]:
# def build_substitute_model(enable_logits=True):
#     model = tf.keras.Sequential( )
#     model.add( Conv2D( 32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32,32,3) ) )
#     model.add( Conv2D( 64, (3, 3), padding='same', activation='relu' ) )
#     model.add( MaxPooling2D( pool_size=(2, 2) ) )
#     model.add( Flatten( ) )
#     model.add( Dense( 128, activation='relu' ) )
#     if enable_logits:
#         model.add( Dense(10, activation='linear' ) )
#     else:
#         model.add( Dense(10, activation='softmax' ) )
        
#     model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=enable_logits),
#                   optimizer='adam',
#                   metrics=['accuracy'] )
    

    
#     return model

In [5]:
def build_substitute_model(enable_logits=True):
    """Build and compile the substitute CNN trained by the extraction attacks.

    Architecture: two convolutional stages (32 then 64 filters), each made of
    conv(same)-ReLU-conv-ReLU-maxpool-dropout(0.25), followed by
    Flatten, Dense(512)-ReLU-dropout(0.5) and a 10-unit output layer.

    Args:
        enable_logits: if truthy, the output activation is 'linear' and the
            loss is built with ``from_logits=True``; otherwise 'softmax'.

    Returns:
        Compiled ``tf.keras.Sequential`` model for 32x32x3 inputs (Adam optimizer).
    """
    net = tf.keras.Sequential( )

    # Only the very first convolution carries the input shape.
    for width, first_conv_extra in ((32, {"input_shape": (32,32,3)}), (64, {})):
        net.add(Conv2D(width, (3, 3), padding="same", **first_conv_extra))
        net.add(Activation("relu"))
        net.add(Conv2D(width, (3, 3)))
        net.add(Activation("relu"))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(0.25))

    net.add(Flatten())
    net.add(Dense(512))
    net.add(Activation("relu"))
    net.add(Dropout(0.5))
    net.add(Dense(10))
    net.add(Activation("linear" if enable_logits else "softmax"))

    net.compile(
        optimizer='adam',
        loss=CategoricalCrossentropy(from_logits=enable_logits),
        metrics=['accuracy'],
    )
    return net

In [None]:
# Fixed evaluation sets: the first 100 examples of each class from train and test.
# NOTE: this replaces the FGSM array previously stored in x_test_adv.
x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)

In [None]:
def extraction_attack(ART_attack, x_train, y_train, fractions, epoch=30, verbose=0):
    """Run an ART extraction attack against the GANEval victim at several query budgets.

    For each entry of `fractions`, a fresh substitute model is trained by
    `ART_attack` with a query budget of that percentage of `x_train`. The
    victim and the stolen model are both scored on 100-per-class samples of
    the train and test sets.

    NOTE: the test sample is drawn from the notebook-level globals `x_test`
    and `y_test`; they must exist before this function is called.

    Args:
        ART_attack: ART extraction attack class (e.g. KnockoffNets, CopycatCNN),
            instantiated with classifier/batch_size_fit/batch_size_query/
            nb_epochs/nb_stolen keyword arguments.
        x_train: training images handed to the attack as the query pool.
        y_train: one-hot labels for `x_train`; used only to build the
            evaluation sample.
        fractions: iterable of percentages of `x_train` to use as query budget.
        epoch: number of epochs the substitute is trained for.
        verbose: verbosity passed to Keras ``evaluate``.

    Returns:
        pandas DataFrame with one row per fraction. Empty (columns only) when
        `fractions` is empty.
    """
    result_columns = ['Attack', 'Fraction of data', 'Max queries', 'Epochs trained',
                      'Victim clf on Train data', 'Victim clf on Test data',
                      'Stolen clf on Train data', 'Stolen clf on Test data']

    data_size = x_train.shape[0]
    epochs = epoch

    ### Train and Test sample data from each class to get final results
    print("Extract 100 Training samples from each class:")
    x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
    print("Extract 100 Test samples from each class:")
    x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)

    ge_cifar_clf = build_ganeval_model()
    cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)

    # Victim baseline: computed once, repeated on every result row.
    loss1, acc1 = cloud_art_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
    loss2, acc2 = cloud_art_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)

    clf_results = []

    for each_frac in fractions:
        max_queries = int(data_size*each_frac/100)
        print("Attacking the victim with %d percent of training data: %d queries..."%(each_frac, max_queries))

        # A new, untrained substitute for every budget so runs are independent.
        sub_clf = build_substitute_model()
        stolen_clf = KerasClassifier(model=sub_clf, clip_values=(0, 1), use_logits=True)

        attack = ART_attack(classifier=cloud_art_clf,
                            batch_size_fit=32,
                            batch_size_query=32,
                            nb_epochs=epochs,
                            nb_stolen=max_queries)

        stolen_clf = attack.extract(x_train, thieved_classifier=stolen_clf)

        loss3, acc3 = stolen_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
        loss4, acc4 = stolen_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)

        clf_results.append((ART_attack.__name__, each_frac, max_queries, epochs,
                            (round(loss1,4), acc1), (round(loss2,4), acc2),
                            (round(loss3,4), acc3), (round(loss4,4), acc4)))

    # Built once after the loop. The original built it inside the loop, which
    # raised UnboundLocalError at `return` when `fractions` was empty.
    results_df = pd.DataFrame(clf_results, columns=result_columns)
    return results_df
        
        

In [None]:
# KnockoffNets at query budgets of 5/10/20/30 percent of the training set,
# repeated for 20, 30 and 40 training epochs. Each call rebuilds the victim
# and trains four substitutes, so this cell is expensive.
KON_results_20e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=20)
KON_results_30e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=30)
KON_results_40e = extraction_attack(KnockoffNets, x_train, y_train, [5,10,20,30], epoch=40)

In [None]:
# KnockoffNets results, 20 epochs (one row per query fraction).
KON_results_20e.head()

In [None]:
# KnockoffNets results, 30 epochs (one row per query fraction).
KON_results_30e.head()

In [None]:
# KnockoffNets results, 40 epochs (one row per query fraction).
KON_results_40e.head()

In [None]:
# CopycatCNN with the same budgets (5/10/20/30 percent) and epoch settings as
# the KnockoffNets runs. Expensive cell.
# NOTE: CCC_results_30e / CCC_results_40e are reassigned later by the
# NPD-SL + PD-SL experiment, whose tables have different columns.
CCC_results_20e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=20)
CCC_results_30e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=30)
CCC_results_40e = extraction_attack(CopycatCNN, x_train, y_train, [5,10,20,30], epoch=40)

In [None]:
# CopycatCNN results, 20 epochs (one row per query fraction).
CCC_results_20e.head()

In [None]:
# CopycatCNN results, 30 epochs (one row per query fraction).
CCC_results_30e.head()

In [None]:
# CopycatCNN results, 40 epochs (one row per query fraction).
CCC_results_40e.head()

In [None]:
# Stack the six result tables (KnockoffNets and CopycatCNN at 20/30/40 epochs).
# The index is not reset, so each sub-table's row labels repeat in the combined frame.
EA_results = pd.concat([KON_results_20e,KON_results_30e,KON_results_40e,CCC_results_20e,CCC_results_30e,CCC_results_40e])
EA_results.head(24)

In [None]:
# Persist the combined table in the current working directory (index column included).
EA_results.to_csv("Extraction attacks results.csv")

In [None]:
# Random 10 percent of the training set; used by the manual attack cells further down.
partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=10)

## Copycat CNN attack NPD-SL + PD-SL

In [None]:
def CCC_extraction_attack(npd_data, x_train, y_train, fractions, epoch=30, verbose=0):
    """Two-stage Copycat CNN extraction: pre-train on `npd_data`, then fine-tune on `x_train` subsets.

    Stage 1 trains one substitute on all of `npd_data` (labelled by the victim)
    and snapshots its weights to "CCC_temp.h5" in the working directory.
    Stage 2, for each entry of `fractions`, restores that snapshot and
    continues the attack on a random subset of `x_train` of that percentage.
    The victim and the stolen model are scored on 100-per-class samples of
    the train and test sets.

    NOTE: the test sample is drawn from the notebook-level globals `x_test`
    and `y_test`; they must exist before this function is called.

    Args:
        npd_data: images used for the first (pre-training) stage.
        x_train: images sampled for the second stage.
        y_train: one-hot labels for `x_train`; used for sampling only, the
            attack itself is not given labels.
        fractions: iterable of percentages of `x_train` used in stage 2.
        epoch: epochs per ``extract`` call.
        verbose: verbosity passed to Keras ``evaluate``.

    Returns:
        pandas DataFrame with one row per fraction. Empty (columns only) when
        `fractions` is empty.
    """
    result_columns = ['Attack', "NPD Queries", 'Fraction of train data',
                      'Train queries',
                      'Epochs trained', 'Victim clf on Train data',
                      'Victim clf on Test data', 'Stolen clf on Train data',
                      'Stolen clf on Test data']

    data_size = x_train.shape[0]
    epochs = epoch
    npd_size = npd_data.shape[0]

    ### Train and Test sample data from each class to get final results
    print("Extract 100 Training samples from each class:")
    x_train_adv, y_train_adv = sample_by_class(x_train, y_train, num_samples=100)
    print("Extract 100 Test samples from each class:")
    x_test_adv, y_test_adv = sample_by_class(x_test, y_test, num_samples=100)

    ge_cifar_clf = build_ganeval_model()
    cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)

    # Victim baseline: computed once, repeated on every result row.
    loss1, acc1 = cloud_art_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
    loss2, acc2 = cloud_art_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)

    sub_clf = build_substitute_model()
    stolen_clf = KerasClassifier(model=sub_clf, clip_values=(0, 1), use_logits=True)

    # The same attack object is reused for both stages.
    attack = CopycatCNN(classifier=cloud_art_clf,
                        batch_size_fit=32,
                        batch_size_query=32,
                        nb_epochs=epochs,
                        nb_stolen=npd_size)

    # Stage 1: pre-train on npd_data, then snapshot the weights.
    stolen_clf = attack.extract(npd_data, thieved_classifier=stolen_clf)
    stolen_clf._model.save_weights("CCC_temp.h5")

    clf_results = []

    for each_frac in fractions:

        max_queries = int(data_size*each_frac/100)
        print("Attacking the victim with %d percent of training data: %d queries..."%(each_frac, max_queries))

        # Restart every budget from the stage-1 snapshot so runs do not build on each other.
        stolen_clf._model.load_weights("CCC_temp.h5")
        partial_x_train, _ = subset_data(x_train, y_train, fraction=each_frac)

        # Stage 2: continue the attack on the in-domain subset.
        stolen_clf = attack.extract(partial_x_train, thieved_classifier=stolen_clf)

        loss3, acc3 = stolen_clf._model.evaluate(x_train_adv, y_train_adv, verbose=verbose)
        loss4, acc4 = stolen_clf._model.evaluate(x_test_adv, y_test_adv, verbose=verbose)

        clf_results.append(("Copycat CNN", npd_size, each_frac, max_queries, epochs,
                            (round(loss1,4), acc1),
                            (round(loss2,4), acc2),
                            (round(loss3,4), acc3),
                            (round(loss4,4), acc4)))

    # Built once after the loop. The original built it inside the loop, which
    # raised UnboundLocalError at `return` when `fractions` was empty.
    results_df = pd.DataFrame(clf_results, columns=result_columns)
    return results_df
        
        

In [None]:
from tensorflow.keras.datasets import cifar100

In [None]:
# CIFAR-100 training split; passed as `npd_data` to the Copycat CNN attack below.
(cifar100_x, cifar100_y), _ = cifar100.load_data(label_mode='fine')
print('cifar100 training data shape:', cifar100_x.shape)

# Normalize pixel values
cifar100_x = cifar100_x/255

# Sample 20,000 data points randomly from the data
# (no seed is set, so the selection differs between runs)
idx = np.random.choice(cifar100_x.shape[0], 20000, replace=False)
cifar100_x = cifar100_x[idx].copy()
cifar100_y = cifar100_y[idx].copy()

print('Final NPD size of cifar100 training data:', cifar100_x.shape)

In [None]:
# Two-stage Copycat CNN (CIFAR-100 pre-training, then CIFAR-10 subsets) at 30 and 40 epochs.
# NOTE: this overwrites the CCC_results_30e / CCC_results_40e tables produced
# earlier by extraction_attack(CopycatCNN, ...).
CCC_results_30e = CCC_extraction_attack(cifar100_x, x_train, y_train, [5,10,20,30], epoch=30)
CCC_results_40e = CCC_extraction_attack(cifar100_x, x_train, y_train, [5,10,20,30], epoch=40)

In [None]:
# Two-stage Copycat CNN results, 30 epochs.
CCC_results_30e.head()

In [None]:
# Two-stage Copycat CNN results, 40 epochs.
CCC_results_40e.head()

In [None]:
# Combine both epoch settings; the index is not reset, so row labels repeat.
CCC_results = pd.concat([CCC_results_30e,CCC_results_40e])
CCC_results.head(24)

In [None]:
# Persist the two-stage Copycat CNN table in the current working directory.
CCC_results.to_csv("Extraction attack - Copycat CNN results.csv")

In [6]:
# Rebuild the victim first: build_ganeval_model() calls K.clear_session(), so it
# has to run before the substitute models are created. The earlier
# extraction_attack / CCC_extraction_attack calls also clear the session, which
# presumably leaves the previously built global ge_cifar_clf unusable.
ge_cifar_clf = build_ganeval_model()

sub_clf1 = build_substitute_model()
# sub_clf2 backs stolen_clf2 (KnockoffNets) in the cells below. It used to be
# commented out, which raised NameError there on a fresh kernel.
sub_clf2 = build_substitute_model()

sub_clf1.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        896       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 15, 15, 32)        0         
____

In [None]:
# ART wrappers: the victim plus one wrapper per substitute model.
# Requires ge_cifar_clf, sub_clf1 and sub_clf2 to exist in the kernel.
cloud_art_clf = KerasClassifier(model=ge_cifar_clf, clip_values=(0, 1), use_logits=True)
stolen_clf1 = KerasClassifier(model=sub_clf1, clip_values=(0, 1), use_logits=True)
stolen_clf2 = KerasClassifier(model=sub_clf2, clip_values=(0, 1), use_logits=True)

In [None]:
# Copycat CNN attack on the victim: 20 epochs, budget of 20000 queries
# (the size of the CIFAR-100 sample drawn above).
attack_CCC = CopycatCNN(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=20,
                        nb_stolen=20000)

In [None]:
# Stage 1: train the substitute on the CIFAR-100 sample.
stolen_clf1 = attack_CCC.extract(cifar100_x, thieved_classifier=stolen_clf1)

In [None]:
# Snapshot the stage-1 weights. sub_clf1 is the Keras model wrapped by stolen_clf1.
# Same file name that CCC_extraction_attack writes.
sub_clf1.save_weights("CCC_temp.h5")

In [None]:
# Stage 2: continue the attack on the CIFAR-10 subset.
stolen_clf1 = attack_CCC.extract(partial_x_train, thieved_classifier=stolen_clf1)

In [None]:
# Snapshot the weights after the first stage-2 pass.
stolen_clf1._model.save_weights("CCC_temp1.h5")

In [None]:
# Same extract call as two cells above, applied to the already-trained classifier.
# NOTE(review): presumably intentional extra training rather than a duplicated cell -- confirm.
stolen_clf1 = attack_CCC.extract(partial_x_train, thieved_classifier=stolen_clf1)

In [None]:
# Stolen model scored against the true labels of the subset it was trained on.
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)

In [None]:
# Victim vs. stolen model on the per-class train and test samples.
# Only the last call's return value is shown as the cell output.
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)
cloud_art_clf._model.evaluate(x_train_adv, y_train_adv)
stolen_clf1._model.evaluate(x_train_adv, y_train_adv)
cloud_art_clf._model.evaluate(x_test_adv, y_test_adv)
stolen_clf1._model.evaluate(x_test_adv, y_test_adv)

In [None]:
%%time
# Short KnockoffNets run (2 epochs, 2000 queries, random sampling) that trains stolen_clf2.
attack_KN = KnockoffNets(classifier=cloud_art_clf, 
                        batch_size_fit=32, 
                        batch_size_query=32,
                        nb_epochs=2,
                        nb_stolen=2000,
                        sampling_strategy='random')
stolen_clf2 = attack_KN.extract(partial_x_train, partial_y_train, thieved_classifier=stolen_clf2)

In [None]:
ge_flat_clf = build_ganeval_model_flat()
ge_flat_clf.summary()

partial_x_train, partial_y_train = subset_data(x_train, y_train, fraction=0.2)
partial_x_train = partial_x_train[:,:,:,0]

x_mean = partial_x_train.mean()
x_std = partial_x_train.std()

partial_x_train = (partial_x_train.reshape(-1,32*32*1) - x_mean)/x_std
partial_x_train.shape

In [None]:
ge_flat_clf.evaluate(partial_x_train, partial_y_train)

flat_art_clf = KerasClassifier(model=ge_flat_clf, clip_values=(0, 1), use_logits=True)

attack_FEN = FunctionallyEquivalentExtraction(classifier=flat_art_clf, num_neurons=10)
stolen_clf_FEN = attack_FEN.extract(partial_x_train)

In [None]:
### Sanity check
# Both stolen models on the CIFAR-10 subset. Only the last call's return value
# is shown as the cell output.
stolen_clf1._model.evaluate(partial_x_train, partial_y_train)

stolen_clf2._model.evaluate(partial_x_train, partial_y_train)

# stolen_clf_FEN._model.evaluate(partial_x_train, partial_y_train)

In [None]:
# Victim and both stolen models on the per-class TRAIN sample.
cloud_art_clf._model.evaluate(x_train_adv, y_train_adv)
stolen_clf1._model.evaluate(x_train_adv, y_train_adv)
stolen_clf2._model.evaluate(x_train_adv, y_train_adv)
# stolen_clf_FEN._model.evaluate(x_train_adv, y_train_adv)

In [None]:
# Victim and both stolen models on the per-class TEST sample.
cloud_art_clf._model.evaluate(x_test_adv, y_test_adv)
stolen_clf1._model.evaluate(x_test_adv, y_test_adv)
stolen_clf2._model.evaluate(x_test_adv, y_test_adv)
# stolen_clf_FEN._model.evaluate(x_test_adv, y_test_adv)