In [None]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K
import tensorflow as tf
#from keras.backend.tensorflow_backend import set_session
import numpy as np
import os
#import dataaug
#from keras.backend import set_session
from tensorflow.keras.utils import to_categorical

In [None]:
# Training hyper-parameters and problem size
batch_size = 16
num_classes = 10
epochs = 200

# input image dimensions
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the single channel axis where the configured Keras backend expects it.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and divide every pixel value by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Per-sample shape handed to the first layer of the model.
input_shape = x_train.shape[1:]

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
#count label
def count_labels(label):
    """Count how many samples belong to each class of a one-hot label matrix.

    The class of a row is the index of its largest entry (``np.argmax``).
    The number of classes is taken from the width of ``label`` itself, so the
    function does not rely on a notebook-level ``num_classes`` variable.

    Args:
        label: 2-D array-like of shape ``(n_samples, n_classes)``.

    Returns:
        A tuple ``(counts, total)``: ``counts`` is an integer array of shape
        ``(1, n_classes)`` holding the per-class sample counts and ``total``
        is the sum of those counts (equal to ``n_samples``).

    Raises:
        ValueError: if ``label`` is not 2-D.
    """
    label = np.asarray(label)
    if label.ndim != 2:
        raise ValueError(
            "label must be a 2-D (n_samples, n_classes) array, got shape %s"
            % (label.shape,))

    n_classes = label.shape[1]
    class_ids = np.argmax(label, axis=1)
    # minlength keeps a column for classes that never occur (and for an
    # empty label matrix); reshape restores the original (1, n_classes) layout.
    count_train_labels = (np.bincount(class_ids, minlength=n_classes)
                          .astype(int)
                          .reshape(1, n_classes))
    return (count_train_labels, np.sum(count_train_labels))

In [None]:
#poisoning label
def poisoning_labels(label, size):
    """Return a copy of ``label`` in which the first ``size`` rows are flipped.

    For each of the first ``size`` rows the entry at the current class (the
    ``np.argmax`` of the row) is set to 0 and a different class is set to 1:
    class 0 for every row whose class is not 0, and class 1 for rows whose
    class is 0.

    The input array is never modified. Working on a copy keeps repeated calls
    on the same labels from stacking: flipping an already flipped row would
    send a poisoned class-0 sample back to class 0.

    Args:
        label: 2-D one-hot label array of shape ``(n_samples, n_classes)``.
        size: number of leading rows to poison; values <= 0 poison nothing.

    Returns:
        A new array with the same shape and dtype as ``label``.

    Raises:
        IndexError: if ``size`` is larger than the number of rows.
    """
    poisoned = np.array(label, copy=True)
    if size > poisoned.shape[0]:
        raise IndexError(
            "cannot poison %d labels: only %d rows available"
            % (size, poisoned.shape[0]))
    if size <= 0:
        return poisoned

    rows = np.arange(size)
    true_class = np.argmax(poisoned[:size], axis=1)
    # Clear the current class first, then set the replacement class.
    poisoned[rows, true_class] = 0
    target_class = np.where(true_class == 0, 1, 0)
    poisoned[rows, target_class] = 1
    return poisoned
# Preview the poisoning on a copy so that y_train itself is still unmodified
# when the experiment cell further down poisons it with poisoning_size.
poisoning_labels(y_train.copy(), 50)

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def DataGeneratorFunMNIST(data_format=None):
    """Build the ImageDataGenerator used to augment the sampled training images.

    Only small random rotations and horizontal/vertical shifts are enabled;
    every other augmentation option is switched off.

    Args:
        data_format: "channels_first" or "channels_last". When None (the
            default) the value of ``K.image_data_format()`` is used, which is
            the same setting the data-loading cell uses to reshape the images,
            so the generator and the image arrays agree on the channel axis.

    Returns:
        The configured ``ImageDataGenerator`` instance.
    """
    if data_format is None:
        data_format = K.image_data_format()

    datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # epsilon for ZCA whitening
        zca_epsilon=1e-06,
        # degree range for random rotations
        rotation_range=10,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # set range for random shear
        shear_range=0.0,
        # set range for random zoom
        zoom_range=0.0,
        # set range for random channel shifts
        channel_shift_range=0.,
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        # value used for fill_mode = "constant"
        cval=0.,
        # randomly flip images horizontally
        horizontal_flip=False,
        # randomly flip images vertically
        vertical_flip=False,
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=data_format,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)
    return datagen

In [None]:
import keras

# Convolutional classifier: two 3x3 convolutions, max-pooling and dropout,
# then a dense layer with dropout and a softmax over num_classes outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    # other optimizers noted by the author: Adadelta(), SGD()
    optimizer=tf.optimizers.Adam(),
    metrics=['accuracy'],
)

## here we use the same initialization for all models, and you can also use different initialization for different models
# Snapshot of the untrained weights; the training loop restores it with set_weights().
weights_initialize = model.get_weights()

In [None]:
### Set the parameters
k_value = 30           # training examples drawn (with replacement) for each base model
end = 100              # number of base models trained by the loop
poisoning_size = 3000  # number of leading training labels handed to poisoning_labels

# Track the label frequency for each testing input. The last column is used
# to save the true label, which is further used to compute the certified radius.
aggregate_result = np.zeros((x_test.shape[0], num_classes + 1), dtype=int)

#check poisoning label
# Per-class label counts are printed before and after this poisoning call.
print(count_labels(y_train))
y_train = poisoning_labels(y_train, poisoning_size)
print(count_labels(y_train))

(array([[5965, 6739, 5954, 6124, 5838, 5417, 5913, 6261, 5847, 5942]]), 60000)
(array([[8311, 6730, 5659, 5836, 5517, 5147, 5612, 5936, 5590, 5662]]), 60000)


In [None]:
## data augmentation function

datagen = DataGeneratorFunMNIST()

# Start from an empty vote table so that re-running this cell cannot stack a
# second round of votes on top of an earlier run. Layout: one column per
# class plus a final column that receives the true label after the loop.
aggregate_result = np.zeros([x_test.shape[0], num_classes + 1], dtype=int)
test_rows = np.arange(0, x_test.shape[0])

for repeat_time in range(end):
    # Every base model starts from the same initial weights. set_weights()
    # restores layer weights only, so the model is also recompiled to obtain a
    # fresh Adam optimizer; otherwise the optimizer state built up while
    # training the previous base model would carry over into this one.
    # Note that you can also use different parameters to initialize the model.
    model.set_weights(weights_initialize)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=tf.optimizers.Adam(),
                  metrics=['accuracy'])

    # sampling with replacement.
    sample_index = np.random.choice(x_train.shape[0], k_value, replace=True)

    x_train_sample = x_train[sample_index, :, :, :]
    y_train_sample = y_train[sample_index, :]

    # train the model
    model.fit(datagen.flow(x_train_sample, y_train_sample, batch_size=batch_size),
                        epochs=epochs, verbose=0, workers=4)

    # evaluate the base model and you can also comment it without influencing the results.
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Each base model casts one vote per test input for its predicted class.
    prediction_label = np.argmax(model.predict(x_test), axis=1)
    print(prediction_label)
    aggregate_result[test_rows, prediction_label] += 1

# Leave the model at its initial weights once all base models are done.
model.set_weights(weights_initialize)
# The last column stores the true class of every test input.
aggregate_result[test_rows, -1] = np.argmax(y_test, axis=1)

print('Complete')

Test loss: 2.867896795272827
Test accuracy: 0.6710000038146973
[7 2 1 ... 4 5 0]
Test loss: 1.304474949836731
Test accuracy: 0.7283999919891357
[7 2 1 ... 9 5 6]
Test loss: 1.9272674322128296
Test accuracy: 0.6554999947547913
[7 2 1 ... 4 8 6]
Test loss: 1.0956181287765503
Test accuracy: 0.753600001335144
[7 2 1 ... 4 0 6]
Test loss: 3.391611337661743
Test accuracy: 0.6262000203132629
[8 2 8 ... 4 5 6]
Test loss: 1.6736515760421753
Test accuracy: 0.6496999859809875
[7 0 1 ... 4 8 6]
Test loss: 1.3865855932235718
Test accuracy: 0.6891000270843506
[7 2 1 ... 4 4 6]
Test loss: 1.849130630493164
Test accuracy: 0.6359999775886536
[7 2 1 ... 4 3 6]
Test loss: 3.51739501953125
Test accuracy: 0.6388999819755554
[7 2 1 ... 4 5 2]
Test loss: 3.4684839248657227
Test accuracy: 0.5860999822616577
[7 2 7 ... 4 5 2]
Test loss: 1.714108943939209
Test accuracy: 0.6873000264167786
[7 2 1 ... 9 5 6]
Test loss: 1.7913686037063599
Test accuracy: 0.6653000116348267
[7 2 1 ... 9 1 6]
Test loss: 1.26550292968

In [None]:
def bagging_accuracy(result):
    """Return the majority-vote accuracy of the ensemble, in percent.

    Args:
        result: 2-D array with one row per test input. All columns except the
            last hold per-class vote counts; the last column holds the true
            label.

    Returns:
        The percentage of rows whose most-voted class (``np.argmax``, which
        picks the lowest class index on ties) equals the stored true label.
    """
    n_samples = result.shape[0]
    hits = sum(1 for row in result if row[-1] == np.argmax(row[:-1]))
    return (hits / n_samples * 100)

# Ensemble accuracy in percent, followed by the vote table it was computed from.
ensemble_accuracy = bagging_accuracy(aggregate_result)
print(ensemble_accuracy)
print(aggregate_result)

92.47
[[ 1  0  2 ...  2  2  7]
 [ 4  2 79 ...  4  0  2]
 [ 4 90  0 ...  1  0  1]
 ...
 [ 3  4  0 ...  5 27  4]
 [12  2  0 ... 28  4  5]
 [14  0  2 ...  1  1  6]]
