### Our objective: robustify a given neural network 
0. Prepare for experiments (DONE)
1. Display the pattern of how benign samples and adversarial samples are evaluated by PI (e.g., B->B->B->B, B->B->B->A). (DONE)
2. Identify the layer that requires further improvement (PENDING)
3. A. Robustify model (PENDING)
3. B. Store the robustified model (PENDING)
4. Evaluate effectiveness (whether the robustified model truly enhances robustness in terms of attack success rate or PI identification rate) (PENDING)

##### Sub-tasks (functionality required to carry out the procedures above)
1. Print out the distribution after robustifying (DONE)
2. Complete robustified CNN (DONE)

##### Questions 
1. What if we re-train the existing model after inserting a dropout layer, instead of training a completely new robustified model? (PENDING)
2. A. Experimental results? Dropout rate of a certain layer vs. distribution (PENDING)
2. B. Experimental results? Dropout rate of a certain layer vs. attack success rate (PENDING)

In [1]:
'''
0. Prepare for experiments: create the property-inference interface (PI),
   configure it through meta-parameters, and prepare the dataset.
'''
%matplotlib inline
import property_inference_interface
PI = property_inference_interface.PropertyInferenceInterface()

# Experiment configuration handed to PI.set_meta_params() below.
meta_params = {
    'num_of_LPs': 4,
    'size_of_train_set': 1000,
    'size_of_test_set': 50,
    'flatten': False, 
    'model_type': 'CNN',
    'adv_attack': 'i_FGSM'
}

# Register the configuration and echo it, then prepare the dataset and
# print its shape.
PI.set_meta_params(meta_params)
PI.print_meta_params()
PI.prepare_dataset()
PI.print_dataset_shape()

num_of_LPs           : 4
size_of_train_set    : 1000
size_of_test_set     : 50
flatten              : False
model_type           : CNN
adv_attack           : i_FGSM
Train dataset
(1000, 1, 28, 28) (1000,)
Test dataset
(50, 1, 28, 28) (50,)


In [2]:
'''
0. Prepare for experiments: load the stored model and report its accuracy.
'''
# Alternative path: generate a model and store it instead of loading the
# stored file (uncomment the two lines below).
# PI.generate_model(num_of_epochs=15)
# PI.store_model('MNIST_CNN.pt')
PI.load_model('MNIST_CNN.pt')

# Evaluate the original (non-robustified) model on the train and test splits.
print('Evaluate accurancy: original')
PI.eval_model('train', on_robustified_model=False)
PI.eval_model('test', on_robustified_model=False)

Evaluate accurancy: original




Model ( train ) accurancy: 0.991
Model ( test ) accurancy: 0.98


0.98

In [None]:
'''
0. Prepare for experiments: generate the LPs and evaluate the algorithm on
   the test set.
'''
PI.generate_LPs()
# The result unpacks into three (B, A) pairs: detection ratios, LP outputs,
# and LP scores.
# NOTE(review): later cells treat the LP outputs as 'benign'/'adversarial'
# labels and the scores as 2-D (sample, LP) data -- confirm against
# evaluate_algorithm_on_test_set.
(B_detect_ratio, A_detect_ratio), (B_LPs, A_LPs), (B_LPs_score, A_LPs_score) = PI.evaluate_algorithm_on_test_set(verbose=True)

# B: Benign (normal) samples within test dataset 
# B2: Benign (normal) samples within train dataset 
# A: Adversarial samples based on benign samples within test dataset 

In [None]:
'''
1. Display the pattern of how benign samples and adversarial samples are evaluated by PI (e.g., B->B->B->B, B->B->B->A).
'''
import matplotlib.pyplot as plt
import numpy as np

print(B_detect_ratio, A_detect_ratio)

# Label inserted into the plot title in front of 'qr'.
qr = '95'
BLPs, ALPs = np.array(B_LPs), np.array(A_LPs)
print(BLPs.shape, ALPs.shape)

# Encode every verdict as 1 ('benign') or 0 (anything else) and average over
# the sample axis to get the fraction judged benign per column.
# The builtin int is used because the np.int alias was removed in NumPy 1.24.
BLPs = (BLPs == 'benign').astype(int)
prob_BLPs = np.sum(BLPs, axis=0) / BLPs.shape[0]

ALPs = (ALPs == 'benign').astype(int)
prob_ALPs = np.sum(ALPs, axis=0) / ALPs.shape[0]

print('This indicates the portion of inputs to be judged as "benign"')
print(prob_BLPs, 'test dataset (benign)')
print(prob_ALPs, 'test dataset (adversarial)')

# create plot
fig, ax = plt.subplots()
# Derive the number of bar groups from the data instead of hard-coding 4.
num_of_layers = len(prob_BLPs)
index = np.arange(num_of_layers)
bar_width, opacity = 0.2, 0.7

rects1 = plt.bar(index, prob_BLPs, bar_width, alpha=opacity, color='g', label='Test Ben')
# The red bars show the adversarial inputs, so they get their own legend label.
rects2 = plt.bar(index + bar_width, prob_ALPs, bar_width, alpha=opacity, color='r', label='Test Adv')

plt.xlabel('I-th layer')
plt.ylabel('Benign ratio')
plt.title('Benign ratio in different layers ('+qr+'qr)')
# Bars are centre-aligned, so the middle of each pair is half a bar width
# to the right of the first bar.
plt.xticks(index + bar_width / 2, [str(i + 1) for i in range(num_of_layers)])
plt.legend()

plt.tight_layout()
plt.show()

# Scatter the scores of every column on one axes: score on the x axis,
# sample index on the y axis (green = benign, red = adversarial).
B_LPs_score, A_LPs_score = np.array(B_LPs_score), np.array(A_LPs_score)
for i in range(B_LPs_score.shape[1]):
    B_score, A_score = B_LPs_score[:, i], A_LPs_score[:, i]
    B_indices, A_indices = np.arange(B_score.shape[0]), np.arange(A_score.shape[0])
    plt.plot(B_score, B_indices, 'go')
    plt.plot(A_score, A_indices, 'ro')

plt.show()

In [None]:
'''
2. Identify the layer required further improvement (PENDING)
'''
# TODO: add a function that automatically locates which layer should be further improved

In [None]:
'''
3. Robustify model 
let's say it's the 3rd layer 
'''

# NOTE(review): the positional arguments are presumably the model type, the
# number of epochs and the dropout rate -- confirm against the signature of
# generate_robustified_model.
PI.generate_robustified_model('CNN', 15, 0.3)
# Report train/test accuracy for the original model, then for the
# robustified one, so the two can be compared.
print('Evaluate accurancy: original')
PI.eval_model('train', on_robustified_model=False)
PI.eval_model('test', on_robustified_model=False)
print('Evaluate accurancy: robustified')
PI.eval_model('train', on_robustified_model=True)
PI.eval_model('test', on_robustified_model=True)

In [None]:
def robustify_by_dr(PI):
    """Evaluate PI on the robustified model and summarise the verdicts.

    Overwrites ``PI.model`` with a deep copy of ``PI.robustified_model``,
    regenerates the LPs, runs ``PI.evaluate_algorithm_on_test_set`` and
    prints the detection ratios, the verdict array shapes and the fraction
    of inputs judged 'benign'.

    NOTE: ``PI.model`` is replaced and not restored; reload the original
    model afterwards if it is still needed.

    Args:
        PI: interface object exposing ``robustified_model``,
            ``generate_LPs()`` and ``evaluate_algorithm_on_test_set()``.

    Returns:
        Tuple ``(prob_ALPs, prob_BLPs, A_LPs_score, B_LPs_score)``: the
        benign fractions for the adversarial and benign inputs, followed by
        the corresponding scores exactly as returned by the evaluation.
    """
    import copy
    import numpy as np

    # Evaluate against a copy so later changes to PI.model cannot alter
    # PI.robustified_model.
    PI.model = copy.deepcopy(PI.robustified_model)
    PI.generate_LPs()
    (B_detect_ratio, A_detect_ratio), (B_LPs, A_LPs), (B_LPs_score, A_LPs_score) = PI.evaluate_algorithm_on_test_set(verbose=False)
    print(B_detect_ratio, A_detect_ratio)

    BLPs, ALPs = np.array(B_LPs), np.array(A_LPs)
    print(BLPs.shape, ALPs.shape)

    # Encode every verdict as 1 ('benign') or 0 (anything else) and average
    # over the sample axis. The builtin int is used because the np.int alias
    # was removed in NumPy 1.24.
    BLPs = (BLPs == 'benign').astype(int)
    prob_BLPs = np.sum(BLPs, axis=0) / BLPs.shape[0]

    ALPs = (ALPs == 'benign').astype(int)
    prob_ALPs = np.sum(ALPs, axis=0) / ALPs.shape[0]

    print('This indicates the portion of inputs to be judged as "benign"')
    print(prob_BLPs, 'test dataset (benign)')
    print(prob_ALPs, 'test dataset (adversarial)')
    return (prob_ALPs, prob_BLPs, A_LPs_score, B_LPs_score)

def draw(results, qr='95'):
    """Plot the benign ratios and the scores returned by robustify_by_dr.

    Shows two figures: a grouped bar chart of the fraction of benign (green)
    and adversarial (red) inputs judged 'benign', and a row of scatter plots
    (one per score column) with the score on the x axis and the sample index
    on the y axis.

    Args:
        results: tuple ``(prob_ALPs, prob_BLPs, A_LPs_score, B_LPs_score)``.
        qr: label inserted into the bar-chart title in front of 'qr'.
            Defaults to '95', the value used elsewhere in this notebook.
    """
    # Imported locally so the function does not depend on another notebook
    # cell having been run first.
    import matplotlib.pyplot as plt
    import numpy as np

    prob_ALPs, prob_BLPs, A_LPs_score, B_LPs_score = results

    # create plot; the number of bar groups follows the data
    num_of_layers = len(prob_BLPs)
    index = np.arange(num_of_layers)
    bar_width, opacity = 0.2, 0.7

    plt.bar(index, prob_BLPs, bar_width, alpha=opacity, color='g', label='Test Ben')
    # The red bars show the adversarial inputs, so they get their own label.
    plt.bar(index + bar_width, prob_ALPs, bar_width, alpha=opacity, color='r', label='Test Adv')

    plt.xlabel('I-th layer')
    plt.ylabel('Benign ratio')
    plt.title('Benign ratio in different layers ('+qr+'qr)')
    # Bars are centre-aligned, so the middle of each pair is half a bar
    # width to the right of the first bar.
    plt.xticks(index + bar_width / 2, [str(i + 1) for i in range(num_of_layers)])
    plt.legend()
    plt.tight_layout()
    plt.show()

    plt.figure(figsize=(18, 3))
    B_LPs_score, A_LPs_score = np.array(B_LPs_score), np.array(A_LPs_score)
    # One subplot per score column instead of a fixed grid of four.
    num_of_scores = B_LPs_score.shape[1]
    for i in range(num_of_scores):
        B_score, A_score = B_LPs_score[:, i], A_LPs_score[:, i]
        B_indices, A_indices = np.arange(B_score.shape[0]), np.arange(A_score.shape[0])
        plt.subplot(1, num_of_scores, i + 1)
        plt.plot(B_score, B_indices, 'go')
        plt.plot(A_score, A_indices, 'ro')
    plt.show()


In [None]:
# Evaluate PI on the robustified model, then plot the returned summary.
# Note that robustify_by_dr overwrites PI.model.
results = robustify_by_dr(PI)
draw(results)