In [1]:
from __future__ import print_function
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt
import sklearn
from sklearn.linear_model import LogisticRegression
import pickle
import time
import torch

from utils import test_accuracy, projection, veccos, binary_search_cx, \
                  find_exp_score, find_slab_score, project_l2_centroid, \
                  project_l2_centroid_straight, project_slab, project_slab_straight,\
                  contaminate_dataset

from attackers import StraightAttack, SemiOnlineAttack, ConcentratedAttack, GreedyAttack

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
## fully online experiment

def experiment(dataset, taus=None):
    """Run the fully-online poisoning experiment on every pickled split of `dataset`.

    For each split index i in range(n_exp) the function loads
    "./data/{dataset}/{i}", fits a logistic-regression model on the init set,
    uses its negated, normalised coefficients as the attack target, builds the
    attacker selected by the global `attack_method`, and then, for every
    defense threshold tau, replays `n_horizon` online steps: at steps where the
    global mask `loc[k]` is true the attacker injects a poisoning point,
    otherwise the k-th clean point is used (only if it passes the defense).
    The per-split results are pickled to
    "./results/fully_{dataset}_{attack_method}_{defense_method}_{defense_range}_{i}".

    Parameters
    ----------
    dataset : str
        Name of the sub-directory of "./data/" holding the pickled splits.
    taus : sequence of thresholds, optional
        Defense thresholds to evaluate. When None (the default), thresholds are
        taken from the init-set defense scores at the levels
        0.3, 0.5, 0.7, 0.9 and 1 (this was the only behaviour of the original
        code, which overwrote any value passed here).

    Returns
    -------
    (w_list, (X_test, Y_test)) for the LAST split processed. `w_list[j][k]` is
    `attacker.w_curr` after step k under the j-th threshold.

    Raises
    ------
    ValueError
        If the global `attack_method` or `defense_method` is not recognised.

    Notes
    -----
    Reads these module-level globals: n_exp, d, R, eta, defense, n_iter_warmup,
    n_attack, n_horizon, loc, attack_method, defense_method, defense_range.
    The "./results" directory must already exist.
    """
    # Kept separate from the results path: the original reused one variable
    # for both, which corrupted the input path from the second split onwards.
    data_dir = "./data/" + dataset + "/"
    clf = LogisticRegression(fit_intercept=False, solver='liblinear')

    attacker_classes = {
        "simplistic": StraightAttack,
        "greedy": GreedyAttack,
        "concentrated": ConcentratedAttack,
        "semi-online": SemiOnlineAttack,
    }
    # Name of the attacker method that returns the init-set scores per defense.
    score_getters = {
        "slab": "slab_scores",
        "norm": "l2_norms",
        "L2": "l2_distances_to_centroid",
    }
    if attack_method not in attacker_classes:
        raise ValueError("unknown attack_method: {!r}".format(attack_method))
    if defense_method not in score_getters:
        raise ValueError("unknown defense_method: {!r}".format(defense_method))

    tau_levels = [0.3, 0.5, 0.7, 0.9, 1]

    for i in range(n_exp):
        print("Running the {}-th experiment".format(i))
        start_time = time.time()
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at data files you generated yourself.
        with open(data_dir + str(i), "rb") as f:
            datasets = pickle.load(f)

        X_init, Y_init = datasets[0]    # generate defense constraints and init w if necessary
        X_clean, Y_clean = datasets[1]  # the clean data stream
        X_test, Y_test = datasets[3]    # the actual test set
        # datasets[2] (validation set) is not used here directly; the whole
        # `datasets` list is handed to the attacker below.

        clf.fit(X_init, Y_init)
        w_0 = np.zeros((1, d))
        print("Initial accuracy is {}".format(test_accuracy(X_test, Y_test, w_0)))
        # Attack target: the negated, unit-norm classifier fitted on the init set.
        w_t = -clf.coef_
        w_t /= norm(w_t)
        print("Target accuracy is {}".format(test_accuracy(X_test, Y_test, w_t)))

        attacker = attacker_classes[attack_method]()
        set_param_args = [datasets, w_0, w_t, R, eta,
                          defense, n_iter_warmup, n_attack]
        if attack_method == "semi-online":
            # The semi-online attacker additionally receives the first
            # n_attack clean points with negated features.
            X_adv, Y_adv = X_clean[:n_attack, :], Y_clean[:n_attack]
            set_param_args.append((-X_adv, Y_adv))
        attacker.set_param(*set_param_args)

        attacker.set_init_set(X_init, Y_init)
        # Queried unconditionally so the sequence of attacker calls matches the
        # original code even when the caller supplies its own thresholds.
        scores = getattr(attacker, score_getters[defense_method])()[-1]
        if taus is None:
            # presumably `scores` is sorted ascending so these are quantiles
            # -- TODO confirm against the attacker implementation
            run_taus = [scores[int(level*len(scores))-1] for level in tau_levels]
        else:
            run_taus = list(taus)
        print(run_taus)

        w_list = [[] for _ in run_taus]
        res = [[] for _ in run_taus]
        accs = [[] for _ in run_taus]
        online_res = [[] for _ in run_taus]
        for j, tau in enumerate(run_taus):

            attacker.set_defense_threshold(defense_method, tau)
            attacker.set_defense_range(defense_range)
            attacker.set_init_set(X_init, Y_init)

            i_clean = 0

            for k in range(n_horizon):
                if loc[k]:  # attack
                    if attack_method == "semi-online":
                        attacker.set_param_lite(attacker.w_curr, w_t, 0, 1,
                                                (-X_clean[k].reshape(1, -1), Y_clean[k].reshape(1)))

                    pt = attacker.find_best_poisoning_points()
                    attacker.update_w(pt)
                    online_res[j].append(0)
                else:       # update on clean point
                    pt = (attacker.X_clean[k, :], attacker.Y_clean[k])
                    # Score the current model on the clean point BEFORE updating.
                    online_res[j].append(round(test_accuracy(X_clean[k].reshape(1, -1),
                                                             Y_clean[k].reshape(1),
                                                             attacker.w_curr)))
                    if attacker.meet_constraints(pt):
                        attacker.update_w(pt)
                    i_clean += 1
                acc = test_accuracy(X_test, Y_test, attacker.w_curr)
                # NOTE(review): stored by reference; if update_w mutates w_curr
                # in place all entries alias one array -- confirm.
                w_list[j].append(attacker.w_curr)
                accs[j].append(acc)
                if k % 50 == 0:
                    print("Accuracy at time {} is {}".format(k, acc))

            # Guard against a horizon made only of attack steps.
            clean_acc = sum(online_res[j])/i_clean if i_clean else float("nan")
            print("acc on clean stream is", clean_acc)

            print(tau, sum(accs[j])/n_horizon)
            res[j] = (sum(accs[j])/n_horizon)
            attacker.reset()
            print(res)
            print("--- %s seconds ---" % (time.time() - start_time))

        results = [res, online_res, accs, w_list, run_taus, loc]
        result_path = "_".join(["./results/fully", dataset, attack_method,
                                defense_method, defense_range, str(i)])
        print(result_path)
        with open(result_path, "wb") as f:
            pickle.dump(results, f)
    return w_list, (X_test, Y_test)

In [3]:
# --- Experiment configuration (read as module-level globals by experiment()) ---

# Number of pickled data splits to run.
n_exp = 1

# d is the length of the initial weight vector w_0; eta is forwarded to the
# attacker (presumably the online learning rate -- TODO confirm).
d = 784
eta = 0.01

# Sizes of the attack budget and of the data partitions.
# NOTE(review): only n_attack and n_clean are referenced in the visible code.
n_attack = 100
n_clean = 8000
n_init = 1000
n_test = 1000
n_valid = 500

n_iter_warmup = n_clean

# Length of the online stream and the fraction of its steps that are attacks.
n_horizon = 1000
attack_chance = 0.1
dataset = "MNIST"

# Boolean mask over the horizon: True marks a step where the attacker acts.
# NOTE: drawn from NumPy's global RNG without a seed, so it differs per run.
loc = np.less(np.random.permutation(n_horizon), n_horizon*attack_chance)

# Forwarded to the attacker's set_param; meaning not visible in this notebook.
R = 12


In [4]:
# Evaluate every attack against the "norm" defense.
# experiment() reads attack_method, defense_method, defense and defense_range
# as module-level globals, so they are set here rather than passed in.
defense_method = "norm"
defense = {defense_method:0}
attack_methods = ["simplistic", "greedy", "semi-online"]

# Per-attack results. Previously w_list1/res1 were overwritten on every
# iteration, so only the last attack's results survived the loop.
w_lists1 = {}
res1_by_attack = {}
for attack_method in attack_methods:
    defense_range = "all-pts"
    w_list1, (X_test, Y_test) = experiment(dataset)
    # Test accuracy along the weight trajectory of the last threshold only.
    res1 = [test_accuracy(X_test, Y_test, w) for w in w_list1[-1]]
    w_lists1[attack_method] = w_list1
    res1_by_attack[attack_method] = res1

Running the 0-th experiment
Initial accuracy is 0.5
Target accuracy is 0.008
[5.2488356, 5.786202, 6.463639, 7.5627246, 10.939077]
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.451
Accuracy at time 100 is 0.683
Accuracy at time 150 is 0.763
Accuracy at time 200 is 0.876
Accuracy at time 250 is 0.826
Accuracy at time 300 is 0.813
Accuracy at time 350 is 0.786
Accuracy at time 400 is 0.847
Accuracy at time 450 is 0.784
Accuracy at time 500 is 0.766
Accuracy at time 550 is 0.735
Accuracy at time 600 is 0.633
Accuracy at time 650 is 0.546
Accuracy at time 700 is 0.598
Accuracy at time 750 is 0.482
Accuracy at time 800 is 0.443
Accuracy at time 850 is 0.508
Accuracy at time 900 is 0.496
Accuracy at time 950 is 0.414
acc on clean stream is 0.6444444444444445
5.2488356 0.6387669999999989
[0.6387669999999989, [], [], [], []]
--- 4.607030391693115 seconds ---
Accuracy at time 0 is 0.713
Accuracy at time 50 is 0.92
Accuracy at time 100 is 0.924
Accuracy at time 150 is 0.925
Accuracy at time

Accuracy at time 50 is 0.574
Accuracy at time 100 is 0.488
Accuracy at time 150 is 0.789
Accuracy at time 200 is 0.924
Accuracy at time 250 is 0.834
Accuracy at time 300 is 0.813
Accuracy at time 350 is 0.746
Accuracy at time 400 is 0.887
Accuracy at time 450 is 0.806
Accuracy at time 500 is 0.806
Accuracy at time 550 is 0.788
Accuracy at time 600 is 0.607
Accuracy at time 650 is 0.433
Accuracy at time 700 is 0.669
Accuracy at time 750 is 0.582
Accuracy at time 800 is 0.428
Accuracy at time 850 is 0.705
Accuracy at time 900 is 0.804
Accuracy at time 950 is 0.747
acc on clean stream is 0.6822222222222222
5.2488356 0.6825930000000009
[0.6825930000000009, [], [], [], []]
--- 145.5084924697876 seconds ---
Accuracy at time 0 is 0.713
Accuracy at time 50 is 0.961
Accuracy at time 100 is 0.94
Accuracy at time 150 is 0.935
Accuracy at time 200 is 0.947
Accuracy at time 250 is 0.927
Accuracy at time 300 is 0.911
Accuracy at time 350 is 0.884
Accuracy at time 400 is 0.931
Accuracy at time 450 is

In [5]:
# Evaluate every attack against the "L2" defense.
# experiment() reads attack_method, defense_method, defense and defense_range
# as module-level globals, so they are set here rather than passed in.
defense_method = "L2"
defense = {defense_method:0}

# Per-attack results. Previously w_list2/res2 were overwritten on every
# iteration, so only the last attack's results survived the loop.
w_lists2 = {}
res2_by_attack = {}
for attack_method in attack_methods:
    defense_range = "all-pts"
    w_list2, (X_test, Y_test) = experiment(dataset)
    # Test accuracy along the weight trajectory of the last threshold only.
    res2 = [test_accuracy(X_test, Y_test, w) for w in w_list2[-1]]
    w_lists2[attack_method] = w_list2
    res2_by_attack[attack_method] = res2

Running the 0-th experiment
Initial accuracy is 0.5
Target accuracy is 0.008
[4.5975723, 5.237112, 5.9059076, 6.980779, 10.630243]
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.937
Accuracy at time 100 is 0.951
Accuracy at time 150 is 0.956
Accuracy at time 200 is 0.96
Accuracy at time 250 is 0.954
Accuracy at time 300 is 0.948
Accuracy at time 350 is 0.949
Accuracy at time 400 is 0.951
Accuracy at time 450 is 0.945
Accuracy at time 500 is 0.945
Accuracy at time 550 is 0.945
Accuracy at time 600 is 0.942
Accuracy at time 650 is 0.935
Accuracy at time 700 is 0.935
Accuracy at time 750 is 0.928
Accuracy at time 800 is 0.916
Accuracy at time 850 is 0.913
Accuracy at time 900 is 0.913
Accuracy at time 950 is 0.891
acc on clean stream is 0.91
4.5975723 0.9329050000000009
[0.9329050000000009, [], [], [], []]
--- 9.07059621810913 seconds ---
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.95
Accuracy at time 100 is 0.961
Accuracy at time 150 is 0.964
Accuracy at time 200 is 0.962
Accu

Accuracy at time 50 is 0.947
Accuracy at time 100 is 0.952
Accuracy at time 150 is 0.958
Accuracy at time 200 is 0.956
Accuracy at time 250 is 0.952
Accuracy at time 300 is 0.945
Accuracy at time 350 is 0.947
Accuracy at time 400 is 0.95
Accuracy at time 450 is 0.947
Accuracy at time 500 is 0.946
Accuracy at time 550 is 0.945
Accuracy at time 600 is 0.942
Accuracy at time 650 is 0.937
Accuracy at time 700 is 0.937
Accuracy at time 750 is 0.93
Accuracy at time 800 is 0.928
Accuracy at time 850 is 0.927
Accuracy at time 900 is 0.922
Accuracy at time 950 is 0.909
acc on clean stream is 0.92
4.5975723 0.9380770000000013
[0.9380770000000013, [], [], [], []]
--- 132.9799883365631 seconds ---
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.958
Accuracy at time 100 is 0.956
Accuracy at time 150 is 0.962
Accuracy at time 200 is 0.96
Accuracy at time 250 is 0.958
Accuracy at time 300 is 0.953
Accuracy at time 350 is 0.947
Accuracy at time 400 is 0.956
Accuracy at time 450 is 0.951
Accuracy at

In [6]:
# Evaluate every attack against the "slab" defense.
# experiment() reads attack_method, defense_method, defense and defense_range
# as module-level globals, so they are set here rather than passed in.
defense_method = "slab"
defense = {defense_method:0}

# Per-attack results. Previously w_list3/res3 were overwritten on every
# iteration, so only the last attack's results survived the loop.
w_lists3 = {}
res3_by_attack = {}
for attack_method in attack_methods:
    defense_range = "all-pts"
    w_list3, (X_test, Y_test) = experiment(dataset)
    # Test accuracy along the weight trajectory of the last threshold only.
    res3 = [test_accuracy(X_test, Y_test, w) for w in w_list3[-1]]
    w_lists3[attack_method] = w_list3
    res3_by_attack[attack_method] = res3

Running the 0-th experiment
Initial accuracy is 0.5
Target accuracy is 0.008
[2.0000656, 3.6035678, 5.7759647, 10.817202, 29.806637]
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.955
Accuracy at time 100 is 0.961
Accuracy at time 150 is 0.961
Accuracy at time 200 is 0.959
Accuracy at time 250 is 0.962
Accuracy at time 300 is 0.962
Accuracy at time 350 is 0.962
Accuracy at time 400 is 0.963
Accuracy at time 450 is 0.964
Accuracy at time 500 is 0.965
Accuracy at time 550 is 0.964
Accuracy at time 600 is 0.965
Accuracy at time 650 is 0.965
Accuracy at time 700 is 0.965
Accuracy at time 750 is 0.965
Accuracy at time 800 is 0.965
Accuracy at time 850 is 0.965
Accuracy at time 900 is 0.965
Accuracy at time 950 is 0.965
acc on clean stream is 0.9477777777777778
2.0000656 0.9619550000000148
[0.9619550000000148, [], [], [], []]
--- 3.7363219261169434 seconds ---
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.952
Accuracy at time 100 is 0.959
Accuracy at time 150 is 0.964
Accuracy at ti

[2.0000656, 3.6035678, 5.7759647, 10.817202, 29.806637]
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.94
Accuracy at time 100 is 0.949
Accuracy at time 150 is 0.953
Accuracy at time 200 is 0.954
Accuracy at time 250 is 0.955
Accuracy at time 300 is 0.956
Accuracy at time 350 is 0.956
Accuracy at time 400 is 0.956
Accuracy at time 450 is 0.957
Accuracy at time 500 is 0.957
Accuracy at time 550 is 0.956
Accuracy at time 600 is 0.958
Accuracy at time 650 is 0.961
Accuracy at time 700 is 0.958
Accuracy at time 750 is 0.959
Accuracy at time 800 is 0.961
Accuracy at time 850 is 0.961
Accuracy at time 900 is 0.96
Accuracy at time 950 is 0.957
acc on clean stream is 0.9377777777777778
2.0000656 0.9541550000000023
[0.9541550000000023, [], [], [], []]
--- 164.70500016212463 seconds ---
Accuracy at time 0 is 0.5
Accuracy at time 50 is 0.944
Accuracy at time 100 is 0.95
Accuracy at time 150 is 0.954
Accuracy at time 200 is 0.953
Accuracy at time 250 is 0.956
Accuracy at time 300 is 0.957
Accu