In [1]:
from __future__ import print_function
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt
import sklearn
from sklearn.linear_model import LogisticRegression
import pickle
import time
import torch

from utils import test_accuracy, projection, veccos, binary_search_cx, \
                  find_exp_score, find_slab_score, project_l2_centroid, \
                  project_l2_centroid_straight, project_slab, project_slab_straight,\
                  contaminate_dataset

from attackers import StraightAttack, SemiOnlineAttack, ConcentratedAttack, GreedyAttack

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
## fully online experiment

def experiment(dataset, taus=None):
    """Run the fully-online poisoning experiment and pickle the results of each run.

    For every run index ``i`` in ``range(n_exp)`` the function loads the
    pickled data splits from ``./data/<dataset>/<i>``, fits a logistic
    regression on the init split to obtain the attack target ``w_t``, builds
    the attacker selected by the global ``attack_method`` and then, for every
    defense threshold, replays ``n_horizon`` steps: where ``loc[k]`` is truthy
    the attacker injects a poisoning point, otherwise the k-th clean point is
    scored and applied only if ``attacker.meet_constraints`` accepts it.

    Besides its arguments the function reads these notebook globals:
    ``n_exp``, ``d``, ``eta``, ``R``, ``defense``, ``n_iter_warmup``,
    ``n_attack``, ``n_horizon``, ``loc``, ``attack_method``,
    ``defense_method`` and ``defense_range``.

    Parameters
    ----------
    dataset : str
        Name of the sub-directory of ``./data/`` holding the pickled runs;
        also used in the name of the result file.
    taus : sequence of float, optional
        Explicit defense thresholds to evaluate. When ``None`` (the default)
        the thresholds are read off the init-set scores at the fractions
        0.3, 0.5, 0.7, 0.9 and 1.

    Returns
    -------
    tuple
        ``(w_list, (X_test, Y_test))`` of the last run, where ``w_list[j]``
        holds ``attacker.w_curr`` after each step for the j-th threshold.

    Raises
    ------
    ValueError
        If ``attack_method`` or ``defense_method`` is not one of the
        supported names, or if ``n_exp`` is smaller than 1.
    """
    known_attacks = ("simplistic", "greedy", "concentrated", "semi-online")
    # Maps each defense name to the attacker method that yields its scores.
    score_methods = {
        "slab": "slab_scores",
        "norm": "l2_norms",
        "L2": "l2_distances_to_centroid",
    }

    # Fail fast with a readable message instead of a late NameError.
    if attack_method not in known_attacks:
        raise ValueError("Unknown attack_method {!r}; expected one of {}"
                         .format(attack_method, known_attacks))
    if defense_method not in score_methods:
        raise ValueError("Unknown defense_method {!r}; expected one of {}"
                         .format(defense_method, tuple(score_methods)))
    if n_exp < 1:
        raise ValueError("n_exp must be at least 1, got {}".format(n_exp))

    attacker_classes = {
        "simplistic": StraightAttack,
        "greedy": GreedyAttack,
        "concentrated": ConcentratedAttack,
        "semi-online": SemiOnlineAttack,
    }

    # Kept separate from the result path so that every run reads from ./data/.
    data_dir = "./data/" + dataset + "/"
    clf = LogisticRegression(fit_intercept=False, solver='liblinear')
    tau_levels = [0.3, 0.5, 0.7, 0.9, 1]

    for i in range(n_exp):
        print("Running the {}-th experiment".format(i))
        start_time = time.time()
        # NOTE: pickle executes arbitrary code on load; only use trusted data files.
        with open(data_dir + str(i), "rb") as f:
            datasets = pickle.load(f)

        X_init, Y_init = datasets[0]    # generate defense constraints and init w if necessary.
        X_clean, Y_clean = datasets[1]  # the clean data stream
        X_valid, Y_valid = datasets[2]  # validation set
        X_test, Y_test = datasets[3]    # the actual test set

        clf.fit(X_init, Y_init)
        w_0 = np.zeros((1, d))
        print("Initial accuracy is {}".format(test_accuracy(X_test, Y_test, w_0)))
        # The attack target is the negated, unit-norm classifier fitted on the init set.
        w_t = -clf.coef_
        w_t /= norm(w_t)
        print("Target accuracy is {}".format(test_accuracy(X_test, Y_test, w_t)))

        attacker = attacker_classes[attack_method]()
        if attack_method == "semi-online":
            # The semi-online attacker additionally receives the first
            # n_attack clean points with negated features.
            X_adv, Y_adv = X_clean[:n_attack, :], Y_clean[:n_attack]
            attacker.set_param(datasets, w_0, w_t, R, eta,
                               defense, n_iter_warmup, n_attack, (-X_adv, Y_adv))
        else:
            attacker.set_param(datasets, w_0, w_t, R, eta,
                               defense, n_iter_warmup, n_attack)

        attacker.set_init_set(X_init, Y_init)
        scores = getattr(attacker, score_methods[defense_method])()[-1]

        if taus is None:
            # presumably `scores` is sorted ascending, making these
            # quantile-style thresholds -- TODO confirm in attackers.py
            run_taus = [scores[int(level * len(scores)) - 1] for level in tau_levels]
        else:
            run_taus = list(taus)
        print(run_taus)

        w_list = [[] for _ in run_taus]
        res = [[] for _ in run_taus]
        accs = [[] for _ in run_taus]
        online_res = [[] for _ in run_taus]
        for j, tau in enumerate(run_taus):

            attacker.set_defense_threshold(defense_method, tau)
            attacker.set_defense_range(defense_range)
            attacker.set_init_set(X_init, Y_init)

            i_clean = 0

            for k in range(n_horizon):
                if loc[k]:  # attack
                    if attack_method == "semi-online":
                        attacker.set_param_lite(attacker.w_curr, w_t, 0, 1,
                                                (-X_clean[k].reshape(1, -1), Y_clean[k].reshape(1)))

                    pt = attacker.find_best_poisoning_points()
                    attacker.update_w(pt)
                    online_res[j].append(0)
                else:       # update on clean point
                    pt = (attacker.X_clean[k, :], attacker.Y_clean[k])
                    # Score the point with the current model before any update.
                    online_res[j].append(round(test_accuracy(X_clean[k].reshape(1, -1),
                                                             Y_clean[k].reshape(1),
                                                             attacker.w_curr)))
                    if attacker.meet_constraints(pt):
                        attacker.update_w(pt)
                    i_clean += 1
                acc = test_accuracy(X_test, Y_test, attacker.w_curr)
                # NOTE(review): if update_w mutates w_curr in place, every entry
                # stored here aliases the same array -- confirm in attackers.py.
                w_list[j].append(attacker.w_curr)
                accs[j].append(acc)
                if k % 50 == 0:
                    print("Accuracy at time {} is {}".format(k, acc))

            # Avoid a ZeroDivisionError when every step was an attack step.
            clean_acc = sum(online_res[j]) / i_clean if i_clean else float("nan")
            print("acc on clean stream is", clean_acc)

            print(tau, sum(accs[j]) / n_horizon)
            res[j] = (sum(accs[j]) / n_horizon)
            attacker.reset()
            print(res)
            print("--- %s seconds ---" % (time.time() - start_time))

        results = [res, online_res, accs, w_list, run_taus, loc]
        result_path = "_".join(["./results/fully", dataset, attack_method,
                                defense_method, defense_range, str(i)])
        print(result_path)
        with open(result_path, "wb") as f:
            pickle.dump(results, f)
    return w_list, (X_test, Y_test)

In [3]:
# Experiment configuration; experiment() reads these names as globals.
SEED = 0
np.random.seed(SEED)  # fix the RNG so the attack schedule below is reproducible

n_exp = 1                 # number of pickled runs to load (./data/<dataset>/0 .. n_exp-1)
d, eta = 784, 0.01        # d: length of the initial weight vector; eta is passed to the attacker
n_attack, n_clean, n_init, n_test, n_valid = 100, 8000, 1000, 1000, 500
n_iter_warmup = n_clean
n_horizon = 1000          # number of online steps replayed per defense threshold
attack_chance = 0.1       # fraction of the horizon that consists of attack steps
dataset = "fashionMNIST"
# Boolean mask over the horizon: exactly n_horizon*attack_chance entries are
# True (attack steps), at uniformly random positions.
loc = (np.random.permutation(n_horizon) < n_horizon*attack_chance)

R = 12                    # passed to attacker.set_param


In [4]:
# Norm defense: run every attack and keep each attack's accuracy trace.
defense_method = "norm"
defense = {defense_method:0}
attack_methods = ["simplistic", "greedy", "semi-online"]
defense_range = "all-pts"

# Per-attack results; without this each iteration overwrites the previous one.
res1_by_attack = {}
for attack_method in attack_methods:
    w_list1, (X_test, Y_test) = experiment(dataset)
    # Test accuracy of every stored weight vector for the last threshold.
    res1 = [test_accuracy(X_test, Y_test, w) for w in w_list1[-1]]
    res1_by_attack[attack_method] = res1

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[6.966801607380625, 7.542731555316739, 8.550132474332946, 10.200196326442075, 15.78008818241856]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.796
Accuracy at time 100 is 0.762
Accuracy at time 150 is 0.794
Accuracy at time 200 is 0.796
Accuracy at time 250 is 0.806
Accuracy at time 300 is 0.776
Accuracy at time 350 is 0.74
Accuracy at time 400 is 0.81
Accuracy at time 450 is 0.807
Accuracy at time 500 is 0.806
Accuracy at time 550 is 0.768
Accuracy at time 600 is 0.765
Accuracy at time 650 is 0.755
Accuracy at time 700 is 0.677
Accuracy at time 750 is 0.74
Accuracy at time 800 is 0.719
Accuracy at time 850 is 0.711
Accuracy at time 900 is 0.659
Accuracy at time 950 is 0.636
acc on clean stream is 0.7422222222222222
6.966801607380625 0.7473879999999986
[0.7473879999999986, [], [], [], []]
--- 4.467849254608154 seconds ---
Accuracy at time 0 is 0.442
Accuracy at time 50 is 0.835
Accuracy at time 100 is 

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[6.966801607380625, 7.542731555316739, 8.550132474332946, 10.200196326442075, 15.78008818241856]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.683
Accuracy at time 100 is 0.012
Accuracy at time 150 is 0.821
Accuracy at time 200 is 0.818
Accuracy at time 250 is 0.86
Accuracy at time 300 is 0.752
Accuracy at time 350 is 0.239
Accuracy at time 400 is 0.886
Accuracy at time 450 is 0.877
Accuracy at time 500 is 0.868
Accuracy at time 550 is 0.757
Accuracy at time 600 is 0.567
Accuracy at time 650 is 0.662
Accuracy at time 700 is 0.012
Accuracy at time 750 is 0.885
Accuracy at time 800 is 0.811
Accuracy at time 850 is 0.834
Accuracy at time 900 is 0.521
Accuracy at time 950 is 0.671
acc on clean stream is 0.58
6.966801607380625 0.6092459999999981
[0.6092459999999981, [], [], [], []]
--- 198.80798864364624 seconds ---
Accuracy at time 0 is 0.442
Accuracy at time 50 is 0.839
Accuracy at time 100 is 0.808
Accur

In [5]:
# L2 defense: run every attack and keep each attack's accuracy trace.
# `attack_methods` is the list defined in the norm-defense cell.
defense_method = "L2"
defense = {defense_method:0}
defense_range = "all-pts"

# Per-attack results; without this each iteration overwrites the previous one.
res2_by_attack = {}
for attack_method in attack_methods:
    w_list2, (X_test, Y_test) = experiment(dataset)
    # Test accuracy of every stored weight vector for the last threshold.
    res2 = [test_accuracy(X_test, Y_test, w) for w in w_list2[-1]]
    res2_by_attack[attack_method] = res2

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[5.873603364436745, 6.905138157495109, 7.852809741062167, 9.04583067227028, 14.03859315480033]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.84
Accuracy at time 100 is 0.849
Accuracy at time 150 is 0.853
Accuracy at time 200 is 0.846
Accuracy at time 250 is 0.847
Accuracy at time 300 is 0.835
Accuracy at time 350 is 0.82
Accuracy at time 400 is 0.833
Accuracy at time 450 is 0.835
Accuracy at time 500 is 0.836
Accuracy at time 550 is 0.829
Accuracy at time 600 is 0.826
Accuracy at time 650 is 0.825
Accuracy at time 700 is 0.808
Accuracy at time 750 is 0.812
Accuracy at time 800 is 0.809
Accuracy at time 850 is 0.81
Accuracy at time 900 is 0.802
Accuracy at time 950 is 0.801
acc on clean stream is 0.8255555555555556
5.873603364436745 0.8221759999999976
[0.8221759999999976, [], [], [], []]
--- 3.6038308143615723 seconds ---
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.856
Accuracy at time 100 is 0

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[5.873603364436745, 6.905138157495109, 7.852809741062167, 9.04583067227028, 14.03859315480033]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.848
Accuracy at time 100 is 0.852
Accuracy at time 150 is 0.857
Accuracy at time 200 is 0.848
Accuracy at time 250 is 0.851
Accuracy at time 300 is 0.845
Accuracy at time 350 is 0.833
Accuracy at time 400 is 0.846
Accuracy at time 450 is 0.846
Accuracy at time 500 is 0.846
Accuracy at time 550 is 0.841
Accuracy at time 600 is 0.842
Accuracy at time 650 is 0.841
Accuracy at time 700 is 0.828
Accuracy at time 750 is 0.829
Accuracy at time 800 is 0.828
Accuracy at time 850 is 0.828
Accuracy at time 900 is 0.826
Accuracy at time 950 is 0.825
acc on clean stream is 0.8422222222222222
5.873603364436745 0.8343800000000008
[0.8343800000000008, [], [], [], []]
--- 118.9501211643219 seconds ---
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.864
Accuracy at time 100 is

In [6]:
# Slab defense: run every attack and keep each attack's accuracy trace.
# `attack_methods` is the list defined in the norm-defense cell.
defense_method = "slab"
defense = {defense_method:0}
defense_range = "all-pts"

# Per-attack results; without this each iteration overwrites the previous one.
res3_by_attack = {}
for attack_method in attack_methods:
    w_list3, (X_test, Y_test) = experiment(dataset)
    # Test accuracy of every stored weight vector for the last threshold.
    res3 = [test_accuracy(X_test, Y_test, w) for w in w_list3[-1]]
    res3_by_attack[attack_method] = res3

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[6.779656432481225, 11.644085100338394, 17.264378418085613, 32.042097807863875, 71.17228956428355]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.915
Accuracy at time 100 is 0.918
Accuracy at time 150 is 0.919
Accuracy at time 200 is 0.92
Accuracy at time 250 is 0.917
Accuracy at time 300 is 0.922
Accuracy at time 350 is 0.92
Accuracy at time 400 is 0.92
Accuracy at time 450 is 0.92
Accuracy at time 500 is 0.92
Accuracy at time 550 is 0.919
Accuracy at time 600 is 0.918
Accuracy at time 650 is 0.917
Accuracy at time 700 is 0.918
Accuracy at time 750 is 0.92
Accuracy at time 800 is 0.918
Accuracy at time 850 is 0.917
Accuracy at time 900 is 0.917
Accuracy at time 950 is 0.917
acc on clean stream is 0.9077777777777778
6.779656432481225 0.9168190000000103
[0.9168190000000103, [], [], [], []]
--- 3.1035523414611816 seconds ---
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.916
Accuracy at time 100 is 

Running the 0-th experiment
Initial accuracy is 0.499
Target accuracy is 0.01
[6.779656432481225, 11.644085100338394, 17.264378418085613, 32.042097807863875, 71.17228956428355]
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.868
Accuracy at time 100 is 0.889
Accuracy at time 150 is 0.892
Accuracy at time 200 is 0.891
Accuracy at time 250 is 0.889
Accuracy at time 300 is 0.894
Accuracy at time 350 is 0.894
Accuracy at time 400 is 0.895
Accuracy at time 450 is 0.896
Accuracy at time 500 is 0.898
Accuracy at time 550 is 0.898
Accuracy at time 600 is 0.896
Accuracy at time 650 is 0.896
Accuracy at time 700 is 0.897
Accuracy at time 750 is 0.897
Accuracy at time 800 is 0.896
Accuracy at time 850 is 0.896
Accuracy at time 900 is 0.896
Accuracy at time 950 is 0.896
acc on clean stream is 0.8833333333333333
6.779656432481225 0.891887999999994
[0.891887999999994, [], [], [], []]
--- 106.7113127708435 seconds ---
Accuracy at time 0 is 0.499
Accuracy at time 50 is 0.878
Accuracy at time 100 