In [1]:
from __future__ import print_function
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt
import sklearn
from sklearn.linear_model import LogisticRegression
import pickle
import time
import torch

from utils import test_accuracy, projection, veccos, binary_search_cx, \
                  find_exp_score, find_slab_score, project_l2_centroid, \
                  project_l2_centroid_straight, project_slab, project_slab_straight,\
                  contaminate_dataset, find_regime_threshold

from attackers import StraightAttack, SemiOnlineAttack, ConcentratedAttack, GreedyAttack

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def experiment(dataset, taus=None, contamination_levels=None):
    """Run the attack/defense experiment on one dataset and pickle the results.

    For each of the ``n_exp`` pickled dataset splits under ``./data/<dataset>/``
    an attacker is built, a ladder of defense thresholds is derived from the
    attacker's defense scores, and the attack is run once for every
    (contamination level, threshold) pair. The cosine similarity between the
    warmed-up model and the attacked model, and the minimum test accuracy
    reported by the attack, are averaged over the ``n_exp`` runs.

    The function reads the following module-level globals at call time:
    ``n_exp``, ``d``, ``eta``, ``R``, ``n_attack``, ``n_iter_warmup``,
    ``attack_method``, ``defense_method``, ``defense`` and ``defense_range``.

    Parameters
    ----------
    dataset : str
        Name of the dataset; used as the sub-directory of ``./data/`` and as
        part of the results file name.
    taus : ignored
        Kept for interface compatibility only. The thresholds are always
        recomputed from the defense scores inside every experiment, so any
        value passed here is overwritten.
    contamination_levels : list, optional
        Contamination levels passed to ``contaminate_dataset``; defaults to
        ``[0]``.

    Returns
    -------
    None
        Results are written to (and read back from) a pickle file under
        ``./results/`` and printed.

    Raises
    ------
    ValueError
        If the global ``attack_method`` or ``defense_method`` is not one of the
        supported names.
    """
    # Avoid a shared mutable default; behaviour for callers is unchanged.
    if contamination_levels is None:
        contamination_levels = [0]

    attacker_classes = {"simplistic": StraightAttack,
                        "greedy": GreedyAttack,
                        "concentrated": ConcentratedAttack,
                        "semi-online": SemiOnlineAttack}
    defense_display_names = {"norm":"L_2-norm",
                             "L2": "L_2-distance-to-centroid",
                             "slab":"Slab"}
    # Fail fast with a clear message instead of an UnboundLocalError/KeyError
    # deep inside the experiment loop.
    if attack_method not in attacker_classes:
        raise ValueError("Unknown attack_method {!r}; expected one of {}".format(
            attack_method, sorted(attacker_classes)))
    if defense_method not in defense_display_names:
        raise ValueError("Unknown defense_method {!r}; expected one of {}".format(
            defense_method, sorted(defense_display_names)))

    filepath = "./data/"+dataset+"/"
    clf = LogisticRegression(fit_intercept=False, solver='liblinear')
    # Fractions of the score list at which thresholds are sampled; a leading
    # threshold of 0 is prepended below, hence the "+ 1".
    tau_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    n_taus = len(tau_levels) + 1
    res = [[0 for _ in range(n_taus)] for _ in contamination_levels]
    w_res = [[] for _ in contamination_levels]
    acc_res = [[0 for _ in range(n_taus)] for _ in contamination_levels]
    # Stays None when no attack is run (n_exp == 0 or no contamination levels).
    regime_thres = None

    for i in range(n_exp):
        print ("Running the {}-th experiment".format(i))
        start_time = time.time()
        filename = filepath+str(i)
        # SECURITY NOTE: pickle.load executes arbitrary code from the file;
        # only load dataset files from a trusted source.
        with open(filename,"rb") as f:
            datasets = pickle.load(f)

        X_init, Y_init = datasets[0] # generate defense constraints and init w if necessary.
        X_clean, Y_clean = datasets[1] # the clean data stream
        X_valid, Y_valid = datasets[2] # validation set
        X_test, Y_test = datasets[3]   # the actual test set

        clf.fit(X_init, Y_init)
        w_0 = np.zeros((1,d))
        print ("Initial accuracy is {}".format(test_accuracy(X_test, Y_test, w_0)))
        # The attack target is the negated logistic-regression solution.
        w_t = -clf.coef_
        print ("Target accuracy is {}".format(test_accuracy(X_test, Y_test, w_t)))

        print ("Attack method : {} Defense method : {}".format(attack_method,
                                                              defense_display_names[defense_method]))
        attacker = attacker_classes[attack_method]()
        attack_args = (datasets, w_0, w_t, R, eta,
                       defense, n_iter_warmup, n_attack)
        if attack_method == "semi-online":
            # The semi-online attacker additionally receives the first
            # n_attack clean points with negated features.
            X_adv, Y_adv = X_clean[:n_attack, :], Y_clean[:n_attack]
            attack_args = attack_args + ((-X_adv, Y_adv),)
        attacker.set_param(*attack_args)
        attacker.set_init_set(X_init, Y_init)

        if defense_method == "slab":
            mu, scores = attacker.slab_scores()
        elif defense_method == "norm":
            mu, scores = attacker.l2_norms()
        elif defense_method == "L2":
            mu, scores = attacker.l2_distances_to_centroid()

        # Clamp the index at 0: with fewer than 10 scores the raw index would
        # be -1 and silently wrap around to the last element.
        # NOTE(review): this presumes `scores` is sorted ascending - confirm.
        taus = [0]+[scores[max(int(tau_level*len(scores))-1, 0)]
                    for tau_level in tau_levels]
        print ("taus to be tested are {}".format(taus))

        for j,contamination_level in enumerate(contamination_levels):

            X_init_t, Y_init_t = contaminate_dataset((np.copy(X_init), Y_init),contamination_level, R)
            # NOTE(review): this call looks redundant because the init set is
            # replaced by the contaminated one for every tau below - confirm
            # set_init_set has no cumulative side effects before removing it.
            attacker.set_init_set(X_init, Y_init)

            for k,tau in enumerate(taus):
                attacker.set_defense_threshold(defense_method, tau)
                attacker.set_defense_range(defense_range)
                attacker.set_init_set(X_init_t, Y_init_t)
                attacker.w_curr = w_0
                attacker.warmup(n_iter_warmup)
                # Model after warm-up; the attack then targets its negation.
                w_b = attacker.w_curr
                attacker.set_target(-w_b)
                # Return value is discarded; kept as in the original in case
                # the call updates attacker state.
                attacker.slab_scores()
                regime_thres = find_regime_threshold(mu, w_b, -w_b, defense_method)
                res_acc, _, res_w = attacker.attack()
                # The small constant guards against division by zero.
                cos_sim = np.dot(w_b.flatten(),
                                 res_w.flatten())/(np.linalg.norm(w_b)*np.linalg.norm(res_w)+1e-5)
                print ("cosine score: {} test acc: {}".format(round(cos_sim,3),
                                                              min(res_acc)))
                # Accumulate running means over the n_exp repetitions.
                res[j][k] += (cos_sim/n_exp)
                acc_res[j][k] += (min(res_acc)/n_exp)
                print (res[j][k], acc_res[j][k])
                w_res[j].append(res_w)
                attacker.reset()
        print("--- %s seconds ---" % (time.time() - start_time))

    print ("Predicted threshold of regimes: {}".format(regime_thres))
    results = [res, w_res, acc_res, taus, contamination_levels, regime_thres]
    # Joining with "_" yields "./results/_<dataset>_..."; the name is kept
    # exactly as before so existing result files remain addressable.
    results_path = "_".join(["./results/", dataset, attack_method, defense_method, defense_range])

    print (results_path)
    with open(results_path, "wb") as f:
        pickle.dump(results, f)
    # Read the file back to confirm the results round-trip through pickle.
    print (results_path)
    with open(results_path, "rb") as f:
        results = pickle.load(f)
    res, w_res, acc_res, taus, contamination_levels, regime_thres = results
    print (res, acc_res)

In [3]:
# Global configuration read by experiment() at call time.
n_exp = 1                # number of pickled dataset splits to run and average over
d = 9                    # width of the initial weight vector w_0 (shape (1, d))
eta = 0.05               # forwarded to the attacker; presumably the step size - TODO confirm
n_attack = 80            # attack budget; also the number of clean points used by "semi-online"
n_clean = 400
n_init = 100             # n_init / n_test / n_valid are not read in this notebook;
n_test = 100             # presumably the split sizes used when the data was generated
n_valid = 50
n_iter_warmup = n_clean  # warm-up iterations before each attack
dataset = "BreastCancer" # sub-directory of ./data/ holding the pickled splits
R = 4                    # forwarded to contaminate_dataset and the attacker

In [4]:
# Run every attack against the "norm" defense.
# experiment() reads defense_method, defense, attack_method and defense_range
# as module-level globals, so these names must not be renamed.
defense_method = "norm"
# Defense spec handed to the attacker; the per-run threshold is set later
# inside experiment() via set_defense_threshold.
defense = {defense_method:0}
# Also reused by the later defense cells.
attack_methods = ["simplistic", "greedy", "semi-online", "concentrated"]
for attack_method in attack_methods:
    defense_range = "att-only"
    experiment(dataset)
    

Running the 0-th experiment
Initial accuracy is 0.55
Target accuracy is 0.02
Attack method : simplistic Defense method : L_2-norm
taus to be tested are [0, 0.6234772062887555, 0.6885760733299814, 0.7302562831920678, 0.8021449242142602, 0.8955347814864546, 0.9403466530645418, 1.1345017796356214, 1.5188338948308377, 1.9014260080608296, 2.8311477761324935]
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999957405960365 0.97
cosine score: 1.0 test acc: 0.97
0.9999944185414907 0.97
cosine score: 1.0 test acc: 0.97
0.9999930895435333 0.97
cosine score: 1.0 test acc: 0.97
0.999988619397134 0.97
cosine score: 1.0 test acc: 0.97
0.999949568319432 0.97
cosine score: -1.0 test acc: 0.03
-0.9999019641200774 0.03
cosine score: -1.0 test acc: 0.03
-0.9999915919049127 0.03
cosine score: -1.0 test acc: 0.03
-0.9999958605429601 0.03
cosine score: -1.0 test acc: 0.03
-0.9999965250124168 0.03
cosine score: -1.0 test acc: 0.03
-0.9999965900371206 0.03
--- 0.450

In [5]:
# Run every attack against the "L2" (distance-to-centroid) defense.
# Relies on attack_methods defined in the earlier "norm" defense cell, and on
# experiment() reading these names as module-level globals.
defense_method = "L2"
defense = {defense_method:0}

for attack_method in attack_methods:
    defense_range = "att-only"
    experiment(dataset)


Running the 0-th experiment
Initial accuracy is 0.55
Target accuracy is 0.02
Attack method : simplistic Defense method : L_2-distance-to-centroid
taus to be tested are [0, 0.1780488073623002, 0.2681545932680094, 0.3859318136380788, 0.41384845116286595, 0.4376050076260244, 0.5762988702901451, 0.9070230503049862, 1.128046467190161, 1.3741854968587637, 1.922456238081762]
cosine score: 1.0 test acc: 0.97
0.9999925936202259 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999986480056474 0.97
cosine score: 1.0 test acc: 0.97
0.9999981831802714 0.97
cosine score: 1.0 test acc: 0.97
0.9999971839597377 0.97
cosine score: 1.0 test acc: 0.97
0.999992923768551 0.97
cosine score: -1.0 test acc: 0.03
-0.9999944204993553 0.03
-

In [6]:
# Run every attack against the "slab" defense.
# Relies on attack_methods defined in the earlier "norm" defense cell, and on
# experiment() reading these names as module-level globals.
defense_method = "slab"
defense = {defense_method:0}
for attack_method in attack_methods:
    defense_range = "att-only"
    experiment(dataset)

Running the 0-th experiment
Initial accuracy is 0.55
Target accuracy is 0.02
Attack method : semi-online Defense method : Slab
taus to be tested are [0, 0.07373681375194613, 0.1454201343040102, 0.2306696389777392, 0.3060937567683501, 0.3837293717261453, 0.44278778205080144, 0.556452819684853, 0.8035954128214978, 1.433274847482336, 3.8435622629189736]
cosine score: 0.967 test acc: 0.98
0.96749187915831 0.98
cosine score: 0.965 test acc: 0.98
0.9647417357805089 0.98
cosine score: 0.962 test acc: 0.98
0.961885443124445 0.98
cosine score: 0.958 test acc: 0.98
0.9582591414076662 0.98
cosine score: 0.955 test acc: 0.98
0.9548154339292718 0.98
cosine score: 0.951 test acc: 0.98
0.9510176863120258 0.98
cosine score: 0.948 test acc: 0.98
0.9479443949255139 0.98
cosine score: 0.942 test acc: 0.98
0.941569156409803 0.98
cosine score: 0.925 test acc: 0.98
0.9252579917948844 0.98
cosine score: 0.864 test acc: 0.97
0.8643205221283807 0.97
cosine score: -0.943 test acc: 0.03
-0.9432427671860548 0.03
