In [1]:
from __future__ import print_function
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from math import sqrt
import sklearn
from sklearn.linear_model import LogisticRegression
import pickle
import time
import torch

from utils import test_accuracy, projection, veccos, binary_search_cx, \
                  find_exp_score, find_slab_score, project_l2_centroid, \
                  project_l2_centroid_straight, project_slab, project_slab_straight,\
                  contaminate_dataset

from attackers import StraightAttack, SemiOnlineAttack, ConcentratedAttack, GreedyAttack

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def experiment(dataset, taus, contamination_levels):
    """Warm up an attacker on a pickled data split and return its regime thresholds.

    Besides its arguments, this function reads the notebook-level globals
    ``n_exp``, ``attack_method``, ``defense_method``, ``defense``, ``R``,
    ``eta``, ``n_iter_warmup`` and ``n_attack``.

    Parameters
    ----------
    dataset : str
        Name of the sub-directory of ``./data/`` holding the pickled splits,
        which are files named ``0``, ``1``, ...
    taus, contamination_levels :
        Accepted for interface compatibility; this function does not use them.

    Returns
    -------
    The value of ``find_regime_threshold(mu, w_b, -w_b, defense_method)`` for
    the first split, where ``w_b`` is the attacker's ``w_curr`` after warm-up.
    Returns ``None`` when ``n_exp`` is 0.

    Raises
    ------
    ValueError
        If the global ``attack_method`` or ``defense_method`` is not one of
        the supported names.
    """
    attacker_classes = {
        "straight": StraightAttack,
        "greedy": GreedyAttack,
        "concentrated": ConcentratedAttack,
        "semi-online": SemiOnlineAttack,
    }
    # Fail fast with a clear message instead of an UnboundLocalError later on.
    if attack_method not in attacker_classes:
        raise ValueError("Unknown attack_method: {!r}".format(attack_method))
    if defense_method not in ("slab", "norm", "L2"):
        raise ValueError("Unknown defense_method: {!r}".format(defense_method))

    filepath = "./data/"+dataset+"/"
    clf = LogisticRegression(fit_intercept=False, solver='liblinear')

    for i in range(n_exp):
        # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
        with open(filepath+str(i),"rb") as f:
            datasets = pickle.load(f)

        # Layout per the original notes: [0] init set, [1] clean data stream,
        # [2] validation set, [3] test set. Only the first two are used here;
        # the full list is handed to the attacker.
        X_init, Y_init = datasets[0]
        X_clean, Y_clean = datasets[1]

        clf.fit(X_init, Y_init)
        d = X_init.shape[1]
        w_0 = np.zeros((1,d))
        # Target model: the negated logistic-regression fit on the init set.
        w_t = -clf.coef_

        attacker = attacker_classes[attack_method]()
        if attack_method == "semi-online":
            # The semi-online attacker additionally receives the first
            # n_attack clean points with negated features.
            X_adv, Y_adv = X_clean[:n_attack, :], Y_clean[:n_attack]
            attacker.set_param(datasets, w_0, w_t, R, eta,
                               defense, n_iter_warmup, n_attack, (-X_adv, Y_adv))
        else:
            attacker.set_param(datasets, w_0, w_t, R, eta,
                               defense, n_iter_warmup, n_attack)
        attacker.set_init_set(X_init, Y_init)

        if defense_method == "slab":
            mu, scores = attacker.slab_scores()
        elif defense_method == "norm":
            mu, scores = attacker.l2_norms()
        else:  # "L2" (validated above)
            mu, scores = attacker.l2_distances_to_centroid()

        attacker.warmup(n_iter_warmup)
        w_b = attacker.w_curr

        # Last score, mean score, and the score at the middle index.
        print (scores[-1], sum(scores)/len(scores), scores[int(len(scores)/2)])
        # NOTE(review): returning inside the loop means only split 0 is ever
        # processed, regardless of n_exp. Kept as in the original; confirm
        # whether results should be aggregated across splits.
        return (find_regime_threshold(mu, w_b, -w_b, defense_method))

In [3]:
def find_regime_threshold(mu, w_0, w_t, defense_method):
    """Compute a ``(low, high)`` threshold pair for the given defense.

    Parameters
    ----------
    mu : indexable
        ``mu[0]`` and ``mu[1]`` are arrays; each is flattened before use.
        Not accessed when ``defense_method`` is ``"norm"``.
    w_0, w_t : array-like
        Only the flattened difference ``w_t - w_0`` is used.
    defense_method : str
        ``"norm"``, ``"L2"`` or ``"slab"``.

    Returns
    -------
    tuple
        ``(low, high)``. For ``"norm"`` this is always ``(0, 0)``. Any other
        unrecognised ``defense_method`` yields ``None``.
    """
    if defense_method == "norm":
        return (0, 0)

    if defense_method == "L2":
        first, second = mu[0].flatten(), mu[1].flatten()
        # Upper bound: the smaller of the two Euclidean norms.
        upper = min(np.linalg.norm(first), np.linalg.norm(second))
        direction = (w_t-w_0).flatten()
        along_first = np.dot(first, direction)
        along_second = np.dot(second, -direction)
        if along_first > 0 or along_second > 0:
            lower = 0
        else:
            # Magnitudes of the projections onto the (negated) direction.
            proj_first = abs(along_first/np.linalg.norm(direction))
            proj_second = abs(along_second/np.linalg.norm(-direction))
            lower = max(min(proj_first, proj_second), 0)
        return (lower, upper)

    if defense_method == "slab":
        first, second = mu[0].flatten(), mu[1].flatten()
        axis = first - second
        # Orient the axis so it does not oppose the w_0 -> w_t displacement.
        if np.dot(axis, (w_t-w_0).flatten()) < 0:
            axis = -axis
        offset_first = np.dot(-axis, first)
        offset_second = np.dot(axis, second)
        lower = max(min(offset_first, offset_second), 0)
        upper = min(abs(offset_first), abs(offset_second))
        return (lower, upper)
    

In [4]:
# Sweep every dataset and defense, recording the thresholds that experiment()
# returns. experiment() reads the globals assigned in this cell.
n_exp = 1

R, max_tau = 10, 10

taus = [item/2 for item in range(0, max_tau*2)]
contamination_levels = [0]

#attack_methods = ["straight", "greedy", "semi-online", "concentrated"]
attack_method = "semi-online"
defense_methods = ["norm", "L2", "slab"]

# Per-dataset settings: (d, eta, n_attack, n_clean, n_init, n_test, n_valid).
dataset_settings = {
    "MNIST":        (784, 0.01, 100, 8000, 1000, 1000, 500),
    "fashionMNIST": (784, 0.01, 100, 8000, 1000, 1000, 500),
    "IMDB":         (100, 0.01, 200, 10000, 5000, 5000, 2000),
    "BreastCancer": (9, 0.05, 80, 400, 100, 100, 50),
}

thres = {}
for dataset in ["MNIST", "fashionMNIST", "IMDB", "BreastCancer"]:
    print (dataset)
    d, eta, n_attack, n_clean, n_init, n_test, n_valid = dataset_settings[dataset]
    # Warm up over the whole clean stream.
    n_iter_warmup = n_clean

    for defense_method in defense_methods:
        # Rebuild the defense dict for each method; previously it was built
        # once for "norm" and stayed stale while defense_method changed.
        defense = {defense_method: 0}
        defense_range = "att-only"
        thres[(dataset, defense_method)] = experiment(dataset, taus, contamination_levels)
        #defense_range = "all-pts"
        #experiment(dataset, taus, contamination_levels)

print (thres)

MNIST
10.939077 5.9942205369472505 5.786255
10.630243 5.337613325327635 5.237629
29.806637 4.787447480698066 3.603691
fashionMNIST
15.78008818241856 7.916402778701682 7.543576856862157


KeyboardInterrupt: 

In [None]:
# Run every attack against the "L2" defense.
# NOTE: `dataset`, `taus` and `contamination_levels` are not set here; they
# keep whatever values an earlier cell left behind.
defense_method = "L2"
defense = {defense_method:0}

# Defined here so the cell does not raise NameError on a fresh kernel
# (previously this list existed only as a commented-out line).
attack_methods = ["straight", "greedy", "semi-online", "concentrated"]

for attack_method in attack_methods:
    defense_range = "att-only"
    experiment(dataset, taus, contamination_levels)
    #defense_range = "all-pts"
    #experiment(dataset, taus, contamination_levels)

In [None]:
# Run every attack against the "slab" defense.
# NOTE: `dataset`, `taus` and `contamination_levels` are not set here; they
# keep whatever values an earlier cell left behind.
defense_method = "slab"
defense = {defense_method:0}

# Uncommented so the cell does not raise NameError on a fresh kernel.
attack_methods = ["straight", "greedy", "semi-online", "concentrated"]

for attack_method in attack_methods:
    defense_range = "att-only"
    experiment(dataset, taus, contamination_levels)
    #defense_range = "all-pts"
    #experiment(dataset, taus, contamination_levels)

In [None]:
# Straight attack against the "L2" defense at several contamination levels.
# NOTE: `dataset` and `taus` are not set here; they come from an earlier cell.
attack_methods = ["straight"]
contamination_levels = [0, 0.05, 0.1, 0.2]

defense_method = "L2"
defense = {defense_method: 0}
defense_range = "all-pts"

for attack_method in attack_methods:
    experiment(dataset, taus, contamination_levels)

In [None]:
# Straight attack against the "slab" defense at several contamination levels.
# NOTE: `dataset` and `taus` are not set here; they come from an earlier cell.
attack_methods = ["straight"]
contamination_levels = [0, 0.05, 0.1, 0.2]

defense_method = "slab"
defense = {defense_method: 0}
defense_range = "all-pts"

for attack_method in attack_methods:
    experiment(dataset, taus, contamination_levels)

In [None]:
# Scratch cell: print the result of rounding 1234 to three decimal places.
print(round(1234, 3))