In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd import grad
from utils import *

In [2]:
def redim(x):
    """Return a copy of the 2-D array ``x`` with a trailing column of ones.

    The extra constant column lets an intercept be folded into a weight
    vector. ``x`` itself is left untouched.
    """
    ones_column = np.ones((x.shape[0], 1))
    return np.concatenate((x, ones_column), axis=1)

In [3]:
def stand(x, mean, std):
    """Standardise ``x`` by subtracting ``mean`` and dividing by ``std``.

    The statistics are supplied by the caller, so the same ``mean``/``std``
    can be applied to several inputs. A new object is returned.
    """
    return (x - mean) / std

## Benchmark for regression

In [4]:
# Load the white-wine quality data; the file is read with ';' as separator.
data = pd.read_csv("data/winequality-white.csv", sep = ";")
# Show the first rows as the cell output.
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [5]:
# Feature matrix: every column except the target; target: the "quality" column.
is_feature = data.columns != "quality"
X = data.loc[:, is_feature]
y = data["quality"]

Project the features onto their principal components (all 11 components are kept) and split the result into train and test sets.

In [6]:
# Rotate the features onto their principal components (all 11 are kept).
# NOTE(review): the PCA is fitted on the full dataset before the train/test
# split, so test rows influence the projection. The original author marked
# the test conversion below as "a bit of a cheat" -- presumably for this reason.
pca = PCA(n_components=11, svd_solver='full')
# Fit once and project in the same call (the PCA used to be fitted twice).
x = pca.fit_transform(X)
print(pca.explained_variance_ratio_)

# 70/30 split; no random_state is set, so the split differs between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train = np.asarray(x_train, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32).reshape(-1, 1)
x_test = np.asarray(x_test, dtype=np.float32)  # a bit of a cheat
y_test = np.asarray(y_test, dtype=np.float32).reshape(-1, 1)
### to torch
# Plain tensors are enough: the deprecated Variable wrapper just returns the
# tensor it is given.
x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train)
x_test = torch.from_numpy(x_test)
y_test = torch.from_numpy(y_test)

[9.09657344e-01 7.93338631e-02 1.01542742e-02 5.06004450e-04
 3.23409395e-04 8.72769740e-06 6.72986618e-06 5.39060918e-06
 4.07002123e-06 1.86525322e-07 1.49217279e-10]


In [7]:
# Standardise both splits with the training-set statistics, so the test set
# is scaled exactly like the data the model is fitted on.
train_mean = x_train.mean(dim=0)
train_std = x_train.std(dim=0)
x_train_renom = stand(x_train, train_mean, train_std)
x_test_renom = stand(x_test, train_mean, train_std)

### Using torch

In [8]:
# Randomly initialised linear model: a row vector with one weight per input
# column and a one-element intercept, both tracked by autograd.
w = torch.randn(1, x_train.shape[1], requires_grad=True)
b = torch.randn(1, requires_grad=True)
# Coefficient of the w @ w.t() penalty used by the loss functions (0.0 = off).
lmb = 0.0

In [9]:
def model(x, w, b):
    """Affine prediction ``x @ w.t() + b`` for inputs ``x``."""
    weights_t = w.t()
    return torch.matmul(x, weights_t) + b

def mse(t1, t2, w):
    """Mean squared difference between ``t1`` and ``t2`` plus a penalty on ``w``.

    The penalty is ``(lmb * w) @ w.t()``, where ``lmb`` is read from the
    notebook's global scope; the result therefore has the shape of that
    matrix product rather than being a 0-d scalar.
    """
    residual = t1 - t2
    mean_sq = (residual * residual).sum() / residual.numel()
    penalty = (lmb * w) @ w.t()
    return mean_sq + penalty

In [10]:
# Gradient-descent step size and number of full-batch passes used by the
# training loop that follows.
lr = 0.01
epochs = 1000

In [11]:
# Full-batch gradient descent on the (penalised) mean squared error.
for epoch in range(1, epochs + 1):
    preds = model(x_train_renom, w, b)
    loss = mse(preds, y_train, w)
    loss.backward()
    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
        w.sub_(lr * w.grad)
        b.sub_(lr * b.grad)
        # Gradients accumulate across backward() calls, so reset them.
        w.grad.zero_()
        b.grad.zero_()

    if epoch % 1000 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.data[0]))
    

epoch 1000, loss tensor([0.5823])


In [12]:
# Inspect the fitted weights and intercept.
print(w)
print(b)

tensor([[-0.1357,  0.1397, -0.0269,  0.3645,  0.0027,  0.0628,  0.0802,  0.0006,
         -0.1693, -0.0171, -0.0891]], requires_grad=True)
tensor([5.8926], requires_grad=True)


In [13]:
# Test-set RMSE of the fitted model on the clean (unattacked) inputs.
preds = model(x_test_renom, w, b)
((preds - y_test) ** 2).mean().sqrt()

tensor(0.7213, grad_fn=<SqrtBackward>)

In [14]:
def rmse(y, pred):
    """Root-mean-square error between targets ``y`` and predictions ``pred``."""
    squared_error = (pred - y) ** 2
    return squared_error.mean().sqrt()

In [15]:
# Same test-set RMSE as computed inline before, now through the helper.
rmse(y_test, preds)

tensor(0.7213, grad_fn=<SqrtBackward>)

## Adversarial attack. Non-Bayesian case

Let $X$ denote the clean dataset and $X^* = T(X, \beta)$ the attacked dataset when the classifier chooses parameters $\beta$. We try to solve the following Defend-Attack game:

$$
\beta^* = \arg\min_{\beta} \widehat{\theta}_C [\beta, T(X, \beta)] = \arg\min_{\beta} \sum_{i=1}^n \left( T(X, \beta)^{[i]}\beta^{\top} - y_i \right)^2 + \lambda \beta \beta^{\top}
$$

subject to

$$
X^* = T(X, \beta) = \arg\min_{X'} \widehat{\theta}_A [\beta, X'] = \arg\min_{X'} \sum_{i=1}^n c_{i}\left( X'^{[i]}\beta^{\top} - z_i \right)^2 + ||X-X'||^2_{F}
$$

Where $y$ are the true labels, $z$ are the targets and $c$ are instance-specific factors, which are common knowledge here. We can solve exactly the attacker's problem, yielding

$$
X^* = T(X, \beta) = X - \left(\text{diag}(c)^{-1} + \beta \beta^{\top} I_n \right)^{-1} (X\beta^{\top} - z)\beta
$$

We could then compute the gradient for the classifier problem using

$$
\nabla \widehat{\theta}_C [\beta, T(X, \beta)] = \nabla_{\beta} \widehat{\theta}_C [\beta, T(X, \beta)] + \nabla_T  \widehat{\theta}_C [\beta, T(X, \beta)] \frac{\partial T(X,\beta)}{\partial \beta}
$$

and use gradient descent to find $\beta^*$.

### Attack

In [16]:
def attack(w, b, test, c_d, z):
    """Closed-form attacker response to the linear model ``(w, b)``.

    Every row ``x_i`` of ``test`` is moved along ``w`` towards the attacker's
    target prediction ``z_i``:

        x_i' = x_i - (1 / c_i + w w^T)^(-1) * (x_i w^T + b - z_i) * w

    Parameters
    ----------
    w : weight row vector; callers in this notebook pass shape (1, d).
    b : intercept added to ``test @ w.t()``.
    test : inputs to attack, one instance per row.
    c_d : per-instance attacker factors; callers pass a 1-D tensor with one
        entry per row of ``test``.
    z : attacker's target predictions, one row per instance.

    Returns
    -------
    Tensor shaped like ``test`` holding the attacked inputs. The result stays
    connected to the autograd graph of ``w`` and ``b``.
    """
    # Per-instance step size; w @ w.t() is 1x1 and broadcasts against c_d,
    # so the result is 2-D and its first row holds one factor per instance.
    shrink = (1 / c_d + w @ w.t()) ** (-1)
    # Gap between the current and the target prediction, spread along w.
    direction = (test @ w.t() + b - z) @ w
    # Scale each row directly instead of multiplying by a dense n x n
    # diagonal matrix: same result without O(n^2) memory and work.
    return test - shrink[0].unsqueeze(1) * direction

In [17]:
value = 0.5  # same c_i for every instance
n_test = len(y_test)
c_d = value * torch.ones(n_test)
# The attacker's target prediction is 0 for every test instance.
z = torch.zeros([n_test, 1])
out = attack(w, b, x_test_renom, c_d, z)

In [18]:
# Compare the model's test RMSE on the clean inputs with its RMSE on the
# attacked inputs `out`.
pred_at = model(out, w, b)
pred_clean = model(x_test_renom, w, b)
print(w)
print("Clean test RMSE: ", torch.sqrt(torch.mean((pred_clean - y_test) ** 2)))
# Label fixed: it used to read "Attacked est RMSE".
print("Attacked test RMSE: ", torch.sqrt(torch.mean((pred_at - y_test) ** 2)))

tensor([[-0.1357,  0.1397, -0.0269,  0.3645,  0.0027,  0.0628,  0.0802,  0.0006,
         -0.1693, -0.0171, -0.0891]], requires_grad=True)
Clean test RMSE:  tensor(0.7213, grad_fn=<SqrtBackward>)
Attacked est RMSE:  tensor(0.9126, grad_fn=<SqrtBackward>)


### Defense

In [19]:
# Fresh random parameters for the defender trained against the attack;
# same shapes as the w / b initialisation above.
w_clean = torch.randn(1, x_train.shape[1], requires_grad=True)
b_clean = torch.randn(1, requires_grad=True)

In [20]:
def learner_cost(w, b, x, y, lmb, c_d, z):
    """Defender's cost when the inputs are attacked in response to ``(w, b)``.

    ``x`` is first passed through ``attack``; the cost is the summed squared
    error of the linear model on the attacked inputs plus the penalty
    ``(lmb * w) @ w.t()``.
    """
    attacked = attack(w, b, x, c_d, z)
    residual = attacked @ w.t() + b - y
    return (residual ** 2).sum() + (lmb * w) @ w.t()

In [21]:
lr = 1e-5
epochs = 500
value = 0.5
n_train = len(y_train)
c_d = value * torch.ones(n_train)
z = torch.zeros([n_train, 1])
print("Initial Cost", learner_cost(w_clean, b_clean, x_train_renom, y_train, lmb, c_d, z))
# Gradient descent on the defender's cost; autograd differentiates through
# the attack because `learner_cost` recomputes it from the current parameters.
for epoch in range(1, epochs + 1):
    loss = learner_cost(w_clean, b_clean, x_train_renom, y_train, lmb, c_d, z)
    loss.backward()
    with torch.no_grad():
        w_clean.sub_(lr * w_clean.grad)
        b_clean.sub_(lr * b_clean.grad)
        # Reset the accumulated gradients for the next pass.
        w_clean.grad.zero_()
        b_clean.grad.zero_()

    if epoch % 100 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.data[0]))

Initial Cost tensor([[133705.3125]], grad_fn=<AddBackward0>)
epoch 100, loss tensor([5444.7485])
epoch 200, loss tensor([2060.9519])
epoch 300, loss tensor([2015.6543])
epoch 400, loss tensor([2004.2576])
epoch 500, loss tensor([1999.9907])


In [22]:
# RMSE of the strategic defender on the attacked training inputs.
x_train_attacked = attack(w_clean, b_clean, x_train_renom, c_d, z)
pred_at = model(x_train_attacked, w_clean, b_clean)
((pred_at - y_train) ** 2).mean().sqrt()

tensor(0.7638, grad_fn=<SqrtBackward>)

In [23]:
# Inspect the parameters learned by the strategic defender.
print(w_clean,b_clean)

tensor([[-0.1418,  0.1457, -0.0285,  0.3789,  0.0027,  0.0653,  0.0836,  0.0008,
         -0.1758, -0.0180, -0.0924]], requires_grad=True) tensor([6.5811], requires_grad=True)


### Test

In [24]:
# RMSE of the strategic defender on the attacked test inputs.
value = 0.5
n_test = len(y_test)
c_d = value * torch.ones(n_test)
z = torch.zeros([n_test, 1])
x_test_attacked = attack(w_clean, b_clean, x_test_renom, c_d, z)
pred_at = model(x_test_attacked, w_clean, b_clean)
((pred_at - y_test) ** 2).mean().sqrt()

tensor(0.7225, grad_fn=<SqrtBackward>)

In [25]:
# RMSE of the strategic defender on the clean test inputs. The variable keeps
# the name `pred_at` although no attack is applied here.
pred_at = model(x_test_renom, w_clean, b_clean)
((pred_at - y_test) ** 2).mean().sqrt()

tensor(1.0113, grad_fn=<SqrtBackward>)

## Adversarial attack. Bayesian case

Now we include uncertainty about the parameters of the adversary, in particular about $c$, the instance-specific factors. We choose a prior $\pi(c)$ reflecting the defender's uncertainty. Thus we should solve

$$
\beta^* = \arg\min_{\beta} \int \widehat{\theta}_C [\beta, T(X, \beta, c)] d\pi(c) 
$$

subject to

$$
X^* = T(X, \beta, c) = \arg\min_{X'} \widehat{\theta}_A [\beta, X', c] = \arg\min_{X'} \sum_{i=1}^n c_{i}\left( X'^{[i]}\beta^{\top} - z_i \right)^2 + ||X-X'||^2_{F}
$$

where we have made explicit the dependence on $c$. To solve this problem, we can estimate the gradient of the defender's cost function with respect to $\beta$ as follows

$$
\nabla \int \widehat{\theta}_C [\beta, T(X, \beta, c)] d\pi(c)  = \int \nabla \widehat{\theta}_C [\beta, T(X, \beta, c)] d\pi(c) = \mathbb{E}_{\pi(c)} \left \lbrace \nabla \widehat{\theta}_C [\beta, T(X, \beta, c)] \right \rbrace
$$

Thus, we can get an unbiased estimate of the gradient, sampling $c_1, \dots, c_K \sim \pi(c)$ and computing

$$
\frac{1}{K} \sum_{i=1}^K \nabla \widehat{\theta}_C [\beta, T(X, \beta, c_i)]
$$

We use here a Gamma distribution as prior.

In [26]:
# Fresh random parameters for the Bayesian defender.
w_bayes = torch.randn(1, x_train.shape[1], requires_grad=True)
b_bayes = torch.randn(1, requires_grad=True)
###
# Gamma prior over the attacker's factors c, parametrised so that its mean is
# `mean` and its variance is `var` (shape = mean^2/var, rate = mean/var).
mean = 0.5
var = 0.01
m = torch.distributions.Gamma(torch.tensor([mean**2/var]), torch.tensor([mean/var])) ## shape, rate
# One draw of c per test instance, kept for the final evaluation.
sample_test = m.sample(torch.Size([len(y_test)]))
z = torch.zeros([len(y_train),1]) 
# Number of prior draws averaged per gradient step.
n_samples = 10


In [27]:
lr = 1e-5
epochs = 100
z = torch.zeros([len(y_train), 1])
for epoch in range(1, epochs + 1):
    # Accumulators for the Monte Carlo average of the gradient.
    wgrad = torch.zeros(1, x_train.shape[1])
    bgrad = torch.zeros(1)
    # n_samples independent draws of the per-instance factors c.
    sample = m.sample(torch.Size([n_samples, len(y_train)]))
    ### Crude approach. TODO: vectorise
    for draw in sample:
        c_d = draw[:, 0]
        loss = learner_cost(w_bayes, b_bayes, x_train_renom, y_train, lmb, c_d, z)
        loss.backward()
        wgrad.add_(w_bayes.grad)
        bgrad.add_(b_bayes.grad)
        # Reset so the next draw's gradient is not added on top of this one.
        w_bayes.grad.zero_()
        b_bayes.grad.zero_()
    ####
    wgrad.div_(n_samples)
    bgrad.div_(n_samples)

    with torch.no_grad():
        w_bayes.sub_(lr * wgrad)
        b_bayes.sub_(lr * bgrad)

    # The reported loss is the one of the last draw only, not the average.
    if epoch % 10 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.data[0]))


epoch 10, loss tensor([114847.2578])
epoch 20, loss tensor([82031.2266])
epoch 30, loss tensor([16142.8252])
epoch 40, loss tensor([5854.7026])
epoch 50, loss tensor([3296.0908])
epoch 60, loss tensor([2596.5701])
epoch 70, loss tensor([2353.6360])
epoch 80, loss tensor([2243.7227])
epoch 90, loss tensor([2186.7139])
epoch 100, loss tensor([2149.5945])


In [28]:
# Training RMSE of the Bayesian defender under one draw of c: the first row
# of the `sample` tensor left over from the last training epoch.
c_d = sample[0][:, 0]
z = torch.zeros([len(y_train), 1])
x_train_attacked = attack(w_bayes, b_bayes, x_train_renom, c_d, z)
pred_at = model(x_train_attacked, w_bayes, b_bayes)
((pred_at - y_train) ** 2).mean().sqrt()

tensor(0.7912, grad_fn=<SqrtBackward>)

In [29]:
# Evaluate the three defenders on the same test set, clean and attacked, with
# the attacker's factors taken from the prior draw `sample_test`.
c_d = sample_test.t()[0]
z = torch.zeros([len(y_test), 1])

# (section header, label for clean RMSE, label for attacked RMSE, weights, intercept)
defenders = (
    ("____Non-Strategic Defender____",
     "Loss Clean test set: ", "Loss attacked test set: ", w, b),
    ("\n____Strategic Bayes Defender____",
     "Loss Bayes Clean test set: ", "Loss Bayes attacked test set: ", w_bayes, b_bayes),
    ("\n____Strategic Nash Defender____",
     "Loss Nash Clean test set: ", "Loss Nash attacked test set: ", w_clean, b_clean),
)

for header, clean_label, attacked_label, weights, bias in defenders:
    print(header)
    ###
    pred_clean = model(x_test_renom, weights, bias)
    print(clean_label, rmse(pred_clean, y_test))
    ###
    pred_at = model(attack(weights, bias, x_test_renom, c_d, z), weights, bias)
    print(attacked_label, rmse(pred_at, y_test))
###


____Non-Strategic Defender____
Loss Clean test set:  tensor(0.7213, grad_fn=<SqrtBackward>)
Loss attacked test set:  tensor(0.9206, grad_fn=<SqrtBackward>)

____Strategic Bayes Defender____
Loss Bayes Clean test set:  tensor(0.7726, grad_fn=<SqrtBackward>)
Loss Bayes attacked test set:  tensor(0.7527, grad_fn=<SqrtBackward>)

____Strategic Nash Defender____
Loss Nash Clean test set:  tensor(1.0113, grad_fn=<SqrtBackward>)
Loss Nash attacked test set:  tensor(0.7383, grad_fn=<SqrtBackward>)


# Trash

The defender's cost is

$$
\theta_l = \sum_{i=1}^n c_{l,i} \left( \phi(\omega, x)^{[i]}\omega^{\top} - y_i \right)^2 + \omega \omega^{\top}
$$
We will need to compute

$$
\frac{d \theta}{d\omega_j} = \sum_{i=1}^n c_{l,i} \left[ 2 \left( \phi(\omega, x)^{[i]}\omega^{\top} - y_i \right) \phi^{[i,j]} + 2 \left( \phi(\omega, x)^{[i]}\omega^{\top} - y_i \right) \omega \left[ \frac{\partial \phi^i}{\partial \omega_j}\right]^{\top}\right] + 2 \omega_j
$$


In [14]:
def learner_cost(w, b, x, y, lmb):
    """Summed squared error of the linear model on ``x`` plus ``(lmb * w) @ w.t()``.

    No attack is applied here. This definition reuses the name of the
    seven-argument ``learner_cost`` defined earlier in the notebook and
    replaces it once this cell runs.
    """
    residual = x @ w.t() + b - y
    squared_error = (residual ** 2).sum()
    penalty = (lmb * w) @ w.t()
    return squared_error + penalty

In [15]:
# Clear any gradient left on w by earlier backward() calls.
w.grad.zero_()
# Wrap the test inputs so that gradients with respect to them are tracked too.
x_aux = Variable(x_test, requires_grad = True)
# lmb = 0.0: no penalty term in this cost.
cost_defender = learner_cost(w, b, x_aux, y_test, 0.0)
cost_defender.backward()
print(w.grad) ### This is the grad wrt w, the ith element is first summand above of the ith term
print(x_aux.grad) ## The ith row is the first part of the second summand above, of the ith element
# The return value of zero_() is what the cell displays.
x_aux.grad.zero_()

tensor([[-20.6633,  -4.1001,   4.4785,   2.4156,   9.6904,  -9.3381,
          -0.2540,  -3.7007, -21.7405,   3.4108,  -2.7130]])
tensor([[-1.1637e-02,  1.1262e-02, -3.3636e-03,  2.8633e-02, -2.3743e-05,
          5.5320e-03,  6.9226e-03, -1.0277e-05, -1.3710e-02, -1.3846e-03,
         -6.7525e-03],
        [-2.7212e-01,  2.6336e-01, -7.8657e-02,  6.6956e-01, -5.5521e-04,
          1.2936e-01,  1.6188e-01, -2.4032e-04, -3.2059e-01, -3.2377e-02,
         -1.5790e-01],
        [ 7.2265e-02, -6.9939e-02,  2.0889e-02, -1.7781e-01,  1.4744e-04,
         -3.4355e-02, -4.2990e-02,  6.3821e-05,  8.5138e-02,  8.5983e-03,
          4.1934e-02],
        [-8.5773e-02,  8.3013e-02, -2.4793e-02,  2.1105e-01, -1.7501e-04,
          4.0776e-02,  5.1026e-02, -7.5751e-05, -1.0105e-01, -1.0206e-02,
         -4.9773e-02],
        [ 2.0279e-01, -1.9627e-01,  5.8618e-02, -4.9898e-01,  4.1376e-04,
         -9.6407e-02, -1.2064e-01,  1.7910e-04,  2.3892e-01,  2.4129e-02,
          1.1768e-01],
        [ 6.454

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  

In [16]:
def jacobian(inputs, outputs):
    """Stack the gradients of each column of ``outputs`` with respect to ``inputs``.

    For every column ``i`` the column is summed and differentiated with
    respect to ``inputs``; the per-column gradients are stacked along a new
    last dimension. The graph is retained and the gradients are themselves
    differentiable (``create_graph=True``).
    """
    column_grads = []
    for col in range(outputs.size(1)):
        col_total = outputs[:, col].sum()
        (col_grad,) = grad([col_total], [inputs], retain_graph=True, create_graph=True)
        column_grads.append(col_grad)
    return torch.stack(column_grads, dim=-1)


In [17]:
value = 0.1
c_d = torch.ones(len(y_train))*value
z = torch.zeros([len(y_train),1]) 

# NOTE(review): this call passes four arguments, but the `attack` defined in
# this notebook takes (w, b, test, c_d, z). It looks like a leftover from a
# version without the intercept -- confirm and update before re-running.
out = attack(w, x_train, c_d, z)

## The i,j element of this is the derivative of the 0,j element of the transformation wrt w_i
## The ith row is the derivative of phi(0) wrt w_i
w.grad.zero_()
jacobian(w, out[0].reshape(1,-1))[0]

tensor([[-5.3646e-01, -1.3400e-02,  4.0022e-03, -3.4068e-02,  2.8250e-05,
         -6.5822e-03, -8.2368e-03,  1.2228e-05,  1.6312e-02,  1.6474e-03,
          8.0344e-03],
        [-9.5354e-03, -5.4107e-01, -2.7563e-03,  2.3462e-02, -1.9455e-05,
          4.5331e-03,  5.6726e-03, -8.4213e-06, -1.1234e-02, -1.1346e-03,
         -5.5332e-03],
        [ 2.3728e-02, -2.2965e-02, -5.4344e-01, -5.8385e-02,  4.8413e-05,
         -1.1280e-02, -1.4116e-02,  2.0956e-05,  2.7955e-02,  2.8233e-03,
          1.3769e-02],
        [-1.1740e-02,  1.1362e-02, -3.3936e-03, -5.2142e-01, -2.3954e-05,
          5.5813e-03,  6.9842e-03, -1.0368e-05, -1.3832e-02, -1.3969e-03,
         -6.8126e-03],
        [ 3.0358e-03, -2.9381e-03,  8.7751e-04, -7.4697e-03, -5.5030e-01,
         -1.4432e-03, -1.8060e-03,  2.6811e-06,  3.5766e-03,  3.6121e-04,
          1.7616e-03],
        [-1.1209e-02,  1.0848e-02, -3.2400e-03,  2.7581e-02, -2.2870e-05,
         -5.4497e-01,  6.6683e-03, -9.8993e-06, -1.3206e-02, -1.3337e-0

### Optimal Defense

In [341]:
def opt_defense(x, y, w, c_d, z, b=0.0):
    """Closed-form ridge solution fitted on the attacked inputs.

    The inputs ``x`` are attacked in response to the current model
    ``(w, b)``, then ``(I + X'^T X')^(-1) X'^T y`` is returned, where ``X'``
    are the attacked inputs.

    Parameters
    ----------
    x, y : inputs (one instance per row) and targets.
    w : current weight row vector the attacker responds to.
    c_d, z : attacker factors and target predictions, forwarded to ``attack``.
    b : intercept forwarded to ``attack``. It defaults to 0.0 so that existing
        five-argument calls keep working; the previous body called ``attack``
        without an intercept, which no longer matches its signature.

    Returns
    -------
    Column vector of optimal weights, one row per column of ``x``.
    """
    out = attack(w, b, x, c_d, z)
    # Regularised Gram matrix of the attacked inputs (unit ridge coefficient).
    gram = torch.eye(out.shape[1]) + out.t() @ out
    return torch.inverse(gram) @ (out.t() @ y)

In [342]:
# Closed-form defence against the attack on the raw (unstandardised) training inputs.
w_opt = opt_defense(x_train, y_train, w, c_d, z)

In [344]:
# Predictions on the clean training inputs with the closed-form weights;
# the intercept b is reused as is.
pred_after_defense = x_train @ w_opt + b

In [346]:
# Training RMSE of the closed-form defence.
torch.sqrt( torch.mean( (pred_after_defense - y_train )**2 ) ) 

tensor(1.7312)

In [179]:
# Scratch cell. NOTE(review): this rebinds `b`, which the cells above use as
# the model intercept, to a NumPy array; `a` is not used in the visible code.
a = np.random.randn(10)
b = np.random.randn(10)
df = pd.DataFrame({"first":range(10), "sec":b})

In [181]:
# Write the scratch frame to a file named "lele" (no extension) without the index column.
df.to_csv("lele", index=False)
