In [1]:
# Sample 2k row vectors from a random N-dimensional orthogonal matrix, train a classifier on them, and note the adversarial epsilon
from scipy.stats import ortho_group
import numpy as np
import scipy.linalg
from models import NeuralNet, CNN_OneD
import torch
from torch import nn
from advertorch.attacks import LinfPGDAttack, L2PGDAttack, DDNL2Attack
import ipdb
import itertools

In [2]:
# input_shape = 100
# x = ortho_group.rvs(input_shape)

In [3]:
def train(model, data, labels, epochs = 1000, batch_size=None, lr=1e-2):
    """Fit ``model`` to ``(data, labels)`` with Adam and cross-entropy loss.

    The learning rate is divided by 10 once, at the start of epoch
    ``epochs // 2``.

    Args:
        model: ``torch.nn.Module`` mapping a batch of ``data`` to class logits.
        data: tensor of training inputs; the first dimension indexes samples.
        labels: tensor of class targets aligned with ``data`` along the first
            dimension, in a form accepted by ``nn.CrossEntropyLoss``.
        epochs: number of passes over the data.
        batch_size: samples per optimisation step. ``None`` (the default)
            uses the whole data set as a single batch, i.e. one step per epoch.
        lr: initial Adam learning rate.

    Returns:
        The same ``model`` object, updated in place and left in train mode.

    Raises:
        ValueError: if ``data`` has no samples or ``batch_size`` is not positive.
    """
    num_samples = data.shape[0]
    if num_samples == 0:
        raise ValueError("train() needs at least one sample")
    if batch_size is None:
        batch_size = num_samples
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")

    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        if epoch == epochs // 2:
            # One-off step decay halfway through training.
            for param_group in optimizer.param_groups:
                param_group['lr'] /= 10
        # Walk through the data in order; slicing clips at the end, so a final
        # short batch is still trained on instead of being dropped.
        for start in range(0, num_samples, batch_size):
            batch_data = data[start:start + batch_size]
            batch_labels = labels[start:start + batch_size]
            loss = criterion(model(batch_data), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    return model

def test_pgd(model, data, labels, epsilon, res_row):
    """Append the model's accuracy (in percent) under an L2 PGD attack to ``res_row``.

    The attack is an untargeted ``L2PGDAttack`` with radius ``epsilon``,
    100 iterations of step size ``epsilon / 50`` and a random start; the
    perturbed inputs are clipped to the observed ``[data.min(), data.max()]``
    range.

    Args:
        model: classifier under attack; switched to eval mode.
        data: input tensor, first dimension indexes samples.
        labels: true class ids aligned with ``data``.
        epsilon: L2 radius of the attack.
        res_row: list that receives one float, the post-attack accuracy.

    Returns:
        None. The result is appended to ``res_row``.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss(reduction='sum')
    adversary = L2PGDAttack(model, loss_fn=criterion, nb_iter=100, eps_iter=epsilon/50,
                            rand_init=True, eps=epsilon, clip_min=data.min().item(),
                            clip_max=data.max().item(), targeted=False)
    # Give the attack its own gradient-enabled copy rather than flipping
    # requires_grad on the caller's tensor, which would leak into later cells.
    attack_input = data.detach().clone().requires_grad_(True)
    perturbed_data = adversary.perturb(attack_input, labels)
    # Only predictions are needed here, so skip building an autograd graph.
    with torch.no_grad():
        pred = model(perturbed_data).argmax(1)
    correct = (pred == labels).sum().item()
    res_row.append(100*float(correct)/labels.shape[0])
    
def test_ddn(model, data, labels, res_row):
    """Append the mean and median L2 size of DDN perturbations to ``res_row``.

    Runs ``DDNL2Attack`` (1000 iterations, no quantisation, inputs clipped to
    the observed ``[data.min(), data.max()]`` range) and measures, per sample,
    the L2 norm of ``data - perturbed_data``.

    Args:
        model: classifier under attack; switched to eval mode.
        data: input tensor, first dimension indexes samples.
        labels: true class ids aligned with ``data``.
        res_row: list that receives two values: mean L2 distance, then
            median L2 distance.

    Returns:
        None. The results are appended to ``res_row``.

    Note:
        Any sample the attack returns unchanged contributes a distance of 0
        to both statistics; attack success is not checked here.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss(reduction='sum')
    adversary = DDNL2Attack(model, nb_iter=1000, quantize=False,
                            clip_min=data.min().item(), clip_max=data.max().item(),
                            loss_fn=criterion)
    # Give the attack its own gradient-enabled copy rather than flipping
    # requires_grad on the caller's tensor, which would leak into later cells.
    attack_input = data.detach().clone().requires_grad_(True)
    perturbed_data = adversary.perturb(attack_input, labels)
    delta = (data - perturbed_data).detach().cpu().numpy()
    # Flatten everything but the sample axis so the per-sample norm is also
    # correct for inputs with more than two dimensions.
    l2_distances = np.linalg.norm(delta.reshape(delta.shape[0], -1), ord=2, axis=1)
    res_row.append(np.mean(l2_distances))
    res_row.append(np.median(l2_distances))
  

In [4]:
def add_linear_seperator(d1, d2, p):
    """Blend each data set with its own unit-length mean direction.

    For each data set the mean row is computed and scaled to unit L2 norm
    (``z``). Every row ``r`` is then replaced by ``p*z + (1-p)*r`` and
    rescaled to unit L2 norm along axis 1.

    Args:
        d1, d2: arrays with one sample per row (rows are normalised along
            axis 1, so at least two dimensions are expected).
        p: weight given to the mean direction; ``1 - p`` goes to the row.

    Returns:
        Tuple ``(d1_n, d2_n)`` of newly allocated arrays; the inputs are not
        modified.
    """
    def _mix_towards_mean(rows):
        # Unit vector pointing along the average row of this data set.
        centre = np.mean(rows, axis=0)
        centre = centre / np.linalg.norm(centre)
        mixed = centre*p + rows*(1-p)
        # keepdims keeps a length-1 axis so the division broadcasts per row.
        return mixed / np.linalg.norm(mixed, axis=1, keepdims=True)

    return _mix_towards_mean(d1), _mix_towards_mean(d2)

def sample_orth_datasets(dimension, samples, p, basis=None):
    """Cut two classes of ``samples`` rows each out of an orthogonal matrix.

    Class 1 is rows ``[0, samples)`` and class 2 is rows
    ``[dimension//2, dimension//2 + samples)``. For ``p >= 0`` the last row
    of each class, scaled by ``p``, is added to every row of that class
    (including that last row itself).

    Args:
        dimension: size of the orthogonal matrix; fixes where class 2 starts.
        samples: number of rows per class.
        p: weight of the shared per-class direction. A negative value
            disables the shift and returns the raw rows.
        basis: matrix to slice the rows from. ``None`` (default) falls back
            to the notebook-level matrix ``x``.

    Returns:
        Tuple ``(d1, d2)``. For ``p < 0`` these are plain slices of the
        matrix; otherwise they are new arrays.

    Note:
        No range check is done: with ``samples > dimension // 2`` the two
        classes share rows, and class 2 is shorter than ``samples`` when the
        slice runs past the end of the matrix.
    """
    if basis is None:
        basis = x  # notebook-level orthogonal matrix set by the experiment cell
    half = dimension // 2
    # Use the `samples` argument for both classes; class 1 used to be sized by
    # the global loop variable `k`, which only matched by coincidence.
    d1, d2 = basis[:samples], basis[half:half + samples]
    if p < 0:
        return d1, d2
    z1, z2 = basis[samples - 1], basis[half + samples - 1]
    return d1 + z1*p, d2 + z2*p

In [8]:
# Experiment sweep: for every (dimension, p) pair draw a random orthogonal
# matrix, carve two classes of k rows each out of it, fit a classifier and
# record the L2 size of the DDN adversarial perturbations.
SEED = 0
np.random.seed(SEED)     # ortho_group.rvs and np.random.permutation draw from NumPy's global RNG
torch.manual_seed(SEED)  # model initialisation

# Fall back to the CPU so the cell still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dimensions = [4000]
# Wider sweep used previously: p_values = [-1, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3]
p_values = [-1]
# Wider sweep used previously: samples = [50, 100, 250, 500, 1000]
samples = [50, 100, 200]
data_all = []

for input_shape, p in itertools.product(dimensions, p_values):
    # `x` (and the loop variable `k` below) may be read as globals by helper
    # code and later cells in this notebook, so keep these names unchanged.
    x = ortho_group.rvs(input_shape)
    for k in samples:
        d1, d2 = sample_orth_datasets(input_shape, k, p)
        data = torch.tensor(np.concatenate((d1, d2))).float().to(device)
        labels = torch.tensor(np.concatenate(([0]*d1.shape[0], [1]*d2.shape[0]))).long().to(device)
        # Shuffle so the two classes are interleaved.
        idx_random = np.random.permutation(data.shape[0])
        data, labels = data[idx_random], labels[idx_random]

        # Row layout: input_shape, k, p, mean L2, median L2.
        res_row = [input_shape, k, p]
        model = CNN_OneD(hidden_channels=128, kernel_size=1000,
                         num_classes=2, no_final=True)
        model = model.to(device)
        model = train(model, data, labels, 1000)
        test_ddn(model, data, labels, res_row)
        data_all.append(res_row)
        print(','.join(['{:.5f}'.format(value) for value in res_row]))

        print("-----------------------")

4000.00000,50.00000,-1.00000,0.02147,0.02058
-----------------------
4000.00000,100.00000,-1.00000,0.01843,0.01777
-----------------------
4000.00000,200.00000,-1.00000,0.01619,0.01500
-----------------------


p = 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2

In [6]:
# data_all[0]

In [7]:
import pickle as pkl

In [8]:
import os

# Persist the collected result rows. Create the output directory first so a
# missing 'results/' folder cannot throw away a finished run.
results_path = 'results/fcn_orthogonal_data.pkl'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, 'wb') as f:
    pkl.dump(data_all, f)

In [9]:
# print(res_row)

In [10]:
# d1[0]

In [11]:
# np.concatenate(([-1]*d1.shape[0], [1]*d2.shape[0]))

In [12]:
# Column-wise average of the first ten rows of x.
z = x[:10].mean(axis=0)

In [13]:
# Mean row of the first 50 rows of x and of all remaining rows, each scaled
# to unit L2 norm (the pre-scaling norms are kept in norm_1 / norm_2).
z_1, z_2 = np.mean(x[:50], axis=0), np.mean(x[50:], axis=0)
norm_1, norm_2 = np.linalg.norm(z_1), np.linalg.norm(z_2)
z_1, z_2 = z_1 / norm_1, z_2 / norm_2

In [14]:
# Blend the first 50 rows of x with the unit mean direction z_1 (96% / 4%).
x_n1 = 0.96*x[:50] + 0.04*z_1

In [15]:
# Shape of the per-row L2 norms of x_n1, kept as a column for broadcasting.
np.linalg.norm(x_n1, axis=1, keepdims=True).shape

(50, 1)

In [16]:
# x_normed1 was never defined in this notebook, so this cell raised a
# NameError on a fresh kernel. Presumably it is x_n1 with every row rescaled
# to unit L2 norm (the previous cell builds exactly that column of norms).
x_normed1 = x_n1 / np.expand_dims(np.linalg.norm(x_n1, axis=1), 1)
np.linalg.norm(x_normed1[0])

In [None]:
# Inner product of rows 0 and 3 of x_normed1.
(x_normed1[0] * x_normed1[3]).sum()