In [16]:
from time import time

import torch
import sklearn.datasets
import sklearn.preprocessing
import sklearn.model_selection
import numpy as np
import torchvision
import torchvision.transforms as transforms
import onlinehd

import pandas as pd
import pickle
from onlinehd import CAE

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets
# import pdb
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import tkinter

In [17]:
# Seed NumPy's global RNG and PyTorch's default generator with the same value
# so that a fresh "Restart & Run All" draws the same random numbers.
SEED = 1234
np.random.seed(SEED)
# Kept as the last expression of the cell: its return value (the torch
# Generator) is what the cell displays.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7ff8ec649b90>

In [18]:
def loss_function(W, x, recons_x, h, lam):
    """Contractive auto-encoder loss: reconstruction term plus Jacobian penalty.

    The reconstruction term is whatever the module-level ``mse_loss`` callable
    returns for ``(recons_x, x)``.  The penalty sums, over the batch and the
    hidden units, ``(h * (1 - h))**2`` weighted by the squared norm of the
    matching row of ``W`` and is then scaled by ``lam``.

    Args:
        W (Tensor): encoder weights, N_hidden x N.
        x (Tensor): network input, N_batch x N.
        recons_x (Tensor): reconstruction of the input, N_batch x N.
        h (Tensor): hidden activations, N_batch x N_hidden.
        lam (float): weight of the penalty term.

    Returns:
        Tensor: reconstruction term plus the weighted penalty.  The penalty has
        shape ``(1,)``, so the result is a one-element tensor when ``mse_loss``
        returns a scalar.
    """
    reconstruction = mse_loss(recons_x, x)
    # Element-wise h * (1 - h): N_batch x N_hidden.
    hidden_grad = h * (1 - h)
    # Squared row norms of W, kept as a column (N_hidden x 1) so that a plain
    # matrix product can be used below.
    row_sq_norms = Variable(W).pow(2).sum(dim=1, keepdim=True)
    # (N_batch x N_hidden) x (N_hidden x 1) -> N_batch x 1, reduced over the batch.
    penalty = hidden_grad.pow(2).mm(row_sq_norms).sum(0)
    return reconstruction + penalty.mul_(lam)

In [19]:
def CAEtrain(epoch, loader, recon, lam : float = 1e-4):
    """Run one training epoch of the contractive auto-encoder.

    Uses the notebook-level globals ``model``, ``optimizer`` and ``args``.

    Args:
        epoch: index of the current epoch; only forwarded to ``model.save_data``.
        loader: iterable yielding ``(data, target)`` batches; targets are ignored.
        recon: buffer handed to ``model.save_data``; it is replaced by that
            method's return value after every batch.
        lam (float): weight of the contractive penalty passed to ``loss_function``.

    Returns:
        The value returned by the last ``model.save_data`` call, or ``recon``
        unchanged when ``loader`` yields no batches.
    """
    model.train()

    for idx, (data, _) in enumerate(loader):
        if args.cuda:
            data = data.cuda()
        optimizer.zero_grad()
        hidden_representation, recons_x = model(data)
        # Encoder weights as stored in the state dict.
        # NOTE(review): state_dict() tensors are presumably detached from the
        # graph, so the penalty would only back-propagate through the hidden
        # activations -- confirm this is intended.
        W = model.state_dict()['fc1.weight']

        loss = loss_function(W, data.view(-1, 784), recons_x,
                            hidden_representation, lam)
        loss.backward()
        optimizer.step()
        if idx % 100 == 0:
            # Flatten before indexing so that both a 0-dim and a one-element
            # loss can be read; the printed value is still a 0-dim tensor.
            batch_loss = loss.detach().reshape(-1)[0]
            print(batch_loss / len(data))
        recon = model.save_data(data, epoch, idx, recon)

    return recon
    

In [20]:
# loads simple mnist dataset
def load(cae_epochs=4):
    """Fetch MNIST, split and normalise it, then train the global CAE on the training split.

    Uses the notebook-level globals ``args``, ``kwargs`` and ``model`` as well
    as ``CAEtrain``.

    Args:
        cae_epochs (int): number of CAE training epochs.  Defaults to 4, the
            value that used to be hard-coded.

    Returns:
        tuple: ``(x, x_test, y, y_test, model, scaler)`` where ``x``/``x_test``
        are the normalised float tensors, ``y``/``y_test`` the long label
        tensors, ``model`` the global CAE after training and ``scaler`` the
        fitted ``Normalizer``.
    """
    # MNIST as provided through sklearn / OpenML
    x, y = sklearn.datasets.fetch_openml('mnist_784', return_X_y=True)
    x = x.astype(float)
    y = np.array(y.astype(int))

    # split and normalize
    # No random_state is passed on purpose: the split then follows NumPy's
    # global RNG state, exactly as before.
    x, x_test, y, y_test = sklearn.model_selection.train_test_split(x, y)
    scaler = sklearn.preprocessing.Normalizer().fit(x)
    x = scaler.transform(x)
    x_test = scaler.transform(x_test)

    # changes data to pytorch's tensors
    x = torch.from_numpy(x).float()
    y = torch.from_numpy(y).long()
    x_test = torch.from_numpy(x_test).float()
    y_test = torch.from_numpy(y_test).long()

    # preprocessing with CAE: train the auto-encoder on the training split
    train_loader = DataLoader(CAE.MyDataset(x, y), batch_size=args.batch_size, shuffle=False, **kwargs)
    recon = torch.zeros(x.shape)
    for epoch in tqdm(range(cae_epochs)):
        # Every epoch is handed the same buffer; the value CAEtrain returns is
        # not needed here because the trained model itself is returned.
        CAEtrain(epoch, train_loader, recon)

    return x, x_test, y, y_test, model, scaler

In [21]:
from tqdm import tqdm

# Run configuration for the contractive auto-encoder (CAE).
args = CAE.Args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader options, only applied when running on the GPU.
kwargs = {'num_workers': 5, 'pin_memory': True} if args.cuda else {}

# NOTE: despite its name this is a summed binary cross-entropy.  The name is
# kept because loss_function looks the criterion up as the global `mse_loss`.
mse_loss = nn.BCELoss(reduction='sum')

model = CAE.CAE()
# Move the model to the GPU *before* building the optimizer, as recommended by
# the torch.optim documentation.
if args.cuda:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [22]:
print('Loading...')
# load() hands back the global CAE `model`; it is kept under its own name
# (CAE_model) because `model` is rebound to the OnlineHD classifier further down.
x, x_test, y, y_test, CAE_model, scaler = load()

Loading...


  0%|                                                     | 0/4 [00:00<?, ?it/s]

tensor(543.4429)
tensor(44.4497)
tensor(41.3416)
tensor(40.6125)
tensor(41.3480)


 25%|███████████                                 | 1/4 [01:59<05:57, 119.10s/it]

tensor(41.0364)
tensor(38.4574)
tensor(38.3459)
tensor(38.7589)
tensor(39.8535)


 50%|██████████████████████                      | 2/4 [04:00<04:00, 120.38s/it]

tensor(39.5987)
tensor(37.5289)
tensor(37.6671)
tensor(38.1836)
tensor(39.3383)


 75%|█████████████████████████████████           | 3/4 [05:57<01:58, 118.78s/it]

tensor(39.0724)
tensor(37.1520)
tensor(37.3431)
tensor(37.8779)
tensor(39.0526)


100%|████████████████████████████████████████████| 4/4 [07:52<00:00, 118.03s/it]


In [23]:
# Reconstruct both splits with the trained CAE.  This is inference only, so
# gradient tracking is switched off instead of building an autograd graph over
# the full datasets.
with torch.no_grad():
    _, x_cae = CAE_model(x)
    _, x_test_cae = CAE_model(x_test)

In [24]:
# Pass the CAE reconstructions through the fitted scaler again and convert the
# result back to float tensors.
x_cae, x_test_cae = (
    torch.from_numpy(scaler.transform(reconstruction.detach().numpy())).float()
    for reconstruction in (x_cae, x_test_cae)
)

In [25]:
classes = y.unique().size(0)
features = x.size(1)
# NOTE: this rebinds the global `model` (previously the CAE, still reachable
# as CAE_model) to the OnlineHD classifier.
model = onlinehd.OnlineHD(classes, features) #OnlineHD initialize

if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    x_test = x_test.cuda()
    y_test = y_test.cuda()
    # The classifier is fitted and evaluated on the CAE reconstructions, so
    # they have to live on the same device as the labels and the model.
    x_cae = x_cae.cuda()
    x_test_cae = x_test_cae.cuda()
    model = model.to('cuda')
    print('Using GPU!')

print('Training...')
t = time()
model = model.fit(x_cae, y, bootstrap=1.0, lr=0.035, epochs=300)
t = time() - t
print(f'{t = :6f}')

Training...
t = 117.119863


In [26]:
print('Validating...')
# Predicted labels for the CAE-reconstructed train and test splits.
yhat, yhat_test = model(x_cae), model(x_test_cae)

# Accuracy: fraction of predictions that equal the true label.
acc = y.eq(yhat).float().mean()
acc_test = y_test.eq(yhat_test).float().mean()

print(f'{acc = :6f}')
print(f'{acc_test = :6f}')

Validating...
acc = 0.889257
acc_test = 0.887143


In [28]:
import pickle

def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# FGSM samples are stored per string key.
# presumably the key is the perturbation strength -- confirm against the
# script that produced the pickle.
FGSM = _read_pickle('hd_adversarial_sample/MNIST_HD_FGSM.pickle')
FGSM_001, FGSM_003, FGSM_007, FGSM_01 = (
    FGSM['data'][key] for key in ('0.01', '0.03', '0.07', '0.1')
)

DF = _read_pickle('hd_adversarial_sample/MNIST_HD_DF.pickle')
DF_data = DF['data']

JSMA = _read_pickle('hd_adversarial_sample/MNIST_HD_JSMA.pickle')
JSMA_data = JSMA['data']

In [29]:
# Reconstruct every adversarial set with the trained CAE.  This is inference
# only, so gradient tracking is switched off.
# NOTE: each name is rebound to its reconstruction, so this cell must not be
# run twice without reloading the adversarial samples.
with torch.no_grad():
    _, FGSM_001 = CAE_model(FGSM_001)
    _, FGSM_003 = CAE_model(FGSM_003)
    _, FGSM_007 = CAE_model(FGSM_007)
    _, FGSM_01 = CAE_model(FGSM_01)
    _, DF_data = CAE_model(DF_data)
    _, JSMA_data = CAE_model(JSMA_data)

In [30]:
# Flatten every reconstructed adversarial set to rows of 784 values, pass it
# through the fitted scaler and convert the result back to a float tensor.
FGSM_001, FGSM_003, FGSM_007, FGSM_01, DF_data, JSMA_data = (
    torch.from_numpy(
        scaler.transform(reconstruction.detach().numpy().reshape(-1, 784))
    ).float()
    for reconstruction in (FGSM_001, FGSM_003, FGSM_007, FGSM_01, DF_data, JSMA_data)
)

In [31]:
print('Validating...')

# The adversarial tensors were built on the CPU, whereas `model` and `y_test`
# are moved to the GPU when CUDA is available.  Put the inputs on the labels'
# device so prediction and comparison work in both setups.
FGSM_001_yhat = model(FGSM_001.to(y_test.device))
FGSM_003_yhat = model(FGSM_003.to(y_test.device))
FGSM_007_yhat = model(FGSM_007.to(y_test.device))
FGSM_01_yhat = model(FGSM_01.to(y_test.device))
DF_data_yhat = model(DF_data.to(y_test.device))
JSMA_data_yhat = model(JSMA_data.to(y_test.device))

# NOTE(review): comparing against y_test assumes the pickled adversarial
# samples are in the same order as this notebook's test split -- confirm they
# were generated from the same seeded split.
FGSM_001_acc = (y_test == FGSM_001_yhat).float().mean()
FGSM_003_acc = (y_test == FGSM_003_yhat).float().mean()
FGSM_007_acc = (y_test == FGSM_007_yhat).float().mean()
FGSM_01_acc = (y_test == FGSM_01_yhat).float().mean()
DF_acc = (y_test == DF_data_yhat).float().mean()
JSMA_acc = (y_test == JSMA_data_yhat).float().mean()

print(f'{FGSM_001_acc = :6f}')
print(f'{FGSM_003_acc = :6f}')
print(f'{FGSM_007_acc = :6f}')
print(f'{FGSM_01_acc = :6f}')
print(f'{DF_acc = :6f}')
print(f'{JSMA_acc = :6f}')

Validating...
FGSM_001_acc = 0.877943
FGSM_003_acc = 0.816914
FGSM_007_acc = 0.595771
FGSM_01_acc = 0.446743
DF_acc = 0.872229
JSMA_acc = 0.879943
