# guidelines

TODO : import whenever needed, not centralized

states https://pytorch.org/tutorials/beginner/saving_loading_models.html

# Introduction 

## Aim

## Data

First load the dataset:

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive, where the
# project files used by this notebook are stored.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Navigation into the project folder on Drive (kept commented out), followed
# by a listing of the current directory.
# NOTE(review): the project-local imports further down presumably need the
# working directory to be the project folder — confirm before a fresh run.
#%cd drive/MyDrive/
#%cd Colab\ Notebooks
#%cd CS439/optml_project/
!ls

/content/drive/MyDrive
/content/drive/MyDrive/Colab Notebooks
/content/drive/MyDrive/Colab Notebooks/CS439/optml_project
 adversary.py	        data_utils.py		  Nesterov.ipynb   res
 adv_test.py	        Hyperparam-tuning.ipynb   net.py	   Research
 alt_adv_test.py        Matt_notebook.ipynb	  optimizer.py	   test.py
 Baris_Notebook.ipynb   Merged-notebook.ipynb	  __pycache__	   training.py
 data		       'MF Notebook.ipynb'	  README.md


In [4]:
from data_utils import get_mnist

# Fetch the train and test datasets through the project helper, requesting
# normalized data (normalize=True).
train_dataset, test_dataset = get_mnist(normalize=True)

# Import (Remove section later on)

In [14]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd

In [10]:
from adversary import attack, protected_training
from net import Net
from torch.optim import Optimizer
from training import training, testing
from pathlib import Path
from data_utils import build_data_loaders
import random

## Setup

The flags below configure how the notebook runs:

In [5]:
# Whether to tune the hyperparameters in this notebook
# Note that this might take a long time (especially for Adam)
hyperparameter_tune = False
# Separate switch for the tuning runs that pass `func=protected_training`
# (section "Attack on robust model")
prot_hyperparameter_tune = False

In [6]:
# Ask for the GPU; the request is dropped when CUDA is not available
use_cuda = True

# Resolve the torch device once so that every later cell can reuse it
if use_cuda and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device chosen is {device}")

Device chosen is cpu


We set up the training parameters used throughout the notebook once here, so that the downstream code stays readable:

Note that we will use a model with a 10-dimensional output, where each output is passed through softmax. When receiving an output 

$$Z = \begin{bmatrix} \mathbf z_1 & \dots & \mathbf z_B \end{bmatrix}^\top \in \mathbb R^{B \times 10}$$

with $B$ the batch size, we first retrieve the maximal component of each $\mathbf z_i$:

$$\hat y_i = \text{argmax}_{k = 1, \ldots, 10} \; z_{ik}, \quad i = 1, \ldots, B$$

and then compute the accuracy:

$$\text{acc} = \frac 1 B \sum_{i=1}^B I\left\{ \hat y_i = y_i \right\} $$

with $I$ the indicator function and $y_i \in \{1, \ldots, 10\}$ the true target. 

In [13]:
from training import accuracy

# Settings shared by every training run in the notebook
training_config = dict(
    loss_fun=torch.nn.CrossEntropyLoss(),  # loss function
    metric_fun=accuracy,                   # performance evaluation function
    device=device,                         # device to train on
    epochs=10,                             # number of epochs
)

# Evaluation reuses the same settings minus the epoch count; `pop` stays the
# last expression of the cell, so the removed value is still displayed
test_config = dict(training_config)
test_config.pop('epochs')

10

In [14]:
# View the source code
??accuracy

# Model

We use a simple standard model for the MNIST dataset (can be found [here](https://github.com/floydhub/mnist/blob/master/ConvNet.py)).

In [15]:
from net import Net

In [16]:
??Net

# Hyperparameter tuning

In [1]:
from training import tune_optimizer
from optimizer import AdamOptimizer, NesterovOptimizer, MiniBatchOptimizer
from data_utils import get_best_hyperparams

If the `hyperparameter_tune` flag was set to `True` above, the following code will run hyperparameter tuning on all optimizers. Note that one can either run K-fold cross-validation (by providing `nfolds`, as done in the cells below) or use a simple train/test split (by providing `train_ratio`).

If the flag is set to `False`, the cell below will simply set up the hyperparameters that we carefully cross-validated:

In [11]:
# JSON result file of the tuning rounds, per optimizer class
tuning_result_files = {
    AdamOptimizer: './res/adam_tuning_round3.json',
    NesterovOptimizer: './res/nesterov_tuning_round2.json',
    MiniBatchOptimizer: './res/minibatch_tuning_round2.json',
}

# Best hyperparameters per optimizer class, as returned by the project helper
optimizers = {
    optimizer_class: get_best_hyperparams(result_file)
    for optimizer_class, result_file in tuning_result_files.items()
}

## Adam

In [19]:
# Candidate values explored for each Adam hyperparameter
search_grid_adam = dict(
    lr=np.linspace(0.001, 0.01, 2),
    beta1=np.linspace(0.1, 0.9, 2),
    beta2=np.linspace(0.5, 0.999, 2),
    batch_size=[32, 64, 128],
    weight_decay=np.linspace(0.001, 0.1, 2),
    epsilon=np.linspace(1e-10, 1e-8, 2),
)

if not hyperparameter_tune:
    # Tuning disabled: take the hyperparameters loaded into `optimizers`
    results_adam = optimizers[AdamOptimizer]
else:
    # Tuning enabled: search the grid on the training data
    results_adam = tune_optimizer(
        model=Net().to(device),
        optim_fun=AdamOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_adam,
        nfolds=3,
        **training_config,
    )

## Nesterov

In [20]:
# Candidate values explored for the Nesterov optimizer
search_grid_nesterov = dict(
    lr=np.logspace(0, 1),
    batch_size=[32, 64, 128],
)

if not hyperparameter_tune:
    # Tuning disabled: take the hyperparameters loaded into `optimizers`
    results_nesterov = optimizers[NesterovOptimizer]
else:
    # Tuning enabled: search the grid on the training data
    results_nesterov = tune_optimizer(
        model=Net().to(device),
        optim_fun=NesterovOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_nesterov,
        nfolds=3,
        **training_config,
    )

## Minibatch

In [21]:
# Both settings of the decreasing-learning-rate switch, in shuffled order
dec_lr_set = [0, 1]
random.shuffle(dec_lr_set)

# Candidate values explored for the minibatch optimizer
search_grid_mini = dict(
    lr=np.linspace(0.00001, 0.01, 5),
    batch_size=[32, 64, 128],
    decreasing_lr=dec_lr_set,
)

if not hyperparameter_tune:
    # Tuning disabled: take the hyperparameters loaded into `optimizers`
    results_mini = optimizers[MiniBatchOptimizer]
else:
    # Tuning enabled: search the grid on the training data
    results_mini = tune_optimizer(
        model=Net().to(device),
        optim_fun=MiniBatchOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_mini,
        nfolds=3,
        **training_config,
    )

In [26]:
# Report the trial with the highest test metric among the minibatch results.
# NOTE(review): assumes `results_mini` holds one record per trial with the keys
# "metric_test", "lr" and "decreasing_lr" — confirm against `tune_optimizer`.
df_analysis = pd.DataFrame(results_mini)

# Defaults so that the report below never reads an unbound name when no trial
# qualifies (the former row loop left both hyperparameters undefined then)
best_acc = 0.0
learning_rate = None
decreasing_lr = None

if not df_analysis.empty:
    # Only trials strictly better than 0 qualify; among them the first
    # occurrence of the maximum wins, as with the former row-by-row scan
    eligible = df_analysis[df_analysis["metric_test"] > best_acc]
    if not eligible.empty:
        best_row = eligible.iloc[eligible["metric_test"].to_numpy().argmax()]
        best_acc = best_row["metric_test"]
        learning_rate = round(best_row["lr"], 6)
        decreasing_lr = best_row["decreasing_lr"]

if learning_rate is None:
    print("No trial with a test accuracy above 0 was found")
else:
    print("Best Accuracy was {}% with Learning Rate {} and Decreasing LR: {}".format(100*best_acc, learning_rate, decreasing_lr))


Best Accuracy was 98.94448138297872% with Learning Rate 0.125893 and Decreasing LR: False


## Comparison

### TODO

# Attack on naive model



In [27]:
from data_utils import build_data_loaders
from training import training, testing

## Train naive models

### Adam

In [28]:
# Fresh network for the naive (non-protected) Adam run
net_naive_adam = Net().to(device)
# Data loaders built with the batch size stored in the Adam results
train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, results_adam['batch_size'])

In [29]:
# Adam optimizer configured from the stored hyperparameters
adam_opt_naive = AdamOptimizer(
    net_naive_adam.parameters(),
    lr=results_adam['lr'],
    beta1=results_adam['beta1'],
    beta2=results_adam['beta2'],
    weight_decay=results_adam['weight_decay'],
    epsilon=results_adam['epsilon'],
)

# Fit on the training loader ...
loss_train, acc_train = training(
    net_naive_adam,
    train_loader,
    adam_opt_naive,
    training_config['loss_fun'],
    training_config['metric_fun'],
    epochs=training_config['epochs'],
    device=device,
)

# ... then score the trained network on the test loader
loss_test, acc_test = testing(
    net_naive_adam,
    test_loader,
    training_config['loss_fun'],
    training_config['metric_fun'],
    device=device,
)

Launching training on cuda
batch 100	loss = 2.267	acc = 0.2031
batch 200	loss = 2.093	acc = 0.5781
batch 300	loss = 1.363	acc = 0.6406
batch 400	loss = 0.5689	acc = 0.8594
batch 500	loss = 0.3135	acc = 0.9062
batch 600	loss = 0.4358	acc = 0.8594
batch 700	loss = 0.2853	acc = 0.9219
batch 800	loss = 0.3075	acc = 0.875
batch 900	loss = 0.139	acc = 0.9531
epoch 0	avg epoch loss = 0.9228	avg epoch acc = 0.7384
batch 100	loss = 0.08239	acc = 0.9688
batch 200	loss = 0.1757	acc = 0.9688
batch 300	loss = 0.1137	acc = 0.9688
batch 400	loss = 0.2313	acc = 0.9688
batch 500	loss = 0.1433	acc = 0.9531
batch 600	loss = 0.2047	acc = 0.9219
batch 700	loss = 0.1001	acc = 0.9844
batch 800	loss = 0.08361	acc = 0.9688
batch 900	loss = 0.0476	acc = 0.9844
epoch 1	avg epoch loss = 0.1227	avg epoch acc = 0.964
batch 100	loss = 0.05947	acc = 0.9688
batch 200	loss = 0.08712	acc = 0.9688
batch 300	loss = 0.05056	acc = 0.9844
batch 400	loss = 0.1957	acc = 0.9688
batch 500	loss = 0.088	acc = 0.9688
batch 600	loss

### Nesterov



In [30]:
# Train one naive (non-protected) network per optimizer and keep both the
# train/test scores and the trained network itself.
# NOTE(review): the saved output of this cell ends in a TypeError raised during
# the first (Adam) iteration; the cause is not visible from this notebook —
# check the keys returned by get_best_hyperparams against the optimizer
# constructor, and the keyword names accepted by training/testing.
naive_networks = dict()
data_naive = list()
# Forwarded to `training` below; presumably 0 turns off per-batch logging — TODO confirm
batch_log_interval = 0

for optimizer, optimizer_params in optimizers.items():
    print(f'--- {optimizer}')
    # Copy so that popping 'batch_size' below leaves the shared table untouched
    optimizer_params = optimizer_params.copy()
    
    net = Net().to(device)
    # Instantiate data loaders with selected batch size
    batch_size = optimizer_params.pop('batch_size')
    train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, batch_size)
    # Instantiate optimizer with the remaining hyperparameters as keyword arguments
    optimizer_instance = optimizer(net.parameters(), **optimizer_params)
    # Train
    loss_train, acc_train = training(
        model=net, 
        dataset=train_loader, 
        optim=optimizer_instance,
        batch_log_interval=batch_log_interval,
        **training_config
    )
    # Test (test_config is training_config without the 'epochs' entry)
    loss_test, acc_test = testing(
        model=net,
        dataset=test_loader,
        **test_config
    )
    # Log
    data_naive.append({
        'optimizer': str(optimizer),
        'loss_train': loss_train,
        'acc_train': acc_train,
        'loss_test': loss_test,
        'acc_test': acc_test
    })
    # Save naive model, keyed by optimizer class
    naive_networks[optimizer] = net

--- <class 'optimizer.AdamOptimizer'>


TypeError: ignored

### Minibatch (for now, loop later)

In [31]:
# Fresh network for the naive (non-protected) minibatch run
net_naive_mini = Net().to(device)
# Data loaders built with the batch size stored in the minibatch results
train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, results_mini['batch_size'])

In [32]:
# Minibatch optimizer configured from the stored hyperparameters
mini_opt_naive = MiniBatchOptimizer(
    net_naive_mini.parameters(),
    lr=results_mini['lr'],
    decreasing_lr=results_mini['decreasing_lr'],
)

# Fit on the training loader ...
loss_train, acc_train = training(
    net_naive_mini,
    train_loader,
    mini_opt_naive,
    training_config['loss_fun'],
    training_config['metric_fun'],
    epochs=training_config['epochs'],
    device=device,
)

# ... then score the trained network on the test loader
loss_test, acc_test = testing(
    net_naive_mini,
    test_loader,
    training_config['loss_fun'],
    training_config['metric_fun'],
    device=device,
)

Launching training on cuda
batch 100	loss = 0.2113	acc = 0.9453
batch 200	loss = 0.1763	acc = 0.9609
batch 300	loss = 0.08091	acc = 0.9766
batch 400	loss = 0.01724	acc = 1.0
epoch 0	avg epoch loss = 0.2232	avg epoch acc = 0.9323
batch 100	loss = 0.04453	acc = 0.9844
batch 200	loss = 0.118	acc = 0.9844
batch 300	loss = 0.05569	acc = 0.9844
batch 400	loss = 0.003604	acc = 1.0
epoch 1	avg epoch loss = 0.05207	avg epoch acc = 0.9838
batch 100	loss = 0.01543	acc = 1.0
batch 200	loss = 0.09866	acc = 0.9844
batch 300	loss = 0.04226	acc = 0.9844
batch 400	loss = 0.001441	acc = 1.0
epoch 2	avg epoch loss = 0.03439	avg epoch acc = 0.9899
batch 100	loss = 0.008184	acc = 1.0
batch 200	loss = 0.08595	acc = 0.9844
batch 300	loss = 0.03442	acc = 0.9844
batch 400	loss = 0.0008632	acc = 1.0
epoch 3	avg epoch loss = 0.0252	avg epoch acc = 0.9929
batch 100	loss = 0.005902	acc = 1.0
batch 200	loss = 0.07282	acc = 0.9844
batch 300	loss = 0.03281	acc = 0.9844
batch 400	loss = 0.0005941	acc = 1.0
epoch 4	avg

## Attack naive models

In [33]:
from adversary import attack

In [34]:
# Epsilon values passed to `attack`: 0.00 to 0.45 in steps of 0.05 (0.5 itself is excluded)
epsilons = np.arange(0, 0.5, 0.05)

In [35]:
# use the lst_optimizer
# Only one optimizer used in this part?

### Adam

In [36]:
# Per-epsilon results of attacking the naive Adam network
accuracy_naive_adam, losses_naive_adam = [], []

for eps in epsilons:
    # One attack run per epsilon value, evaluated on the current test loader
    loss_attack, acc_attack = attack(
        net_naive_adam,
        training_config['loss_fun'],
        training_config['metric_fun'],
        test_loader,
        epsilon=eps,
        device=device,
    )
    losses_naive_adam.append(loss_attack)
    accuracy_naive_adam.append(acc_attack)

Epsilon: 0.00	Test Accuracy = 0.970
Epsilon: 0.05	Test Accuracy = 0.965
Epsilon: 0.10	Test Accuracy = 0.956
Epsilon: 0.15	Test Accuracy = 0.947
Epsilon: 0.20	Test Accuracy = 0.932
Epsilon: 0.25	Test Accuracy = 0.916
Epsilon: 0.30	Test Accuracy = 0.891
Epsilon: 0.35	Test Accuracy = 0.855
Epsilon: 0.40	Test Accuracy = 0.809
Epsilon: 0.45	Test Accuracy = 0.745


### Nesterov

In [37]:
# Attack every trained naive network at each epsilon value and log the
# resulting loss and accuracy.
# NOTE(review): this rebinds `data_naive`, which the training loop also fills
# with train/test scores — consider a separate name if both logs are needed.
data_naive = list()

for optimizer, network in naive_networks.items():
    print(f'--- {optimizer}')

    for eps in epsilons:
        # Same call shape as the single-model attack cells: model, loss, metric
        # and loader positionally, then epsilon and device by keyword. The
        # device entry is passed as device (the loss function was passed there
        # before, and the metric function was missing).
        loss_attack, acc_attack = attack(
            network,
            training_config['loss_fun'],
            training_config['metric_fun'],
            test_loader,
            epsilon=eps,
            device=training_config['device'],
        )
        # Log
        data_naive.append({
            'optimizer': str(optimizer),
            'epsilon': eps,
            'loss': loss_attack,
            'acc': acc_attack
        })

### Minibatch (for now, loop later)

In [38]:
# Per-epsilon results of attacking the naive minibatch network
accuracy_naive, losses_naive = [], []

for eps in epsilons:
    # One attack run per epsilon value, evaluated on the current test loader
    loss_attack, acc_attack = attack(
        net_naive_mini,
        training_config['loss_fun'],
        training_config['metric_fun'],
        test_loader,
        epsilon=eps,
        device=device,
    )
    losses_naive.append(loss_attack)
    accuracy_naive.append(acc_attack)

Epsilon: 0.00	Test Accuracy = 0.969
Epsilon: 0.05	Test Accuracy = 0.963
Epsilon: 0.10	Test Accuracy = 0.954
Epsilon: 0.15	Test Accuracy = 0.944
Epsilon: 0.20	Test Accuracy = 0.933
Epsilon: 0.25	Test Accuracy = 0.915
Epsilon: 0.30	Test Accuracy = 0.888
Epsilon: 0.35	Test Accuracy = 0.854
Epsilon: 0.40	Test Accuracy = 0.805
Epsilon: 0.45	Test Accuracy = 0.743


## Comparison

# Attack on robust model

## Hyperparameter optimization on robust models

- Set the hyperparameter-tuning flag for the robust models:
  - `True`: run the tuning to obtain the tuned hyperparameters.
  - `False` (default): skip the tuning. We have already run it, and the stored results are loaded instead.


In [2]:
# JSON result file of the protected-training tuning, per optimizer class
prot_tuning_result_files = {
    AdamOptimizer: './res/prot_adam_tuning.json',
    NesterovOptimizer: './res/prot_nesterov_tuning.json',
    MiniBatchOptimizer: './res/prot_minibatch_tuning.json',
}

# Best hyperparameters per optimizer class, as returned by the project helper
prot_optimizers = {
    optimizer_class: get_best_hyperparams(result_file)
    for optimizer_class, result_file in prot_tuning_result_files.items()
}

### Adam

In [40]:
# Search grid for Adam in the protected-training tuning
search_grid_adam = {
        'lr': np.linspace(0.001, 0.01, 2),
        'beta1':  np.linspace(0.1, 0.9, 2),
        'beta2': np.linspace(0.5, 0.999, 2),
        'batch_size': [32, 64, 128],
        'weight_decay': np.linspace(0.001, 0.1, 2),
        'epsilon': np.linspace(1e-10, 1e-8, 2),
    }

if prot_hyperparameter_tune:
    # `func=protected_training` presumably makes tune_optimizer train with the
    # protected routine instead of the plain one — TODO confirm
    results_adam_prot = tune_optimizer(
        model=Net().to(device),
        optim_fun=AdamOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_adam,
        nfolds=3,
        func=protected_training,
        **training_config)

else:
    # Fall back to the hyperparameters stored for the protected models; the
    # naive `optimizers` table was read here before, so the robust run
    # silently reused the naive hyperparameters
    results_adam_prot = prot_optimizers[AdamOptimizer]

Launching hyperparameter tuning:
	lr = [0.001 0.01 ]
	beta1 = [0.1 0.9]
	beta2 = [0.5   0.999]
	batch_size = [32, 64, 128]
	weight_decay = [0.001 0.1  ]
	epsilon = [1.e-10 1.e-08]
{'lr': 0.001, 'beta1': 0.1, 'beta2': 0.5, 'batch_size': 32, 'weight_decay': 0.001, 'epsilon': 1e-10}
Epoch 0	avg epoch Loss = 0.2438	avg epoch acc = 0.9275
Epoch 1	avg epoch Loss = 0.09376	avg epoch acc = 0.6452
Epoch 2	avg epoch Loss = 0.07647	avg epoch acc = 0.1528
Epoch 3	avg epoch Loss = 0.07911	avg epoch acc = 0.1184
Epoch 4	avg epoch Loss = 0.07892	avg epoch acc = 0.1192
Epoch 5	avg epoch Loss = 0.08224	avg epoch acc = 0.1398
Epoch 6	avg epoch Loss = 0.08445	avg epoch acc = 0.2138
Epoch 7	avg epoch Loss = 0.0906	avg epoch acc = 0.2257
Epoch 8	avg epoch Loss = 0.08997	avg epoch acc = 0.1785
Epoch 9	avg epoch Loss = 0.09514	avg epoch acc = 0.2173
training took 54.68 s
Avg test loss = 1.68e+03	Avg test acc = 0.204
Epoch 0	avg epoch Loss = 0.2485	avg epoch acc = 0.9345
Epoch 1	avg epoch Loss = 0.09958	avg e

### Nesterov

In [41]:
# Search grid for Nesterov in the protected-training tuning
search_grid_nesterov = {
    'lr': np.logspace(0, 1),
    'batch_size': [32, 64, 128]
}

# Gate on the protected-tuning flag, like the protected Adam cell (the naive
# `hyperparameter_tune` flag was checked here before)
if prot_hyperparameter_tune:
    # `func=protected_training` presumably makes tune_optimizer train with the
    # protected routine instead of the plain one — TODO confirm
    results_nesterov_prot = tune_optimizer(
        model=Net().to(device),
        optim_fun=NesterovOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_nesterov,
        nfolds=3,
        func=protected_training,
        **training_config
    )

else:
    # Fall back to the hyperparameters stored for the protected models rather
    # than the naive `optimizers` table
    results_nesterov_prot = prot_optimizers[NesterovOptimizer]

### Minibatch

In [42]:
# Both settings of the decreasing-learning-rate switch, in shuffled order
dec_lr_set =  [0]*1 + [1]*1
random.shuffle(dec_lr_set)
# Search grid for the minibatch optimizer in the protected-training tuning
search_grid_mini  = {
        'lr': np.linspace(0.00001, 0.01, 5),
        'batch_size': [32, 64, 128],
        'decreasing_lr': dec_lr_set,
    }
# Gate on the protected-tuning flag, like the protected Adam cell (the naive
# `hyperparameter_tune` flag was checked here before)
if prot_hyperparameter_tune:
    # `func=protected_training` presumably makes tune_optimizer train with the
    # protected routine instead of the plain one — TODO confirm
    results_mini_prot = tune_optimizer(
        model=Net().to(device),
        optim_fun=MiniBatchOptimizer,
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        search_grid=search_grid_mini,
        nfolds=3,
        func=protected_training,
        **training_config
    )

else:
    # Fall back to the hyperparameters stored for the protected models rather
    # than the naive `optimizers` table
    results_mini_prot = prot_optimizers[MiniBatchOptimizer]

## Train robust models

In [None]:
from adversary import protect

### Minibatch (for now, loop later)

In [None]:
# Network that will be trained with protection using the minibatch optimizer
robust_net = Net().to(device)

# Settings for the protected run, read from the shared config and from the
# minibatch tuning results
protect_epochs = training_config['epochs']
protect_bz = results_mini_prot['batch_size']
protect_lr = results_mini_prot['lr']
protect_dec_lr = results_mini_prot['decreasing_lr']

# Data loaders with the selected batch size
prot_train_loader, prot_test_loader = build_data_loaders(
    train_dataset,
    test_dataset,
    protect_bz,
)

# Optimizer bound to the parameters of the network being protected
mini_opt_proc = MiniBatchOptimizer(
    robust_net.parameters(),
    lr=protect_lr,
    decreasing_lr=protect_dec_lr,
)

# `robust_net` is rebound to whatever `protect` returns
robust_net = protect(
    robust_net,
    mini_opt_proc,
    training_config['loss_fun'],
    prot_train_loader,
    prot_test_loader,
    device=device,
    epochs=protect_epochs,
)

NameError: ignored

### Adam

In [None]:
# Train a robust model with Adam.
# The network gets its own name so that the minibatch `robust_net`, which the
# minibatch attack cell reads later, is no longer overwritten by this cell.
robust_net_adam = Net().to(device)
protect_epochs = training_config['epochs']
protect_lr_adam = results_adam_prot['lr']
protect_bz_adam = results_adam_prot['batch_size']
protect_beta1 = results_adam_prot['beta1']
protect_beta2 = results_adam_prot['beta2']
protect_weight_decay = results_adam_prot['weight_decay']
protect_epsilon = results_adam_prot['epsilon']
# Use Adam's own batch size (the minibatch `protect_bz` was used here before)
prot_train_loader, prot_test_loader = build_data_loaders(train_dataset, test_dataset, protect_bz_adam)
# The optimizer must hold the parameters of the very network being protected;
# it was built on `net_naive`, a name that is never defined in this notebook
adam_opt_proc = AdamOptimizer(
    robust_net_adam.parameters(),
    lr=protect_lr_adam,
    beta1=protect_beta1,
    beta2=protect_beta2,
    weight_decay=protect_weight_decay,
    epsilon=protect_epsilon,
)
# `robust_net_adam` is rebound to whatever `protect` returns
robust_net_adam = protect(
    robust_net_adam,
    adam_opt_proc,
    training_config['loss_fun'],
    prot_train_loader,
    prot_test_loader,
    device=device,
    epochs=protect_epochs,
)

NameError: ignored

### Nesterov



In [None]:
# Trained robust network per optimizer class
robust_networks = {}
batch_log_interval = 0
# Epsilon value handed to `protect` for every optimizer
epsilon = 0.25

for optimizer, optimizer_params in prot_optimizers.items():
    # Fresh model for this optimizer
    net = Net().to(device)

    # Work on a copy so that removing 'batch_size' leaves the table untouched;
    # the remaining entries become the optimizer's keyword arguments
    optimizer_params = optimizer_params.copy()
    batch_size = optimizer_params.pop('batch_size')
    optimizer_instance = optimizer(net.parameters(), **optimizer_params)

    # Data loaders with the selected batch size
    train_loader, test_loader = build_data_loaders(
        train_dataset, test_dataset, batch_size
    )

    # Robust training; the return value of `protect` is not used here
    protect(
        model=net,
        optim=optimizer_instance,
        train_loader=train_loader,
        test_loader=test_loader,
        epsilon=epsilon,
        **training_config,
    )

    # Keep the network, keyed by optimizer class
    robust_networks[optimizer] = net

## Attack robust models

In [None]:
# Result containers for attacks on the robust networks.
# NOTE(review): this cell looks unfinished — the loop below only looks up the
# network and the batch size; no attack is run and the four dicts stay empty.
# The FGSM / PGD split is taken from the variable names only.
accuracy_fgsm = dict()
losses_fgsm = dict()

accuracy_pgd = dict()
losses_pgd = dict()

for optimizer, optimizer_params in prot_optimizers.items():
    # Fetch the robust network stored for this optimizer class
    net = robust_networks[optimizer]
    # Copy the hyperparameters and take the batch size out of the copy
    optimizer_params = optimizer_params.copy()
    batch_size = optimizer_params.pop('batch_size')



### Minibatch (for now, loop later)

In [None]:
# Per-epsilon results of attacking the robust minibatch network
accuracy_robust = []
losses_robust = []
epsilons = np.arange(0, 0.5, 0.05)

# This should be the first time the test loader is used
for eps in epsilons:
    # Same call shape as the attacks on the naive models: model, loss, metric
    # and loader positionally, then epsilon and device by keyword. The loss
    # comes from training_config (`criterion` is never defined in this
    # notebook), and the metric function is now passed as well.
    loss_attack, acc_attack = attack(
        robust_net,
        training_config['loss_fun'],
        training_config['metric_fun'],
        prot_test_loader,
        epsilon=eps,
        device=device,
    )
    accuracy_robust.append(acc_attack)
    losses_robust.append(loss_attack)

### Adam

In [None]:
# Per-epsilon results of attacking the robust Adam network
accuracy_robust_adam = []
losses_robust_adam = []
# This should be the first time the test loader is used
for eps in epsilons:
    # Same call shape as the attacks on the naive models: model, loss, metric
    # and loader positionally, then epsilon and device by keyword. The loss
    # comes from training_config (`criterion` is never defined in this
    # notebook), and the metric function is now passed as well.
    loss_attack, acc_attack = attack(
        robust_net_adam,
        training_config['loss_fun'],
        training_config['metric_fun'],
        prot_test_loader,
        epsilon=eps,
        device=device,
    )
    accuracy_robust_adam.append(acc_attack)
    losses_robust_adam.append(loss_attack)

## Comparison

# Comparative analysis

### Minibatch (for now)

In [None]:
# Accuracy of the naive and the robust minibatch models against epsilon
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(epsilons, accuracy_naive, "*-", c='blue', label='Naive Model')
ax.plot(epsilons, accuracy_robust, "*-", c='orange', label='Robust Model')

# Fixed tick grids on both axes
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(np.arange(0, 0.5, step=0.05))

ax.set_title("Accuracy vs Epsilon")
ax.set_xlabel("Epsilon")
ax.set_ylabel("Accuracy")
ax.legend();

Lots of plots

* diff naive vs robust (algo as hue)