# guidelines

TODO: import modules in the cell where they are first needed, rather than in one centralized import cell.

Saving and loading model states: see https://pytorch.org/tutorials/beginner/saving_loading_models.html

# Introduction 

## Aim

## Data

In [1]:
# download

# Import (Remove section later on)

In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
from adversary import attack, protect
from net import Net
from torch.optim import Optimizer
from training import training, testing, accuracy, tune_optimizer
from minibatch import MiniBatchOptimizer
from adam import AdamOptimizer
from data_utils import get_mnist, build_data_loaders
import json
from pathlib import Path
import random



## Setup

The flags below configure the notebook:

In [4]:
# Set to True to re-run the hyperparameter search inside this notebook.
# Beware: the search can take a long time, Adam being the slowest.
hyperparameter_tune = False

In [5]:
# Request the GPU; the request is ignored when CUDA is not available.
use_cuda = True

# Fall back to the CPU unless the GPU is both requested and available.
device = torch.device('cuda') if (use_cuda and torch.cuda.is_available()) else torch.device('cpu')
print("Device chosen is {}".format(device))

Device chosen is cuda


We now load the dataset:

In [6]:
from data_utils import get_mnist

# Load MNIST as a (train, test) pair, with normalization requested.
(train_dataset, test_dataset) = get_mnist(normalize=True)

We set up the training parameters that are used throughout the notebook, which improves the readability of the downstream code:

In [19]:
from training import accuracy

# Keyword arguments shared by the training calls of this notebook.
training_config = dict(
    loss_fun=torch.nn.CrossEntropyLoss(),  # loss function
    metric_fun=accuracy,                   # performance evaluation function
    device=device,                         # device to train on
    epochs=10,                             # number of epochs
)

# The testing configuration is the same mapping without the epoch count.
test_config = {key: value for key, value in training_config.items() if key != 'epochs'}

Note that we will use a model with a 10-dimensional output, where each output is passed through softmax. When receiving an output 

$$Z = \begin{bmatrix} \mathbf z_1 & \dots & \mathbf z_B \end{bmatrix}^\top \in \mathbb R^{B \times 10}$$

with $B$ the batch size, we first retrieve the index of the maximal component of each $\mathbf z_i$:

$$\hat y_i = \text{argmax}_{k = 1, \ldots, 10} \; z_{ik}, \quad i = 1, \ldots, B$$

and then compute the accuracy:

$$\text{acc} = \frac 1 B \sum_{i=1}^B I\left\{ \hat y_i = y_i \right\} $$

with $I$ the indicator function and $y_i \in \{1, \ldots, 10\}$ the true target. 

In [8]:
# View the source code of the `accuracy` metric (IPython `??` introspection)
??accuracy

[0;31mSignature:[0m [0maccuracy[0m[0;34m([0m[0myhat[0m[0;34m,[0m [0my[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m   
[0;32mdef[0m [0maccuracy[0m[0;34m([0m[0myhat[0m[0;34m,[0m [0my[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0mprediction[0m [0;34m=[0m [0myhat[0m[0;34m.[0m[0margmax[0m[0;34m([0m[0mdim[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mreturn[0m [0;34m([0m[0my[0m[0;34m.[0m[0meq[0m[0;34m([0m[0mprediction[0m[0;34m)[0m[0;34m)[0m[0;34m.[0m[0mto[0m[0;34m([0m[0mfloat[0m[0;34m)[0m[0;34m.[0m[0mmean[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0mitem[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      /media/maousi/Data/Documents/Programmation/courses/DS-MA2/optml_project/training.py
[0;31mType:[0m      function


# Model

We use a simple standard model for the MNIST dataset (can be found [here](https://github.com/floydhub/mnist/blob/master/ConvNet.py)).

In [9]:
from net import Net

In [10]:
# View the source code of the `Net` model (IPython `??` introspection)
??Net

[0;31mInit signature:[0m [0mNet[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mNet[0m[0;34m([0m[0mnn[0m[0;34m.[0m[0mModule[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""ConvNet -> Max_Pool -> RELU -> ConvNet -> Max_Pool -> RELU -> FC -> RELU -> FC -> SOFTMAX"""[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__init__[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0msuper[0m[0;34m([0m[0mNet[0m[0;34m,[0m [0mself[0m[0;34m)[0m[0;34m.[0m[0m__init__[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0mconv1[0m [0;34m=[0m [0mnn[0m[0;34m.[0m[0mConv2d[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0;36m20[0m[0;34m,[0m [0;36m5[0m[0;34m,[0m [0;36m1[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0mself[0m[0;34m.[0m[0mconv2[0m [0;34m=[0m [0mnn[0m[0;34m.[0m[0mConv2d[0m[0;34m([0m[0;36m20[0m[0;34m,[0m [0

# Hyperparameter tuning

In [11]:
from training import tune_optimizer
from optimizer import AdamOptimizer, NesterovOptimizer, MiniBatchOptimizer

If the `hyperparameter_tune` flag was set to `True` above, the following code will run hyperparameter tuning on all optimizers. Note that one can either run KFold cross validation (by providing `n_folds`) or use a simple train/test split (by providing `train_ratio`).

If the flag is set to `False`, the cell below will simply set up the hyperparameters that we carefully cross-validated:

In [16]:
# Maps each optimizer class to its selected hyperparameters.
# Every entry starts as None until a configuration is assigned to it.
optimizers = dict.fromkeys((AdamOptimizer, NesterovOptimizer, MiniBatchOptimizer))

## Adam

In [None]:


# File in which the Adam tuning results are stored
fp = './res/adam_tuning.json'

# Fresh model for the tuning, and an Adam optimizer built with only the model parameters
net_tune = Net().to(device)
adam_tune = AdamOptimizer(net_tune.parameters())


In [None]:
# Hyperparameter search for Adam.
# When tuning is disabled, this run contributes no new results.
if hyperparameter_tune:
    # NOTE: the grid below holds 5 * 5 * 5 * 4 * 3 = 1500 points if fully enumerated.
    results = tune_optimizer(
        net_tune,
        train_dataset.data,
        train_dataset.targets,
        # `criterion` was never defined; the shared loss lives in `training_config`
        training_config['loss_fun'],
        accuracy,
        device,
        AdamOptimizer,
        epochs=10,
        search_grid={
            'lr': np.linspace(0.00001, 0.01, 5),
            'beta1':  np.linspace(0.1, 0.9, 5),
            'beta2': np.linspace(0.5, 0.999, 5),
            'weight_decay': np.linspace(0.0001, 0.1, 4),
            'epsilon': np.linspace(1e-10, 1e-8, 3),
        },  # this comma was missing, which made the whole cell a SyntaxError
        batch_size=16
    )
else:
    results = []

In [None]:
# Merge the results of this run with those already stored on disk.
# NOTE(review): `results` is rebound to the merged list, so re-running this
# cell in the same kernel appends the stored entries a second time.
if Path(fp).exists():
    with open(fp, 'r') as f:
        old_results = json.loads(f.read())

    results = old_results + results

# Persist the merged results
with open(fp, 'w') as f:
    json.dump(results, f, indent=2)

# Select best hyperparameters: read back what was just written
with open(fp, 'r') as f:
    old_results = json.loads(f.read())

In [None]:
def find_best_params_adam(adam_tuning):
    """Select the best Adam configuration among tuning results.

    The best run is the one with the highest test accuracy (`metric_test`);
    ties are broken by the lowest test loss (`loss_test`). This matches the
    accuracy-based selection used for the minibatch optimizer in this notebook.

    Parameters
    ----------
    adam_tuning : iterable of dict
        One dict per tuning run, with keys `loss_train`, `metric_train`,
        `loss_test`, `metric_test`, `lr`, `beta1`, `beta2`, `weight_decay`
        and `epsilon`.

    Returns
    -------
    dict
        Keys `loss_test`, `acc_test`, `loss_train`, `acc_train`, `lr`, `beta1`,
        `beta2`, `weight_decay`, `epsilon` of the best run. If `adam_tuning`
        is empty, only `loss_test` (+inf) and `acc_test` (-inf) are present.
    """
    best_params = {'loss_test': float('inf'), 'acc_test': -float('inf')}

    # TODO (from the original notes): if a standard deviation becomes available,
    # divide the metrics by it; waiting for the new tuning.
    for item in adam_tuning:
        higher_acc = item['metric_test'] > best_params['acc_test']
        same_acc_lower_loss = (
            item['metric_test'] == best_params['acc_test']
            and item['loss_test'] < best_params['loss_test']
        )
        # The previous `acc better OR loss lower` test let a less accurate run
        # replace the best one, making the outcome depend on the run order.
        if higher_acc or same_acc_lower_loss:
            best_params = {
                'loss_test': item['loss_test'],
                'acc_test': item['metric_test'],
                'loss_train': item['loss_train'],
                'acc_train': item['metric_train'],
                'lr': item['lr'],
                'beta1': item['beta1'],
                'beta2': item['beta2'],
                'weight_decay': item['weight_decay'],
                'epsilon': item['epsilon'],
            }

    return best_params
# `adam_tuning` was never defined; the Adam tuning results are the ones
# reloaded from `fp` into `old_results` by the previous cell.
adam_tuning = old_results
best_adam_params = find_best_params_adam(adam_tuning)
# `print_adam_stats` is not defined or imported anywhere in this notebook:
# display the selected configuration as the cell output instead.
best_adam_params

## Nesterov

In [21]:
from optimizer import NesterovOptimizer

In [None]:
# Grid explored for Nesterov: learning rate and batch size.
# NOTE(review): np.logspace(0, 1) spans 10**0 to 10**1, i.e. learning rates
# between 1 and 10 — confirm that this range is intended.
search_grid_nesterov = dict(
    lr=np.logspace(0, 1),
    batch_size=[32, 64, 128],
)

# Only search when tuning is enabled; otherwise `results_nesterov` is left unset.
if hyperparameter_tune:
    results_nesterov = tune_optimizer(
        model=Net().to(device),
        xtrain=train_dataset.data,
        ytrain=train_dataset.targets,
        optim_fun=NesterovOptimizer,
        search_grid=search_grid_nesterov,
        nfolds=5,
        **training_config,
    )

# Generic function to select best config
# so something like
# optimizers[NesterovOptimizer] = select_best_params(results_nesterov)

## Minibatch

In [None]:
# Fresh model and minibatch optimizer for the tuning
net_tune = Net().to(device)
mini_opt_tune = MiniBatchOptimizer(net_tune.parameters())  # Just using defaults
# Candidate values for `decreasing_lr` (one 0 and one 1), in random order
dec_lr_set = [0]*1 + [1]*1
random.shuffle(dec_lr_set)
# File in which the minibatch tuning results are stored
fp = './res/mini_tuning.json'
# Fixed typo: the flag is `hyperparameter_tune` (was `hyperparamter_tune`, a NameError)
if not hyperparameter_tune:
    results = []

In [None]:
# Hyperparameter search for the minibatch optimizer.
# Fixed typo: the flag is `hyperparameter_tune` (was `hyperparamter_tune`, a NameError)
if hyperparameter_tune:
    results = tune_optimizer(
        net_tune,
        train_dataset.data,
        train_dataset.targets,
        # `criterion` was never defined; the shared loss lives in `training_config`
        training_config['loss_fun'],
        accuracy,
        device,
        MiniBatchOptimizer,
        epochs=10,
        search_grid={
            'lr': np.linspace(0.00001, 0.01, 5),
            'decreasing_lr': dec_lr_set,
        },
        batch_size=16
    )

In [None]:
# Merge the results of this run with those already stored on disk.
# NOTE(review): `results` is rebound to the merged list, so re-running this
# cell in the same kernel appends the stored entries a second time.
if Path(fp).exists():
    with open(fp, 'r') as f:
        old_results = json.loads(f.read())

    results = old_results + results

# Persist the merged results
with open(fp, 'w') as f:
    json.dump(results, f, indent=2)

# Select best hyperparameters: read back what was just written
with open(fp, 'r') as f:
    old_results = json.loads(f.read())

In [None]:
# Pick the minibatch configuration with the highest test accuracy.
df_analysis = pd.DataFrame(results)
if df_analysis.empty:
    # Previously this case ended in a NameError on `learning_rate` at the print below.
    raise ValueError(
        "No minibatch tuning results available: enable `hyperparameter_tune` "
        "or provide previously saved results."
    )

# idxmax returns the first row reaching the maximum, like the former strict `>` loop
best_row = df_analysis.loc[df_analysis["metric_test"].idxmax()]
best_acc = best_row["metric_test"]
learning_rate = round(best_row["lr"], 6)
decreasing_lr = best_row["decreasing_lr"]

print("Best Accuracy was {}% with Learning Rate {} and Decreasing LR: {}".format(100*best_acc, learning_rate, decreasing_lr))


In [None]:
# lst_optimizer = {'name': function_optimizer}

## Comparison

# Attack on naive model



In [14]:
from data_utils import build_data_loaders
from training import training, testing

## Train naive models

### Minibatch (for now, loop later)

In [None]:
# Fresh model for the naive (non-robust) minibatch training
net_naive = Net().to(device)
# `batch_size` was not defined before this cell on a fresh kernel.
# NOTE(review): 16 is the batch size passed to the minibatch tuning call — confirm.
batch_size = 16
train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, batch_size)

In [None]:
# Train and evaluate the naive model with the selected minibatch hyperparameters.
# `criterion` and `epochs` were never defined; both live in `training_config`.
mini_opt_naive = MiniBatchOptimizer(net_naive.parameters(), lr=learning_rate, decreasing_lr=decreasing_lr)
loss_train, acc_train = training(
    net_naive,
    train_loader,
    mini_opt_naive,
    training_config['loss_fun'],
    accuracy,
    epochs=training_config['epochs'],
    device=device,
)
loss_test, acc_test = testing(net_naive, test_loader, training_config['loss_fun'], accuracy, device=device)

## Adam

In [None]:
# Fresh model for the naive (non-robust) Adam training
net_naive_adam = Net().to(device)
# NOTE(review): `batch_size` must already be defined by an earlier cell
(train_loader, test_loader) = build_data_loaders(train_dataset, test_dataset, batch_size)

In [None]:
# Train and evaluate the naive Adam model.
# Fix: the optimizer, training and testing used `net_naive` (the minibatch
# model), which left `net_naive_adam` untrained.
# `criterion` and `epochs` were never defined; both live in `training_config`.
adam_opt_naive = AdamOptimizer(
    net_naive_adam.parameters(),
    lr=best_adam_params['lr'],
    beta1=best_adam_params['beta1'],
    beta2=best_adam_params['beta2'],
    weight_decay=best_adam_params['weight_decay'],
    epsilon=best_adam_params['epsilon'],
)
loss_train, acc_train = training(
    net_naive_adam,
    train_loader,
    adam_opt_naive,
    training_config['loss_fun'],
    accuracy,
    epochs=training_config['epochs'],
    device=device,
)
loss_test, acc_test = testing(net_naive_adam, test_loader, training_config['loss_fun'], accuracy, device=device)

## Nesterov



In [None]:
# Train one naive model per optimizer, logging train/test loss and accuracy.
naive_networks = dict()
data_naive = list()
batch_log_interval = 0

for optimizer, optimizer_params in optimizers.items():
    print(f'--- {optimizer}')
    # `optimizers` is initialised with None values; fail with a clear message
    # instead of an AttributeError on `None.copy()`.
    if optimizer_params is None:
        raise ValueError(
            f'No hyperparameters selected for {optimizer}; fill in `optimizers` first'
        )
    # Copy so that popping `batch_size` does not alter the shared `optimizers` dict
    optimizer_params = optimizer_params.copy()

    net = Net().to(device)
    # Instantiate data loaders with selected batch size
    batch_size = optimizer_params.pop('batch_size')
    train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, batch_size)
    # Instantiate optimizer with the remaining hyperparameters
    optimizer_instance = optimizer(net.parameters(), **optimizer_params)
    # Train
    loss_train, acc_train = training(
        model=net,
        dataset=train_loader,
        optim=optimizer_instance,
        batch_log_interval=batch_log_interval,
        **training_config
    )
    # Test
    loss_test, acc_test = testing(
        model=net,
        dataset=test_loader,
        **test_config
    )
    # Log
    data_naive.append({
        'optimizer': str(optimizer),
        'loss_train': loss_train,
        'acc_train': acc_train,
        'loss_test': loss_test,
        'acc_test': acc_test
    })
    # Save naive model
    naive_networks[optimizer] = net

## Attack naive models

In [21]:
from adversary import attack

In [22]:
# Attack strengths to evaluate: from 0 up to (but excluding) 0.5, in steps of 0.05
epsilons = np.arange(0.0, 0.5, step=0.05)

In [None]:
# use the lst_optimizer
# Only one optimizer used in this part?

### Minibatch (for now, loop later)

In [None]:
# Attack the naive minibatch model at every epsilon, recording loss and accuracy.
accuracy_naive = []
losses_naive = []

for eps in epsilons:
    # `criterion` was never defined; the shared loss lives in `training_config`
    loss_attack, acc_attack = attack(net_naive, training_config['loss_fun'], test_loader, epsilon=eps, device=device)
    accuracy_naive.append(acc_attack)
    losses_naive.append(loss_attack)

### Adam

In [None]:
# Attack the naive Adam model at every epsilon, recording loss and accuracy.
accuracy_naive_adam = []
losses_naive_adam = []

for eps in epsilons:
    # `criterion` was never defined; the shared loss lives in `training_config`
    loss_attack, acc_attack = attack(net_naive_adam, training_config['loss_fun'], test_loader, epsilon=eps, device=device)
    accuracy_naive_adam.append(acc_attack)
    losses_naive_adam.append(loss_attack)

### Nesterov

In [24]:
# Attack every naive model at every epsilon and log the resulting loss/accuracy.
# NOTE(review): this rebinds `data_naive`, discarding the train/test log
# collected under the same name by the naive training loop.
data_naive = list()

for optimizer, network in naive_networks.items():
    print(f'--- {optimizer}')

    for eps in epsilons:
        loss_attack, acc_attack = attack(
            model=network,
            loss_fun=training_config['loss_fun'],
            test_loader=test_loader,
            epsilon=eps,
            # Fix: the loss function was passed as the device
            device=training_config['device']
        )
        # Log
        data_naive.append({
            'optimizer': str(optimizer),
            'epsilon': eps,
            'loss': loss_attack,
            'acc': acc_attack
        })

NameError: name 'naive_networks' is not defined

## Comparison

# Attack on robust model

## Train robust models

In [25]:
from adversary import protect

### Minibatch (for now, loop later)

In [None]:
# Fresh model, data loaders and minibatch optimizer for the robust training
robust_net = Net().to(device)
# `epochs` was never defined; the epoch count lives in `training_config`
protect_epochs = training_config['epochs']
protect_lr = learning_rate
protect_bz = batch_size
protect_dec_lr = decreasing_lr
prot_train_loader, prot_test_loader = build_data_loaders(train_dataset, test_dataset, protect_bz)
mini_opt_proc = MiniBatchOptimizer(robust_net.parameters(), lr=protect_lr, decreasing_lr=protect_dec_lr)

In [None]:
# Robust training with the minibatch optimizer.
# `criterion` was never defined; the shared loss lives in `training_config`.
robust_net = protect(robust_net, mini_opt_proc, training_config['loss_fun'], prot_train_loader, prot_test_loader, device=device, epochs=protect_epochs)

## Adam

In [None]:
# Fresh model, data loaders and Adam optimizer for the robust training.
# NOTE(review): this rebinds `robust_net`, the name also used for the
# minibatch robust model above.
robust_net = Net().to(device)
# `epochs` was never defined; the epoch count lives in `training_config`
protect_epochs = training_config['epochs']
protect_lr = learning_rate
protect_bz = batch_size

prot_train_loader, prot_test_loader = build_data_loaders(train_dataset, test_dataset, protect_bz)
# Fix: the optimizer was bound to `net_naive.parameters()`, so the robust
# model trained in the next cell would never have been updated.
adam_opt_proc = AdamOptimizer(
    robust_net.parameters(),
    lr=best_adam_params['lr'],
    beta1=best_adam_params['beta1'],
    beta2=best_adam_params['beta2'],
    weight_decay=best_adam_params['weight_decay'],
    epsilon=best_adam_params['epsilon'],
)

In [None]:
# Robust training with the Adam optimizer.
# `criterion` was never defined; the shared loss lives in `training_config`.
robust_net_adam = protect(robust_net, adam_opt_proc, training_config['loss_fun'], prot_train_loader, prot_test_loader, device=device, epochs=protect_epochs)

## Nesterov



In [None]:
# Train one robust model per optimizer.
robust_networks = dict()
batch_log_interval = 0
epsilon = 0.25

for optimizer, optimizer_params in optimizers.items():
    # `optimizers` is initialised with None values; fail with a clear message
    # instead of an AttributeError on `None.copy()`.
    if optimizer_params is None:
        raise ValueError(
            f'No hyperparameters selected for {optimizer}; fill in `optimizers` first'
        )
    # Instantiate model
    net = Net().to(device)
    # Instantiate optimizer (copy so that popping `batch_size` leaves `optimizers` intact)
    optimizer_params = optimizer_params.copy()
    batch_size = optimizer_params.pop('batch_size')
    optimizer_instance = optimizer(net.parameters(), **optimizer_params)
    # Instantiate data loaders
    train_loader, test_loader = build_data_loaders(train_dataset, test_dataset, batch_size)
    # Train robust model
    # NOTE(review): the return value of `protect` is ignored here, whereas the
    # per-optimizer cells above keep it — confirm that `protect` trains `net` in place.
    protect(
        model=net,
        optim=optimizer_instance,
        train_loader=train_loader,
        test_loader=test_loader,
        epsilon=epsilon,
        **training_config
    )
    # Save robust net
    robust_networks[optimizer] = net

## Attack robust models

### Minibatch (for now, loop later)

In [None]:
# Attack the robust minibatch model at every epsilon, recording loss and accuracy.
accuracy_robust = []
losses_robust = []
# This should be the first time test_loader is used
for eps in epsilons:
    # `criterion` was never defined; the shared loss lives in `training_config`
    loss_attack, acc_attack = attack(robust_net, training_config['loss_fun'], prot_test_loader, eps, device=device)
    accuracy_robust.append(acc_attack)
    losses_robust.append(loss_attack)

### Adam

In [None]:
# Attack the robust Adam model at every epsilon, recording loss and accuracy.
accuracy_robust_adam = []
losses_robust_adam = []
# This should be the first time test_loader is used
for eps in epsilons:
    # `criterion` was never defined; the shared loss lives in `training_config`
    loss_attack, acc_attack = attack(robust_net_adam, training_config['loss_fun'], prot_test_loader, eps, device=device)
    accuracy_robust_adam.append(acc_attack)
    losses_robust_adam.append(loss_attack)

## Comparison

# Comparative analysis

### Minibatch (for now)

In [None]:
# Accuracy under attack as a function of epsilon, naive vs robust model
fig, ax = plt.subplots(figsize=(5,5))
ax.plot(epsilons, accuracy_naive, "*-", c='blue', label='Naive Model')
ax.plot(epsilons, accuracy_robust, "*-", c='orange', label='Robust Model')

# One tick per 0.1 of accuracy and per 0.05 of epsilon
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(np.arange(0, 0.5, step=0.05))

ax.set_title("Accuracy vs Epsilon")
ax.set_xlabel("Epsilon")
ax.set_ylabel("Accuracy")
ax.legend();

Lots of plots

* diff naive vs robust (algo as hue)