# Applying an Adversarial Attack to an Equilibrium Propagation-based NN

Reusing Equilibrium Propagation code from https://github.com/smonsays/equilibrium-propagation.git

Training the basic MNIST model using EP and then finding the image associated with the nudged fixed point where the labels are incorrect.

### Initial Setup of the EP code

In [2]:
import torch
import torchvision

In [3]:
# Fetch the Equilibrium Propagation implementation whose `lib` package is imported below.
# NOTE(review): no commit is pinned, and git refuses to clone into an existing non-empty
# directory, so re-running this cell after a successful clone fails.
!git clone https://github.com/smonsays/equilibrium-propagation.git

Cloning into 'equilibrium-propagation'...
remote: Enumerating objects: 36, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 36 (delta 12), reused 32 (delta 10), pack-reused 0[K
Unpacking objects: 100% (36/36), 53.06 KiB | 2.21 MiB/s, done.


In [4]:
# Enter the cloned repository so that `lib` and the relative `etc/*.json` paths resolve.
# NOTE(review): this relies on IPython automagic; `%cd` is the explicit form.
cd equilibrium-propagation

/content/equilibrium-propagation


In [5]:
import argparse
import json
import logging
import sys

import torch

from lib import config, data, energy, train, utils


def load_default_config(energy):
    """
    Read the default parameter configuration shipped for an energy function.

    Args:
        energy: String with the energy name ("restr_hopfield" or "cond_gaussian")
    Returns:
        Dictionary of default parameters parsed from the matching JSON file
    Raises:
        ValueError: if the energy name is not one of the known names
    """
    # Known energy names and the JSON file (relative to the working directory) for each.
    known_configs = (
        ("restr_hopfield", "etc/energy_restr_hopfield.json"),
        ("cond_gaussian", "etc/energy_cond_gaussian.json"),
    )

    for known_name, config_path in known_configs:
        if energy == known_name:
            break
    else:
        # No entry matched the requested name.
        raise ValueError("Energy based model \"{}\" not defined.".format(energy))

    with open(config_path) as handle:
        return json.load(handle)

# Load the defaults for the conditional-Gaussian energy; `cfg` is read by the later cells.
cfg = load_default_config('cond_gaussian')

In [6]:
# Display the loaded configuration dictionary.
cfg

{'batch_size': 100,
 'beta': 1,
 'c_energy': 'cross_entropy',
 'dataset': 'mnist',
 'dimensions': [784, 1000, 10],
 'dynamics': {'dt': 0.1, 'n_relax': 20, 'tau': 1, 'tol': 0},
 'energy': 'cond_gaussian',
 'epochs': 50,
 'fast_ff_init': True,
 'learning_rate': 0.001,
 'nonlinearity': 'sigmoid',
 'optimizer': 'adam',
 'seed': None}

### Training the EP MNIST classifier

In [7]:
# Seed torch's RNG only when the configuration carries a seed (might slow down the model)
if cfg['seed'] is not None:
    torch.manual_seed(cfg['seed'])

# Cost function the model optimizes, built from the configured cost name and beta
c_energy = utils.create_cost(cfg['c_energy'], cfg['beta'])

# One activation function per layer, as a list
phi = utils.create_activations(cfg['nonlinearity'], len(cfg['dimensions']))

# Select the energy based model class named in the configuration, then build it once
if cfg["energy"] == "restr_hopfield":
    energy_cls = energy.RestrictedHopfield
elif cfg["energy"] == "cond_gaussian":
    energy_cls = energy.ConditionalGaussian
else:
    raise ValueError(f'Energy based model \"{cfg["energy"]}\" not defined.')
model = energy_cls(cfg['dimensions'], c_energy, cfg['batch_size'], phi).to(config.device)

# Optimizer over the model weights (may include l2 regularization via weight_decay)
w_optimizer = utils.create_optimizer(model, cfg['optimizer'], lr=cfg['learning_rate'])

# Torch data loaders for the MNIST train and test splits
mnist_train, mnist_test = data.create_mnist_loaders(cfg['batch_size'])

print("Start training with parametrization:\n{}".format(json.dumps(cfg, indent=4, sort_keys=True)))

# NOTE(review): the loop runs a fixed 8 epochs; cfg['epochs'] is not consulted here.
for epoch in range(8):
    # One pass over the training set, then evaluate on the test set
    train.train(model, mnist_train, cfg['dynamics'], w_optimizer, cfg["fast_ff_init"])
    test_acc, test_energy = train.test(model, mnist_test, cfg['dynamics'], cfg["fast_ff_init"])

    # Report accuracy and mean energy for this epoch
    print("epoch: {} \t test_acc: {:.4f} \t mean_E: {:.4f}".format(epoch, test_acc, test_energy))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw

Start training with parametrization:
{
    "batch_size": 100,
    "beta": 1,
    "c_energy": "cross_entropy",
    "dataset": "mnist",
    "dimensions": [
        784,
        1000,
        10
    ],
    "dynamics": {
        "dt": 0.1,
        "n_relax": 20,
        "tau": 1,
        "tol": 0
    },
    "energy": "cond_gaussian",
    "epochs": 50,
    "fast_ff_init": true,
    "learning_rate": 0.001,
    "nonlinearity": "sigmoid",
    "optimizer": "adam",
    "seed": null
}
epoch: 0 	 test_acc: 0.9491 	 mean_E: 0.0000
epoch: 1 	 test_acc: 0.9671 	 mean_E: 0.0000


KeyboardInterrupt: ignored

### Check the outputs visually
1. get some testing data
2. display the testing data
3. get the predictions for this testing data

In [None]:
# Take a single (images, labels) batch from the test loader for the inspections below.
# next(iter(...)) drops the unused enumerate index and avoids rebinding `_`, which IPython
# uses for the previous cell's output. An empty loader now raises StopIteration here
# instead of leaving the two names undefined.
test_x_batch, test_y_batch = next(iter(mnist_test))

In [None]:
# Shape of the image batch taken from the test loader.
test_x_batch.shape

In [None]:
# Model outputs for the clean batch. The last argument is a literal False in the position where
# train.train/train.test above receive cfg["fast_ff_init"] — presumably this forces a full
# relaxation instead of the fast feed-forward init; confirm against lib/train.py.
out = train.predict_batch(model, test_x_batch.to(config.device), cfg['dynamics'], False)

In [None]:
# Shape of the model outputs for the clean batch.
out.shape

In [None]:
from PIL import Image
import numpy as np
# Join the first 20 test images side by side into one greyscale strip and display it.
# NOTE(review): Normalize(mean, std) computes (x - mean) / std, so Normalize((0.5,), (0.5,))
# maps x to 2*x - 1. That is the inverse only of a Normalize((-1,), (2,)) preprocessing;
# confirm against the normalization used in data.create_mnist_loaders. The values are also
# not clipped to [0, 255] before the uint8 cast.
Image.fromarray(
    np.concatenate(
        np.array(
            torchvision.transforms.Normalize(
                (0.5,), (0.5,) # Data was normalized to some mean and std by preprocessing; this is meant to undo that for visualization.
            )(test_x_batch)[:20]*255
        ),
        axis=2
    ).transpose([1, 2, 0])[:,:,0].astype(np.uint8)
)

In [None]:
# Index of the largest output for each of the first 20 images, i.e. the predicted class.
out[:20].argmax(axis=1)

### Plot some PR curves to see our performance
1. a random guesser PR curve to check that everything is working as it should
2. the PR curve generated by the predictions

In [None]:
from sklearn.metrics import PrecisionRecallDisplay
import matplotlib.pyplot as plt

In [None]:
def PRCurveOnDigit(test_y_true, test_y_preds, digit):
    """
    Draw a one-vs-rest precision-recall curve for a single digit.

    Both inputs are reduced to class indices with an argmax over axis 1 and then
    binarized against `digit`, so the curve is built from hard 0/1 predictions
    rather than from continuous scores.

    Args:
        test_y_true: array-like with one row per sample; the argmax of each row is the true class
        test_y_preds: array-like with one row per sample; the argmax of each row is the predicted class
        digit: class index treated as the positive class
    Returns:
        The display object returned by PrecisionRecallDisplay.from_predictions
    """
    true_classes = np.argmax(np.array(test_y_true), axis=1)
    predicted_classes = np.argmax(np.array(test_y_preds), axis=1)

    # 1 where the class equals `digit`, 0 everywhere else.
    true_is_digit = np.where(true_classes == digit, 1, 0)
    predicted_is_digit = np.where(predicted_classes == digit, 1, 0)

    return PrecisionRecallDisplay.from_predictions(true_is_digit, predicted_is_digit)

In [None]:
# Sanity check: a random guesser should have no area beneath its PR curve.
PRCurveOnDigit(
    test_y_true=test_y_batch,
    test_y_preds=np.random.normal(loc=0.5, scale=0.5, size=out.to('cpu').shape),
    digit=6,
)
plt.title('Random Guesser PR Curve')

In [None]:
# On this batch the trained model appears to make 0 mistakes for digit 0, which gives a perfect PR curve.
PRCurveOnDigit(
    test_y_true=test_y_batch,
    test_y_preds=out.to('cpu'),
    digit=0,
)
plt.title('''PR on Clean Images
(Should have high AUC)''')
None

### Now is the fun part... the attack
1. load in the image and allow the model to relax
2. load in the wrong label and allow the model to try to relax on this!
3. read the new image straight out of the hallucination
4. repeat for a bunch of images
5. using the new images, see if these images change AUC PR

In [None]:
def make_labels_adversarial(test_y_batch):
    """
    Build wrong labels by moving each row's label to a randomly chosen other class.

    The input is copied, so the caller's labels are left untouched. For every row the
    position of the maximum is taken as the current class; a different position is drawn
    with np.random.choice, set to 1, and the current class position is set to 0.

    Args:
        test_y_batch: 2-D array-like of labels with one row per sample
    Returns:
        NumPy array of the same shape holding the adversarial labels
    """
    adversarial = np.copy(test_y_batch)
    current_classes = np.argmax(adversarial, 1)

    for row, current_class in zip(adversarial, current_classes):
        # Every class index except the one this row currently points at.
        candidates = [idx for idx in range(len(row)) if idx != current_class]
        wrong_class = np.random.choice(candidates)
        # `row` is a view into `adversarial`, so these writes land in the returned array.
        row[wrong_class] = 1
        row[current_class] = 0

    return adversarial

def make_advx(model, x_batch, y_true, y_targets, attack_dynamics):
    """
    Produce adversarial inputs by relaxing the input layer while the output layer is clamped.

    Steps, in order: reset the model state; clamp layer 0 to the flattened images and relax;
    set the cost target from y_true and relax again; release layer 0; clamp the last layer
    to y_targets; relax once more using attack_dynamics.

    Args:
        model: object exposing reset_state, clamp_layer, release_layer, set_C_target,
            u_relax, dimensions and u
        x_batch: batch of images, viewed here as (-1, model.dimensions[0])
        y_true: tensor that is cloned, detached and passed to model.set_C_target
        y_targets: adversarial labels, viewed as (-1, model.dimensions[-1]) and clamped
            onto the last layer
        attack_dynamics: keyword arguments forwarded to model.u_relax for the attack step
    Returns:
        model.u[0] after the final relaxation, detached and moved to the CPU

    Note: the first two relaxations read the module-level cfg['dynamics'] rather than an
    argument of this function.
    NOTE(review): nothing visible here clears the cost target set from y_true before the
    attack relaxation, and the clamp on the last layer is not undone before returning —
    confirm both are intended.
    """
    model.reset_state()
    model.clamp_layer(0, x_batch.view(-1, model.dimensions[0])) # clamp image layer
    model.u_relax(**cfg['dynamics'])
    model.set_C_target(y_true.clone().detach())
    model.u_relax(**cfg['dynamics'])
    model.release_layer(0) # un-clamp just image layer
    # Disabled alternative kept by the author: model.set_C_target(y_targets)
    model.clamp_layer(-1, y_targets.view(-1, model.dimensions[-1])) # clamps the output layer
    model.u_relax(**attack_dynamics)                                   # relax on the image layer
    return model.u[0].detach().to('cpu')

In [None]:
# Adversarial targets as a float tensor: each row's class is swapped for a random other class.
y_target = torch.tensor(make_labels_adversarial(test_y_batch)).float()

In [None]:
# Check that y_target is always wrong with respect to the true labels.
PRCurveOnDigit(
    test_y_true=test_y_batch,
    test_y_preds=y_target,
    digit=0,
)
plt.title('''PR of (adversarial) Targets
(Should have low AUC)''')
None

In [None]:
cfg # configuration used for training and prediction above; shown again for reference before the attack dynamics are chosen

In [None]:
# Run the attack on the test batch with its own relaxation settings, then show the result's shape.
advx = make_advx(
    model=model,
    x_batch=test_x_batch.to(config.device),
    y_true=test_y_batch.to(config.device),
    y_targets=y_target.to(config.device),
    attack_dynamics=dict(
        dt=0.01,    # size of time steps
        tau=1,      # the time constant that dictates the amount of time it should take for signals to propagate between layers
        tol=0,      # amount of precision in fixed point, when to stop relaxing on s
        n_relax=1,  # the number of steps to do before quitting the relaxation (either quit when tol is reached, or n_relax is reached).
    ),
)
advx.shape

In [None]:
# Confirm the clean batch can be reshaped to advx's shape so the two can be subtracted element-wise.
test_x_batch.reshape(advx.shape).shape

In [None]:
# Histogram of the perturbation (adversarial image minus original) over every pixel in the batch.
plt.hist(
    x=np.array(advx - test_x_batch.reshape(advx.shape)).ravel(),
    bins=100,
)
plt.title('''Adversarial Perturbation''')
None

In [None]:
# Join the first 20 adversarial images side by side into one greyscale strip and display it.
# The reshape hard-codes a batch of 100 single-channel 28x28 images.
# NOTE(review): Normalize(mean, std) computes (x - mean) / std, so Normalize((0.5,), (0.5,))
# maps x to 2*x - 1. That is the inverse only of a Normalize((-1,), (2,)) preprocessing;
# confirm against the normalization used in data.create_mnist_loaders. The values are also
# not clipped to [0, 255] before the uint8 cast.
Image.fromarray(
    np.concatenate(
        np.array(
            torchvision.transforms.Normalize(
                (0.5,), (0.5,) # Data was normalized to some mean and std by preprocessing; this is meant to undo that for visualization.
            )(advx.reshape([100, 1, 28, 28]))[:20]*255
        ),
        axis=2
    ).transpose([1, 2, 0])[:,:,0].astype(np.uint8)
)

In [None]:
# Visualize the perturbation alone (adversarial minus original) for the first 20 images,
# amplified by scale_fac so that small differences become visible.
# NOTE(review): Normalize((0.5,), (0.5,)) computes (d - 0.5) / 0.5 on the difference d, so a
# zero perturbation maps to -1 rather than 0 before scaling — confirm that this offset is
# wanted. The values are not clipped to [0, 255] before the uint8 cast.
scale_fac = 60;
Image.fromarray(
    np.concatenate(
        np.array(
            torchvision.transforms.Normalize(
                (0.5,), (0.5,) # Data was normalized to some mean and std by preprocessing; this is meant to undo that for visualization.
            )((advx.reshape([100, 1, 28, 28]))-test_x_batch)[:20]*255*scale_fac
        ),
        axis=2
    ).transpose([1, 2, 0])[:,:,0].astype(np.uint8)
)

In [None]:
# Visualize the attacked images with the perturbation amplified by scale_fac, so that it is
# extra noticeable: original + (adversarial - original) * scale_fac, first 20 images.
# NOTE(review): Normalize(mean, std) computes (x - mean) / std, so Normalize((0.5,), (0.5,))
# maps x to 2*x - 1. That is the inverse only of a Normalize((-1,), (2,)) preprocessing;
# confirm against the normalization used in data.create_mnist_loaders. The values are also
# not clipped to [0, 255] before the uint8 cast.
scale_fac = 60;
Image.fromarray(
    np.concatenate(
        np.array(
            torchvision.transforms.Normalize(
                (0.5,), (0.5,) # Data was normalized to some mean and std by preprocessing; this is meant to undo that for visualization.
            )(test_x_batch+((advx.reshape([100, 1, 28, 28]))-test_x_batch)*scale_fac)[:20]*255
        ),
        axis=2
    ).transpose([1, 2, 0])[:,:,0].astype(np.uint8)
)

In [None]:
# Classify the adversarial images. This call previously passed test_x_batch, which only
# re-scored the clean images, so every result derived from advx_out described clean inputs.
# advx is reshaped to the clean batch's shape so predict_batch receives the same layout
# it gets for the clean prediction.
advx_out = train.predict_batch(
    model,
    advx.reshape(test_x_batch.shape).to(config.device),
    cfg['dynamics'],
    False,
)

In [None]:
advx_out[:20].argmax(axis=1) # predicted class index for each of the first 20 rows of advx_out

In [None]:
test_y_batch[:20].argmax(axis=1) # actual class index (argmax of each label row) for the first 20 test images

In [None]:
# Score the clean images again to make sure the attack did not alter (e.g. un-train) the model.
# The predictions are recomputed here: reusing `out`, which was produced before the attack,
# would show the same curve no matter what the attack did to the model.
PRCurveOnDigit(
    test_y_batch,
    train.predict_batch(model, test_x_batch.to(config.device), cfg['dynamics'], False).to('cpu'),
    0
)
plt.title('''PR on Clean Images
(Should have high AUC)''')
None

In [None]:
# Predictions stored in advx_out should not match the true labels.
PRCurveOnDigit(
    test_y_true=test_y_batch,
    test_y_preds=advx_out.to('cpu'),
    digit=0,
)
plt.title('''PR on Advx Images
(Should have low AUC)''')
None

In [None]:
# Compare the predictions in advx_out against the adversarial targets.
# Open question from the author: they do not appear to match the targets.
PRCurveOnDigit(
    test_y_true=y_target,
    test_y_preds=advx_out.to('cpu'),
    digit=0,
)
plt.title('''PR of Advx Images to Targets
(Should have high AUC)''')
None