# Contribution
Andrew - Generated fork repository, contributed in programming process, ran programs locally, co-authored task 1 report

JJ - Contributed in programming process, co-authored task 1 report

Stephen - Contributed in programming process, co-authored task 1 report

# Implementation
In this report, we attacked three different models: an Undefended Model, a Vanilla Athena Ensemble Model, and a PGD-ADT model. We chose to craft adversarial examples using the Fast Gradient Sign Method (FGSM), the Basic Iterative Method (BIM), and Projected Gradient Descent (PGD).

### FGSM
The Fast Gradient Sign Method generates an adversarial example which maximizes loss by using gradients of the loss function with respect to the input image. With this method, we find out how much each input pixel contributes to the loss and add perturbations in order to maximize loss in the adversarial example.

### BIM
The Basic Iterative Method (BIM), also known as the Iterative FGSM (I-FGSM), is an extension of FGSM in which the attack performs a simple FGSM attack multiple times with smaller step sizes.

### PGD
Projected Gradient Descent (PGD) is a type of attack where the attacker has knowledge of the model's gradients/weights (a matrix that corresponds to how the model weighs each particular feature it detects). This threat model focuses on finding the perturbation that maximises the error of a particular feature gradient of an input without crossing some threshold labelled as epsilon. The goal here is to find the minimum gradient of features that an input must contain to be classified incorrectly.

# Experimentation

For task one, our group decided to plot the following attacks:
     - FGSM
         Epsilons: 0.1, 0.5, 0.9
     - BIM
         Epsilons: 0.1, 0.5, 0.9
     - PGD
         Epsilons: 0.1, 0.5, 0.9
         
We used the following defense models:
    - Undefended Model
        No defense configuration.
    - Vanilla Athena Average Probability Model
        19 weak defenses consisting of a clean model followed by 18
        models trained on transformed input data. The full 
        configuration of this model can be located at
        "configs/task1/athena-mnist.json".
    - PGD Model
        A provided baseline model.
        
### Results
RESULTS WILL GO HERE

# Generating Adversarial Examples

In [3]:
import argparse
import numpy as np
import os
import time
from matplotlib import pyplot as plt

from utils.model import load_lenet
from utils.file import load_from_json
from utils.metrics import error_rate
from attacks.attack import generate

ModuleNotFoundError: No module named 'utils'

In [6]:
def generate_ae(model, data, labels, attack_configs, save=False, output_dir=None):
    """
    Generate adversarial examples for every configured attack, print the
    error rate of the targeted model on them, and save plots of a few samples.
    :param model: WeakDefense. The targeted model.
    :param data: array. The benign samples to generate adversarial for.
    :param labels: array or list. The true labels.
    :param attack_configs: dictionary. Attacks and corresponding settings.
        Must hold "num_attacks" and one "configs<i>" entry per attack.
    :param save: boolean. True, if save the adversarial examples.
    :param output_dir: str or path. Location to save the adversarial examples.
        It cannot be None when save is True.
    :return: None
    :raises ValueError: if save is True and output_dir is None.
    """
    # validate up front so an expensive attack is not run only to fail on save
    if save and output_dir is None:
        raise ValueError("Cannot save images to a none path.")

    img_rows, img_cols = data.shape[1], data.shape[2]
    num_attacks = attack_configs.get("num_attacks")
    # the loader keeps the labels exactly as the caller passed them
    data_loader = (data, labels)

    # collapse multi-dimensional (e.g. one-hot / probability) labels to class ids
    if len(labels.shape) > 1:
        labels = np.asarray([np.argmax(p) for p in labels])

    # plot at most this many adversarial examples per attack
    max_plots = 10

    # generate attacks one by one
    for attack_id in range(num_attacks):
        key = "configs{}".format(attack_id)
        attack_args = attack_configs.get(key)
        data_adv = generate(model=model,
                            data_loader=data_loader,
                            attack_args=attack_args
                            )
        print(attack_args)
        # resolved once per attack: used for plot titles, the figure folder
        # and the name of the saved .npy file
        desc = str(attack_args.get("description"))

        # predict the adversarial examples
        predictions = model.predict(data_adv)
        predictions = np.asarray([np.argmax(p) for p in predictions])

        err = error_rate(y_pred=predictions, y_true=labels)
        print(">>> error rate:", err)

        # plotting some examples (never more than there are samples)
        num_plotting = min(data.shape[0], max_plots)
        if num_plotting > 0:
            figure_dir = os.path.join("../../results/figures", desc)
            # savefig does not create missing directories
            os.makedirs(figure_dir, exist_ok=True)
        for i in range(num_plotting):
            img = data_adv[i].reshape((img_rows, img_cols))
            plt.imshow(img, cmap='gray')
            title = '{}: {}->{}'.format(attack_args.get("description"),
                                        labels[i],
                                        predictions[i]
                                        )
            plt.title(title)
            # Save plot; the file is named "<true label>-><predicted label>.jpg",
            # so samples with the same label pair overwrite each other
            figure_name = "{}->{}.jpg".format(labels[i], predictions[i])
            plt.savefig(os.path.join(figure_dir, figure_name))
            plt.close()

        # save the adversarial example
        if save:
            # the file is named after the attack description
            file = os.path.join(output_dir, "{}.npy".format(desc))
            print("Save the adversarial examples to file [{}].".format(file))
            np.save(file, data_adv)

In [9]:
# read the JSON configuration files into dictionaries
model_configs = load_from_json('../configs/task1/model-mnist.json')
data_configs = load_from_json('../configs/task1/data-mnist.json')
attack_configs = load_from_json('../configs/task1/attack-zk-mnist.json')

# build the path of the targeted model and load it
model_file = os.path.join(
    model_configs.get("dir"),
    model_configs.get("um_file"),
)
target = load_lenet(file=model_file, wrap=True)

# benign samples: only the first 10 are kept, so the attacks run on a small subset
data_file = os.path.join(
    data_configs.get('dir'),
    data_configs.get('bs_file'),
)
data_bs = np.load(data_file)[:10]

# true labels matching the benign samples, truncated the same way
label_file = os.path.join(
    data_configs.get('dir'),
    data_configs.get('label_file'),
)
labels = np.load(label_file)[:10]

# craft the adversarial examples; nothing is written to output_dir since save is False
generate_ae(
    model=target,
    data=data_bs,
    labels=labels,
    attack_configs=attack_configs,
    save=False,
    output_dir=data_configs.get("dir"),
)

NameError: name 'load_from_json' is not defined

# Evaluate Adversarial Examples

In [11]:
def evaluate(trans_configs, model_configs,
             data_configs, save=False, output_dir=None, ae_index=4):
    """
    Evaluate the undefended model, an AVEP ensemble of weak defenses and the
    PGD-ADT baseline on one file of adversarial examples (AEs).
    :param trans_configs: dictionary. The collection of the parameterized transformations to test.
        in the form of
        { configsx: {
            param: value,
            }
        }
        The key of a configuration is 'configs'x, where 'x' is the id of corresponding weak defense.
    :param model_configs:  dictionary. Defines model related information.
        Such as, location, the undefended model, the file format, etc.
    :param data_configs: dictionary. Defines data related information.
        Such as, location, the file for the true labels, the file for the benign samples,
        the files for the adversarial examples, etc.
    :param save: boolean. Not used by this function; kept for interface compatibility.
    :param output_dir: path or str. Not used by this function; kept for interface compatibility.
    :param ae_index: int. Position, within data_configs['ae_files'], of the AE file
        to evaluate. Defaults to 4, the file this function always evaluated before.
    :return: dictionary. The value returned by error_rate for each model,
        keyed by 'UM', 'Ensemble' and 'PGD-ADT'.
    """
    # NOTE(review): load_pool, Ensemble, ENSEMBLE_STRATEGY and get_corrections are
    # not among the imports visible in this file -- confirm they are imported.

    # Load the baseline defense (PGD-ADT model)
    baseline = load_lenet(file=model_configs.get('pgd_trained'), trans_configs=None,
                          use_logits=False, wrap=False)

    # get the undefended model (UM)
    file = os.path.join(model_configs.get('dir'), model_configs.get('um_file'))
    undefended = load_lenet(file=file,
                            trans_configs=trans_configs.get('configs0'),
                            wrap=True)
    print(">>> um:", type(undefended))

    # load weak defenses into a pool
    pool, _ = load_pool(trans_configs=trans_configs,
                        model_configs=model_configs,
                        active_list=True,
                        wrap=True)
    # create an AVEP ensemble from the WD pool
    wds = list(pool.values())
    print(">>> wds:", type(wds), type(wds[0]))
    ensemble = Ensemble(classifiers=wds, strategy=ENSEMBLE_STRATEGY.AVEP.value)

    # load the benign samples
    bs_file = os.path.join(data_configs.get('dir'), data_configs.get('bs_file'))
    x_bs = np.load(bs_file)

    # load the corresponding true labels
    label_file = os.path.join(data_configs.get('dir'), data_configs.get('label_file'))
    labels = np.load(label_file)

    # get indices of benign samples that are correctly classified by the targeted model
    print(">>> Evaluating UM on [{}], it may take a while...".format(bs_file))
    pred_bs = undefended.predict(x_bs)
    corrections = get_corrections(y_pred=pred_bs, y_true=labels)

    # pick the AE file to evaluate
    ae_list = data_configs.get('ae_files')
    ae_file = os.path.join(data_configs.get('dir'), ae_list[ae_index])
    x_adv = np.load(ae_file)

    # (result key, name shown in the progress message, model), in evaluation order
    targets = (
        ('UM', 'UM', undefended),
        ('Ensemble', 'ensemble', ensemble),
        ('PGD-ADT', 'baseline model', baseline),
    )

    # Evaluate AEs: every model is scored on the same AEs, with the samples
    # the UM classified correctly passed to error_rate as correct_on_bs
    results = {}
    for result_key, display_name, classifier in targets:
        print(">>> Evaluating {} on [{}], it may take a while...".format(display_name, ae_file))
        pred_adv = classifier.predict(x_adv)
        # track the result
        results[result_key] = error_rate(y_pred=pred_adv, y_true=labels,
                                         correct_on_bs=corrections)

    print(">>> Evaluations on [{}]:\n{}".format(ae_file, results))
    return results

In [12]:
# parse configurations (into a dictionary) from json file
trans_configs = load_from_json('../configs/demo/athena-mnist.json')
model_configs = load_from_json('../configs/demo/model-mnist.json')
data_configs = load_from_json('../configs/demo/data-mnist.json')

# -------- evaluate the models on the adversarial examples -------------
# No argparse namespace named `args` is defined in the code visible here, so
# `args.output_root` would raise a NameError. Nothing is saved (save=False),
# so no output directory is needed.
evaluate(trans_configs=trans_configs,
         model_configs=model_configs,
         data_configs=data_configs,
         save=False,
         output_dir=None)

NameError: name 'load_from_json' is not defined