In [1]:
# Written by: Alexandra
# Last edited: 2022/02/11

# =============================================================================
#  IMPORTS AND DEPENDENCIES
# =============================================================================

import numpy as np
import pandas as pd

from data import DataLoader
from attack import SimpleAttacker, RandomAttacker
from defence import RandomDefender, FeasibleSetDefender
from model import IrisClassifier
#from postprocessing import PostProcessor
from simulation import Simulator


from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


# =============================================================================
#  GLOBAL VARIABLES
# =============================================================================
# Load dataset (the relative path is resolved against the kernel's working directory)
data = np.loadtxt("datasets/iris.dat") # targets are already one-hot encoded in this file

# Batch size handed to Simulator(batch_size=...) and to model.test(...) in the cells below
BATCH_SIZE = 10

# Model
# HIDDEN_NEURONS = (4, 16, 3) automatically set in IrisClassifier
# ACTIVATIONS = ("relu", "softmax")  automatically set in IrisClassifier
# The three settings below are the positional arguments of IrisClassifier(...)
OPTIMISER = "adam"
LOSS_FUNC = "cross_entropy"
LEARNING_RATE = 0.01
# NOTE(review): EPOCHS is not referenced by any cell visible in this notebook -- confirm it is still needed
EPOCHS = 100

def defender_initiator(**kwargs):
    """
    Build the defender selected by the ``defender_type`` keyword argument.

    Keyword arguments
    -----------------
    defender_type : str, optional
        Either "RandomDefender" or "FeasibleSetDefender".
    reject_rate :
        Required whenever ``defender_type`` names a defender. It is passed
        unchanged as the single constructor argument of the chosen class.

    Returns
    -------
    A RandomDefender or FeasibleSetDefender instance, or None when
    ``defender_type`` is missing or None (i.e. "no defender requested").

    Raises
    ------
    KeyError
        If a defender is requested but ``reject_rate`` was not supplied.
    ValueError
        If ``defender_type`` is not one of the supported names. Previously a
        misspelled name silently produced None, so a simulation could run
        without any defender while appearing to be defended.
    """
    defender_type = kwargs.get("defender_type")

    # No defender requested: keep the historical "return None" behaviour.
    if defender_type is None:
        return None

    if defender_type == "RandomDefender":
        defender_cls = RandomDefender
    elif defender_type == "FeasibleSetDefender":
        defender_cls = FeasibleSetDefender
    else:
        raise ValueError(
            f"Unknown defender_type {defender_type!r}; expected "
            "'RandomDefender' or 'FeasibleSetDefender'"
        )

    # Both supported defenders take the reject rate as their only argument.
    return defender_cls(kwargs["reject_rate"])

In [2]:
# Seed shared by the stochastic data-preparation steps in this cell. The
# original code seeded train_test_split but not the later shuffle, so the
# order of the training stream changed on every run.
# NOTE(review): model initialisation and the attacker/defender may draw their
# own random numbers; those are not seeded here -- confirm if full
# reproducibility is required.
RANDOM_STATE = 42

# Define input and target data: the first four columns are the inputs and the
# remaining columns are the (already one-hot encoded) targets.
X, y = data[:, :4], data[:, 4:]

# Split into train and test sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=RANDOM_STATE)

# Normalise data using sklearn: the scaler is fitted on the training split
# only and then applied to the test split, so no test statistics leak in.
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Shuffle inputs and targets together, now with a fixed seed.
X_train, y_train = shuffle(X_train, y_train, random_state=RANDOM_STATE)

# Instantiate necessary classes
defender = defender_initiator(defender_type = "RandomDefender", reject_rate = 0.1)
attacker = RandomAttacker()
model = IrisClassifier(OPTIMISER, LOSS_FUNC, LEARNING_RATE)

In [3]:
def _run_scenario(attacker=None, defender=None):
    """
    Run one online-learning scenario and report its test performance.

    A new IrisClassifier is created for every scenario. The original cell
    passed the same ``model`` object to all four Simulators, so each scenario
    could continue training from the weights left by the previous one and the
    four results were not independent.
    NOTE(review): Simulator's source is not visible here; if it already copies
    the model internally, the only effect of this change is that each scenario
    gets its own freshly constructed classifier.

    Parameters
    ----------
    attacker : attacker instance or None
        Forwarded to Simulator(attacker=...); None disables the attacker.
    defender : defender instance or None
        Forwarded to Simulator(defender=...); None disables the defender.

    Returns
    -------
    The Simulator after ``learn_online()`` has run and its model has been
    evaluated on (X_test, y_test).
    """
    scenario_model = IrisClassifier(OPTIMISER, LOSS_FUNC, LEARNING_RATE)
    simulator = Simulator(X_train, y_train, scenario_model, attacker=attacker,
                          defender=defender, batch_size=BATCH_SIZE)
    simulator.learn_online()
    simulator.model.test(X_test, y_test, BATCH_SIZE)
    return simulator


# Same four scenarios, in the same order, as the original copy-pasted stanzas.
# NOTE(review): the attacker and defender instances are still shared between
# scenarios, as before; confirm they do not carry state from one run to the next.
simulator_0 = _run_scenario()                                      # baseline
simulator_1 = _run_scenario(attacker=attacker)                     # attacker only
simulator_2 = _run_scenario(defender=defender)                     # defender only
simulator_3 = _run_scenario(attacker=attacker, defender=defender)  # attacker + defender

results_full = {
    'baseline': simulator_0.results,
    'attacker_only': simulator_1.results,
    'defender_only': simulator_2.results,
    'attacker_defender': simulator_3.results
}

Train Epoch: 09 -- Batch: 000 -- Loss: 1.1719
Train Epoch: 19 -- Batch: 000 -- Loss: 1.1405
Train Epoch: 29 -- Batch: 000 -- Loss: 1.1283
Train Epoch: 39 -- Batch: 000 -- Loss: 1.1399
Train Epoch: 49 -- Batch: 000 -- Loss: 1.0839
Train Epoch: 59 -- Batch: 000 -- Loss: 1.0843
Train Epoch: 69 -- Batch: 000 -- Loss: 1.0793
Train Epoch: 79 -- Batch: 000 -- Loss: 1.1085
Train Epoch: 89 -- Batch: 000 -- Loss: 1.0639
Train Epoch: 99 -- Batch: 000 -- Loss: 1.0513
Train Epoch: 109 -- Batch: 000 -- Loss: 1.0483
Train Epoch: 119 -- Batch: 000 -- Loss: 1.0327

Test set: Average loss: 0.1013, Accuracy: 0.6667

Train Epoch: 09 -- Batch: 000 -- Loss: 2.1797
Train Epoch: 19 -- Batch: 000 -- Loss: 1.2678
Train Epoch: 29 -- Batch: 000 -- Loss: 2.4188
Train Epoch: 39 -- Batch: 000 -- Loss: 1.9207
Train Epoch: 49 -- Batch: 000 -- Loss: 0.8546
Train Epoch: 59 -- Batch: 000 -- Loss: 2.4861
Train Epoch: 69 -- Batch: 000 -- Loss: 1.6390
Train Epoch: 79 -- Batch: 000 -- Loss: 1.1192
Train Epoch: 89 -- Batch: 0

In [4]:
def wrap_results(simulators, labels):
    """
    Collect the results of several simulators under human-readable labels.

    ``simulators`` and ``labels`` are parallel lists of equal length (checked
    with an assert). For each pair, the simulator's ``results`` entries
    'X_stream', 'y_stream' and 'models' are stored under the matching label.

    Returns a tuple of three dictionaries keyed by label:
    (X streams, y streams, models).
    """
    assert len(simulators) == len(labels)

    X_by_label, y_by_label, models_by_label = {}, {}, {}

    for label, sim in zip(labels, simulators):
        # Read all three entries first so a missing key fails before any
        # of the dictionaries is updated for this label.
        stream_X = sim.results['X_stream']
        stream_y = sim.results['y_stream']
        snapshots = sim.results['models']

        X_by_label[label] = stream_X
        y_by_label[label] = stream_y
        models_by_label[label] = snapshots

    return X_by_label, y_by_label, models_by_label

In [5]:
def data_stream_simulation_summary(results, reference_label='baseline', title=None):
    """
    Count rejected and poisoned stream entries per simulation.

    ``results`` maps a simulation label to its stream and is turned into a
    DataFrame with one column per label. Row by row, every column's entry is
    compared with the entry of the ``reference_label`` column:

    * an entry whose ``.size`` is 0 counts as rejected;
    * any other entry that is not ``np.array_equal`` to the reference entry
      counts as poisoned.

    Returns a dictionary with two items, 'number_of_rejected_points' and
    'number_of_poisoned_points' (each suffixed with ``_<title>`` when a
    truthy ``title`` is given), each mapping label -> count.
    """
    frame = pd.DataFrame(results)
    column_labels = list(frame.columns)

    rejected_counts = dict.fromkeys(column_labels, 0)
    poisoned_counts = dict.fromkeys(column_labels, 0)

    for _, step in frame.iterrows():
        reference_entry = step[reference_label]
        for label in column_labels:
            entry = step[label]
            if entry.size == 0:
                rejected_counts[label] += 1
                continue
            if not np.array_equal(entry, reference_entry):
                poisoned_counts[label] += 1

    suffix = f'_{title}' if title else ''

    return {
        f'number_of_rejected_points{suffix}': rejected_counts,
        f'number_of_poisoned_points{suffix}': poisoned_counts,
    }

In [6]:
def accuracy_summary(X_test, y_test, BATCH_SIZE, all_results_models):
    """
    Work in progress -- not called anywhere in this notebook yet.

    For every label in ``all_results_models`` this prints the label, builds
    a new IrisClassifier, loads the last entry stored under that label via
    ``load_state_dict`` and runs ``test`` on (X_test, y_test, BATCH_SIZE).

    Returns a dictionary label -> 0. The zeros are placeholders until the
    figures reported by ``test`` are captured.
    """
    accuracy = {}

    for label, snapshots in all_results_models.items():
        print(label)
        classifier = IrisClassifier(OPTIMISER, LOSS_FUNC, LEARNING_RATE)
        classifier.load_state_dict(snapshots[-1])
        # TODO: this call should return the figures to store below
        classifier.test(X_test, y_test, BATCH_SIZE)
        # Placeholder value; should come from the test call above
        accuracy[label] = 0

    return accuracy

In [7]:
# The code below is for demo purposes only: it assembles one summary table
# from the four simulators created in the earlier cell.


# Parallel lists: labels[i] is the name under which simulators[i] is reported.
simulators = [simulator_0, simulator_1, simulator_2, simulator_3]
labels = ['baseline', 'attacker_only', 'defender_only', 'attacker_defender']

# Three dictionaries keyed by label: X streams, y streams and models.
all_results_X, all_results_y, all_results_models = wrap_results(simulators, labels)


# Dummy data: these accuracies are hard-coded placeholders, NOT measured from
# the simulations above (accuracy_summary is still work in progress).
accuracy = {'baseline': 0.9333, 'attacker_only': 0.7000, 'defender_only': 0.9000, 'attacker_defender': 0.8000}
accuracy = pd.Series(accuracy, name= 'accuracy')

# Rejected / poisoned counts per simulation, measured against the default
# 'baseline' reference column, separately for the X stream and the y stream.
X_stream_stats = pd.DataFrame(data_stream_simulation_summary(all_results_X, title='X'))
y_stream_stats = pd.DataFrame(data_stream_simulation_summary(all_results_y, title='y'))

# Join everything column-wise on the simulation label, then transpose so that
# each metric is a row and each simulation is a column (displayed as cell output).
pd.concat([accuracy, X_stream_stats, y_stream_stats], axis = 1).T

Unnamed: 0,baseline,attacker_only,defender_only,attacker_defender
accuracy,0.9333,0.7,0.9,0.8
number_of_rejected_points_X,0.0,0.0,14.0,14.0
number_of_poisoned_points_X,0.0,0.0,0.0,0.0
number_of_rejected_points_y,0.0,0.0,14.0,14.0
number_of_poisoned_points_y,0.0,58.0,0.0,54.0
