In [2]:
# Turn off Tensorflow warnings
# NOTE(review): the environment variable is set before `import tensorflow`
# below; presumably that ordering is required for it to take effect - confirm
# before reordering these lines.

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

## Key imports

import numpy as np
import tensorflow as tf
import keras as k
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import random

# Switch off Keras' interactive logging for the rest of the notebook.
tf.keras.utils.disable_interactive_logging()

## Helper functions

def load_object(filename):
    """Read and return the object pickled in `filename`.

    The file is opened read-only in binary mode; nothing is written.

    NOTE: `pickle.load` can execute arbitrary code while deserialising, so
    only call this on files from a trusted source.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


    


In [3]:
### Load objects

# The pickled test data and labels are indexed by traffic-class name.
lstm_test_data = load_object('lstm_test_data.pkl')
lstm_test_labels = load_object('lstm_test_labels.pkl')

# Flatten both containers into single arrays, concatenating the classes in the
# same fixed order (browse, chat, mail, p2p) so rows and labels stay aligned.
lstm_test_data, lstm_test_labels = (
    np.concatenate([per_class[name] for name in ['browse','chat','mail','p2p']])
    for per_class in (lstm_test_data, lstm_test_labels)
)

# Previously fitted scaler and the trained classifier.
scaler = load_object('scaler.pkl')

stacked_cnn_lstm_sae = tf.keras.saving.load_model('stacked_cnn_lstm_sae.77.keras')

In [8]:
import glob

# One sub-directory per traffic class. The position of a path in `data_paths`
# is used below as the class index the model is expected to predict.
data_path = "Data/"
browse_data_path = data_path + "browse/"
chat_data_path = data_path + "chat/"
mail_data_path = data_path + "mail/"
p2p_data_path =  data_path + "p2p/"
data_paths = [browse_data_path, chat_data_path, mail_data_path, p2p_data_path]

N_FEATURES = 6          # columns per row in every sample file
WINDOW_LEN = 5          # rows per model input window
MIN_ROWS = 15           # samples with fewer rows are skipped
SAMPLES_PER_CLASS = 5   # samples drawn per class for the attack

datasets = dict()
sample_data = dict()
sample_labels = dict()
scaled_sample_data = dict()
test_sample_data = dict()

# Read every sample file. The files carry a .pcap extension but are parsed as
# comma-separated text with N_FEATURES values per row.
for path in data_paths:
    datasets[path] = list(glob.glob(path + "*.pcap"))
    sample_data[path] = []

    for sample_path in datasets[path]:
        s = np.loadtxt(sample_path, delimiter=',').reshape((-1, N_FEATURES))
        if s.shape[0] >= MIN_ROWS:
            sample_data[path].append(s)


# Scale each sample, cut it into windows of WINDOW_LEN rows and keep only the
# samples whose majority-vote prediction matches their true class.
for (idx, path) in enumerate(data_paths):
    acc = []
    for sample in sample_data[path]:
        scaled_sample = scaler.transform(sample)
        # Drop the trailing rows that do not fill a complete window.
        usable_rows = scaled_sample.shape[0] - scaled_sample.shape[0] % WINDOW_LEN
        new_sample = scaled_sample[:usable_rows].reshape(
            (-1, WINDOW_LEN, scaled_sample.shape[1]))
        # Predicted class per window. The axis must be given explicitly:
        # tf.argmax reduces over axis 0 by default, which would return window
        # indices per class instead of a class index per window.
        res = tf.argmax(stacked_cnn_lstm_sae(new_sample), axis=-1).numpy()
        vals, counts = np.unique(res, return_counts=True)
        mr = vals[np.argmax(counts)]
        if mr == idx:
            acc.append(tf.convert_to_tensor(new_sample))
    scaled_sample_data[path] = acc

# Draw the test samples for each class.
# NOTE: `random` is not seeded here, so the selection differs between runs.
for path in data_paths:
    pool = scaled_sample_data[path]
    if len(pool) < SAMPLES_PER_CLASS:
        # random.sample raises ValueError when asked for more items than exist;
        # fall back to using every available sample instead of aborting.
        print("Warning: only", len(pool), "correctly classified samples for", path)
    test_sample_data[path] = random.sample(pool, min(SAMPLES_PER_CLASS, len(pool)))

# One-hot label per window, repeated for every window of each drawn sample.
for (idx, path) in enumerate(data_paths):
    one_hot = np.eye(len(data_paths))[idx]
    sample_labels[path] = [
        np.tile(one_hot, (sample.shape[0], 1)) for sample in test_sample_data[path]
    ]


# p2p_sample = np.array_split(scaled_p2p_sample_data[:-(scaled_p2p_sample_data.shape[0]%5)],
#                         scaled_p2p_sample_data.shape[0]//5)
# p2p_sample = tf.convert_to_tensor(p2p_sample)

# browse_sample = np.array_split(scaled_browse_sample_data[:-(scaled_browse_sample_data.shape[0]%5)],
#                         scaled_browse_sample_data.shape[0]//5)
# browse_sample = tf.convert_to_tensor(browse_sample)


# mail_sample = np.array_split(scaled_mail_sample_data[:-(scaled_mail_sample_data.shape[0]%5)],
#                         scaled_mail_sample_data.shape[0]//5)
# mail_sample = tf.convert_to_tensor(mail_sample)


# p2p_label = np.tile(np.array([0.0, 0.0, 0.0, 1.0]),  (p2p_sample_data.shape[0]//5, 1))
# browse_label = np.tile(np.array([1.0, 0.0, 0.0, 0.0]),  (browse_sample_data.shape[0]//5, 1))
# mail_label = np.tile(np.array([0.0, 0.0, 1.0, 0.0]),  (mail_sample_data.shape[0]//5, 1))

# Code below based on https://www.tensorflow.org/tutorials/generative/adversarial_fgsm

loss_object = tf.keras.losses.CategoricalCrossentropy()

def create_adversarial_pattern(sample, label):
    """Return the sign of the loss gradient with respect to `sample`.

    The model `stacked_cnn_lstm_sae` is evaluated on `sample`, the categorical
    cross-entropy against `label` is computed, and the element-wise sign of
    d(loss)/d(sample) is returned (the FGSM perturbation direction).

    `sample` is passed to `tape.watch`, so it is expected to be a tensor.
    """
    with tf.GradientTape() as tape:
        # `sample` is an input rather than a variable, so it has to be
        # watched explicitly for the tape to record operations on it.
        tape.watch(sample)
        loss = loss_object(label, stacked_cnn_lstm_sae(sample))

    # Only the direction of the gradient is used, not its magnitude.
    return tf.sign(tape.gradient(loss, sample))

# This function is the "reverse" of scikit-learn's standard scaler, mapping
# scaled values back to the unscaled input space

def reverse_scaler(scaler, scaled_input):
    """Undo standard scaling: multiply by `scaler.scale_`, then add `scaler.mean_`.

    Each step is applied only if the matching `scaler.with_std` /
    `scaler.with_mean` flag is set. The statistics are applied with ordinary
    broadcasting, so inputs with extra leading axes work as long as the last
    axis matches the statistics.

    `scaled_input` is never modified. A new object is returned whenever a step
    is applied; if both flags are off the input is returned unchanged.
    """
    res = scaled_input
    if scaler.with_std:
        # Out-of-place on purpose: `*=` would overwrite a NumPy input in place
        # and fail on integer arrays.
        res = res * scaler.scale_
    if scaler.with_mean:
        res = res + scaler.mean_
    return res

def make_result_sane(result):
    """Return a copy of a 6-element feature row with its entries made consistent.

    On the copy, in this order:
      * entries 0 and 1 are rounded up to whole numbers,
      * entry 2 is set to 100 * entry 0,
      * entry 3 is set to 8 * entry 1 * 100,
      * entry 4 is kept only if entry 0 > 1, otherwise set to 0,
      * entry 5 is set to entry 1 / entry 0, or 0 if entry 0 is 0.

    NOTE(review): presumably entries 0/1 are per-window counts and 2/3/5 are
    quantities derived from them - confirm against the feature extraction code.

    `result` itself is left untouched. This matters with
    `np.apply_along_axis`, where the row handed to this function can alias
    the caller's array, so in-place writes would overwrite the source data.
    """
    sane = np.array(result)  # copy; keeps the input's dtype
    sane[0] = np.ceil(sane[0])
    sane[1] = np.ceil(sane[1])
    sane[2] = 100*sane[0]
    sane[3] = 8*sane[1]*100
    sane[4] = sane[4] if sane[0] > 1 else 0
    sane[5] = sane[1]/sane[0] if sane[0] != 0 else 0
    return sane

def scale(scaler):
    """Build a function that applies `scaler`'s standardisation to an array.

    The returned callable subtracts `scaler.mean_` (if `scaler.with_mean`) and
    then divides by `scaler.scale_` (if `scaler.with_std`), using ordinary
    broadcasting. It is meant for use with `np.apply_along_axis`.

    The callable does not modify its argument: the arithmetic is done
    out-of-place, so the caller's array is preserved and integer inputs are
    promoted to float instead of raising.
    """
    def _apply(inp):
        res = inp
        if scaler.with_mean:
            res = res - scaler.mean_
        if scaler.with_std:
            res = res / scaler.scale_
        return res
    return _apply

In [9]:
# Perturbation strengths to evaluate; 0.0 serves as the unperturbed baseline.
epsilons = [0.0,0.01,0.05,0.1,0.2,0.25]

# Results are appended, so earlier runs stay in the log file.
with open('fgsm.log', 'a') as logfile:

    def log(*fields):
        """Write `fields` to the log file exactly as `print` would."""
        print(*fields, file=logfile)

    for path in data_paths:
        log(path + ":\n\n\n")
        for (idx, sample) in enumerate(test_sample_data[path]):
            label = sample_labels[path][idx]
            log("Scaled test sample:", sample)
            log("Label:", label)

            # The FGSM direction is computed once per sample and reused for every epsilon.
            perturbations = create_adversarial_pattern(sample, label)
            log("Adversarial perturbation:", perturbations, "\n\n")

            for eps in epsilons:
                log("Epsilon =", eps)
                adversarial = sample + eps*perturbations
                log("Adversarial sample:", adversarial)

                # Map back to the unscaled feature space, repair the features
                # there, then rescale before handing the result to the model.
                unscaled_adversarial = reverse_scaler(scaler, adversarial).numpy()
                log("Unscaled adversarial sample:", unscaled_adversarial)
                sane_adversarial = np.apply_along_axis(make_result_sane, 2, unscaled_adversarial)
                log("Unscaled adversarial sample made_sane:", sane_adversarial)
                scaled_adversarial = np.apply_along_axis(scale(scaler), 2, sane_adversarial)
                log("Scaled adversarial sample made sane:", scaled_adversarial)

                # Predicted class per window, followed by a per-class tally.
                results = stacked_cnn_lstm_sae.predict(scaled_adversarial).argmax(axis=-1)
                log("Prediction results:", results)
                vals, counts = np.unique(results, return_counts=True)
                for (v, c) in zip(vals, counts):
                    log("Class", v, "occurs", c, "times")
                log()
    log("END LOG")