In [1]:
!pip install scapy

In [2]:
#!git clone https://github.com/ymirsky/Kitsune-py.git

In [3]:
!ls

Data  kitsune  Kitsune_eval.ipynb  LICENSE  models  README.md  results


In [4]:
%cd kitsune

/home/himanshi/courses/sem8/cod891/tardigrade/kitsune


In [5]:
from tardigrade.kitsune import KitModel, KeyedKitModel
import numpy as np
import time

Importing Scapy Library


In [6]:
!pip install sklearn

from textwrap import fill
import datetime
import matplotlib.ticker as ticker
import matplotlib.dates as mdate
import sklearn.metrics as metrics
from itertools import product
from tqdm import tqdm
from matplotlib import cm
from matplotlib import pyplot as plt
from scipy.stats import norm
import numpy as np
import pickle
import matplotlib
import socket
import multiprocessing as mp
matplotlib.use('Agg')
np.set_printoptions(threshold=np.inf)
import torch
# matplotlib.rcParams['timezone']="Pacific/Auckland"

In [7]:
def squeeze_features(fv, precision):
    """Rounds each feature to a fixed number of significant figures.

    Args:
        fv (array): feature vector.
        precision (int): number of significant figures to keep.
    Returns:
        array: the features rounded to `precision` significant figures. Zeros and
        non-finite entries are scaled by 1, so they come back unchanged.
    """
    # Entries that have a usable order of magnitude.
    has_magnitude = np.isfinite(fv) & (fv != 0)
    # Stand-in for the remaining entries, chosen so their scale factor below is 1.
    stand_in = 10**(precision-1)
    magnitude = np.where(has_magnitude, np.abs(fv), stand_in)

    # Power of ten that moves the last significant digit to the units place.
    exponent = precision - 1 - np.floor(np.log10(magnitude))
    scale = 10 ** exponent

    scaled = fv * scale
    return np.round(scaled) / scale

In [8]:
def eval_kitsune(path, model, threshold=None, ignore_index=-1, out_image=None, meta_file=None, record_scores=False, y_true=None, record_prediction=False, load_prediction=False, plot_with_time=False):
    """
    evaluates a trained kitsune model on some traffic and plots the anomaly scores.
    Args:
        path (string): path to traffic feature file; every line is parsed as comma-separated floats.
        model: trained model object; model.score is called with one feature row at a time.
        threshold (float): anomaly threshold value, if None it is calculated from the scores as
            min(max score, exp(mean + 3 * std)) with mean/std taken over the log scores. Defaults to None.
        ignore_index (int): number of rows to skip at the start of the feature file (and of the
            metadata file when one is given). Defaults to -1, which skips nothing.
        out_image (string): path to output anomaly score image. Defaults to None, which falls back
            to "kitsune_rmse.png".
        meta_file (string): path to metadata file, used to calculate evasion metrics. Its first line
            is discarded and the last comma-separated field of each following line is used as the
            packet label. Defaults to None.
        record_scores (boolean): whether to record anomaly scores in "kitsune_score.csv" and the
            threshold in "kitsune_threshold.csv". Defaults to False.
        y_true (array): ground-truth labels handed to the ROC computation. Defaults to None, in which
            case every packet is given label 0.
        record_prediction (boolean): whether to write the indices of scores >= threshold to
            "kitsune_prediction.csv". Defaults to False.
        load_prediction (boolean): whether to load scores from "kitsune_score.csv" instead of
            scoring the feature file with the model. Defaults to False.
        plot_with_time (boolean): request a time-based x axis. This function never collects
            timestamps, so the packet index is used regardless. Defaults to False.
    Returns:
        if meta_file is given: (pos_mal, pos_craft, pos_ignore), the over-threshold counts per label;
            these are only counted while scoring, i.e. when a threshold was passed in.
        elif threshold was None: (pos, threshold) with the calculated threshold.
        else: (pos, roc_auc); roc_auc stays 1 unless y_true is given.
    Raises:
        ValueError: if a packet scores above the threshold but its metadata label is not one of
            "craft", "malicious" or "attacker_low", or if the threshold has to be calculated and
            there are no scores.
    """
    # the pcap, pcapng, or tsv file to process.
    print("evaluting", path)
    print("meta", meta_file)

    # keep the caller's threshold: it decides which tuple is returned at the end
    t = threshold
    roc_auc = 1
    label_map = []

    if out_image is None:
        out_image = "kitsune_rmse.png"

    has_meta = meta_file is not None
    pos = 0
    pos_craft = 0
    pos_mal = 0
    pos_ignore = 0

    # labels and times are never filled in by this function, so the plot below
    # never takes its label-legend or time-axis branches.
    labels = []
    times = []
    colours = []

    test_data = open(path, "r")
    meta = None
    try:
        if has_meta:
            meta = open(meta_file, "r")
            # the first line is read and thrown away (presumably a header row)
            meta.readline()
            meta_row = meta.readline()

        tbar = tqdm()
        if load_prediction:
            rmse_array = np.genfromtxt(
                "kitsune_score.csv", delimiter=",")
        else:
            counter = 0
            rmse_array = []

            if not has_meta:
                colours = None

            while True:

                if counter < ignore_index:
                    # skip the row in BOTH files so features and metadata stay aligned
                    if test_data.readline() == "":
                        break
                    if has_meta:
                        meta_row = meta.readline()

                    counter += 1
                    continue

                pkt = test_data.readline()
                if pkt == "":
                    break
                pkt = pkt.rstrip().split(",")
                pkt = [[float(x) for x in pkt]]
                pkt = torch.tensor(pkt).float()

                rmse = model.score(pkt)[0].item()

                # a score of -1 stops the evaluation (presumably a sentinel from
                # model.score -- confirm against the model implementation)
                if rmse == -1:
                    break

                # zero scores are stored as 1e-2: the threshold calculation and the
                # plot both work on a log scale, where zero has no value
                if rmse == 0:
                    rmse_array.append(1e-2)
                else:
                    rmse_array.append(rmse)

                counter += 1
                tbar.update(1)

                if has_meta:
                    comment = meta_row.rstrip().split(",")[-1]

                    # set colours
                    if comment == "craft":
                        colours.append([67 / 255., 67 / 255., 67 / 255., 0.8])
                    elif comment == "malicious":
                        colours.append([1, 0, 0, 1])
                    else:
                        colours.append([204 / 255., 243 / 255., 1, 0.5])

                    if threshold is not None and rmse > threshold:
                        if comment == "craft":
                            pos_craft += 1
                        elif comment == "malicious":
                            pos_mal += 1
                        elif comment == "attacker_low":
                            pos_ignore += 1
                        else:
                            print(meta_row)
                            print(rmse)
                            raise ValueError(
                                "packet above threshold has unexpected meta label {!r}".format(comment))

                    meta_row = meta.readline()
                elif threshold is not None and rmse > threshold:
                    pos += 1
    finally:
        test_data.close()
        if meta is not None:
            meta.close()

    # if no threshold, calculate threshold
    if threshold is None:
        if len(rmse_array) == 0:
            raise ValueError("no anomaly scores available to calculate a threshold from")
        # threshold is min(mean+3std, max)
        benignSample = np.log(rmse_array)
        mean = np.mean(benignSample)
        std = np.std(benignSample)
        threshold_std = np.exp(mean + 3 * std)
        threshold_max = max(rmse_array)
        threshold = min(threshold_max, threshold_std)
        # rmse_array may be a plain list; convert so the comparison is element-wise
        # even when min() picked the plain-float maximum
        pos = (np.asarray(rmse_array) > threshold).sum()

    # record prediction scores/rmse
    if record_scores:
        score_path = "kitsune_score.csv"
        threshold_path = "kitsune_threshold.csv"
        np.savetxt(score_path, rmse_array, delimiter=",")
        np.savetxt(threshold_path, [threshold], delimiter=",")
        print("score saved to", score_path)

    # record prediction labels
    if record_prediction:
        pred_path = "kitsune_prediction.csv"
        # NOTE(review): this uses >= while the positive counts above use > -- confirm which is intended
        np.savetxt(pred_path, np.where(np.array(rmse_array) >= threshold)[0], delimiter=",")
        print("kitsune prediction saved to", pred_path)

    if y_true is None:

        fpr, tpr, roc_t = metrics.roc_curve(
            [0 for i in range(len(rmse_array))], rmse_array, drop_intermediate=False)
    else:
        fpr, tpr, roc_t = metrics.roc_curve(
            y_true, rmse_array, drop_intermediate=True)
        roc_auc = metrics.auc(fpr, tpr)
    print("total packets:", len(rmse_array))

    if out_image is not None:
        cmap = plt.get_cmap('Set3')
        num_packets = len(rmse_array)
        f, (ax1, ax2) = plt.subplots(
            2, 1, constrained_layout=True, figsize=(10, 10), dpi=200)

        if times and plot_with_time:
            x_val = times
            date_fmt = '%m/%d %H:%M:%S'

            date_formatter = mdate.DateFormatter(date_fmt)
            ax1.xaxis.set_major_formatter(date_formatter)

            # tick every 4 hours
            ax1.xaxis.set_major_locator(ticker.MultipleLocator(1 / 6))

            ax1.tick_params(labelrotation=90)
        else:
            x_val = range(len(rmse_array))

        if labels:
            (unique, counts) = np.unique(labels, return_counts=True)
            frequencies = np.asarray((unique, counts)).T
            for i in frequencies:
                label_map[i[0]] = "{} {}".format(label_map[i[0]], i[1])

            scatter = ax1.scatter(x_val, rmse_array,
                                  s=1, c=labels, alpha=0.05, cmap=cmap)
            # wrap legends
            labels = [fill(l, 20) for l in label_map]

            leg = ax1.legend(handles=scatter.legend_elements()[0], labels=labels, bbox_to_anchor=(1.01, 1),
                             loc='upper left', borderaxespad=0.)
            for lh in leg.legendHandles:
                lh._legmarker.set_alpha(1.)

        elif has_meta:
            ax1.scatter(x_val, rmse_array, s=1, c=colours)
        else:
            ax1.scatter(x_val, rmse_array, s=1, alpha=0.05)

        ax1.axhline(y=threshold, color='r', linestyle='-')
        ax1.set_yscale("log")
        ax1.set_ylabel("RMSE (log scaled)")
        if has_meta:
            ax1.set_xlabel(
                "packet index \n packets over threshold {}".format(pos_mal + pos_craft))
        else:
            ax1.set_xlabel(
                "packet index \n packets over threshold {}".format(pos))

        if y_true is None:
            # without ground truth the lower panel shows threshold against false positive rate
            ax2.plot(fpr, roc_t, 'b')
            ax2.set_ylabel("threshold")
            ax2.set_xlabel("false positive rate")
        else:
            ax2.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
            ax2.set_title('AUC = %0.2f' % roc_auc)
            ax2.set_ylabel("true positive rate")
            ax2.set_xlabel("false positive rate")
        f.savefig(out_image)
        print("plot path:", out_image)
        plt.close()
    tbar.close()
    if has_meta:
        return pos_mal, pos_craft, pos_ignore
    else:
        if t is None:
            return pos, threshold
        else:
            return pos, roc_auc

In [9]:
# print("Unzipping Sample Capture...")
# import zipfile
# with zipfile.ZipFile("mirai.zip","r") as zip_ref:
#     zip_ref.extractall()

In [10]:
"""
    Kitsune Evaluation
"""

data = "../Data/traffic.csv"

# Extract features
# print("Extracting features")
# extract_features("../Data/traffic.tsv", "../Data/traffic.csv")


# Train model on "../Data/traffic.csv"
print("Training model on " + data)
model = KitModel()

# Train model
model.train_model(data)
print("Training complete")

# Store model
model.store_model("kitsune.pt")

# ---------------------------------------------------------------------------
# Keyed Kitsune Evaluation (currently disabled)
#
# This heading is a comment rather than a bare string literal: as the last
# expression of the cell, a string would be echoed as the cell's output.
# ---------------------------------------------------------------------------

# data = "../Data/_transformed_traffic.csv"

# # # Transform features
# # print("Transforming features")
# # transform_features("../Data/traffic.csv", "../Data/transformed_traffic.csv")

# # Train model on "../Data/traffic.csv"
# print("Training Keyed Kitsune model on " + data)
# model = KeyedKitModel()

# # Train model
# model.train_model(data)
# print("Training complete")

# # Store model
# model.store_model("KeyedKitsune.pt")

Training model on ../Data/traffic.csv




🦊 Epoch [0]  tail losses 5.68691  head loss: 0.12126
🦊 Epoch [0]  tail losses 5.68010  head loss: 0.12159
🦊 Epoch [0]  tail losses 5.67332  head loss: 0.12191
🦊 Epoch [0]  tail losses 5.66631  head loss: 0.12224
🦊 Epoch [0]  tail losses 5.65958  head loss: 0.12256
🦊 Epoch [0]  tail losses 5.65230  head loss: 0.12292
🦊 Epoch [0]  tail losses 5.64786  head loss: 0.12308
🦊 Epoch [0]  tail losses 5.64287  head loss: 0.12329
🦊 Epoch [0]  tail losses 5.63769  head loss: 0.12350
🦊 Epoch [0]  tail losses 5.63232  head loss: 0.12373
🦊 Epoch [0]  tail losses 5.62754  head loss: 0.12392
🦊 Epoch [0]  tail losses 5.62164  head loss: 0.12418
🦊 Epoch [0]  tail losses 5.61669  head loss: 0.12438
🦊 Epoch [0]  tail losses 5.61203  head loss: 0.12456
🦊 Epoch [0]  tail losses 5.60624  head loss: 0.12482
🦊 Epoch [0]  tail losses 5.60110  head loss: 0.12503
🦊 Epoch [0]  tail losses 5.59567  head loss: 0.12526
🦊 Epoch [0]  tail losses 5.59090  head loss: 0.12545
🦊 Epoch [0]  tail losses 5.58604  head loss: 0

'\n    Keyed Kitsune Evaluation\n'

In [11]:
# Score the capture held in `data` with the trained `model`. No threshold is
# passed, so eval_kitsune calculates one from the scores; the scores, the
# threshold and the predictions are written to CSV files.
eval_kitsune(
    data,
    model,
    threshold=None,
    out_image="KeyedKitsuneRes_png",
    meta_file=None,
    record_scores=True,
    y_true=None,
    record_prediction=True,
    load_prediction=False,
    plot_with_time=True,
)

evaluting ../Data/traffic.csv
meta None


764124it [1:05:28, 276.13it/s]

score saved to kitsune_score.csv
kitsune prediction saved to kitsune_prediction.csv




total packets: 764136


764136it [1:05:36, 194.10it/s]

plot path: KeyedKitsuneRes_png





(100, 0.24941907297291574)

In [12]:
# !git clone https://github.com/swainsubrat/adversarial-kitsune.git
!git clone https://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@github.com/swainsubrat/adversarial-kitsune.git

# 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

In [14]:
import csv

def evaluation_metrics(scores_path, threshold_path):
    """Prints confusion-matrix counts, precision, recall and F1 for saved anomaly scores.

    Label convention used by this function: every packet gets ground-truth
    label 1, and a packet is predicted 1 when its score is at or below the
    threshold and 0 when it is above. Label 1 is therefore the class the
    metrics treat as positive.
    NOTE(review): earlier comments here described the opposite convention
    (0 = benign, 1 = malicious); the computation itself is unchanged -- confirm
    which convention is intended.

    Args:
        scores_path (string): path to a CSV file; the first field of every
            non-empty row is read as an anomaly score.
        threshold_path (string): path to a CSV file; the first field of its
            first row is read as the threshold.
    Returns:
        None. The counts and metrics are printed.
    """
    # Anomaly scores, one per row (blank rows are skipped).
    with open(scores_path, 'r', newline='') as scores_file:
        y_output = [float(row[0]) for row in csv.reader(scores_file) if row]

    # Ground-truth label 1 for every packet.
    y_true = np.ones(len(y_output), dtype=int)

    # Threshold: first cell of the threshold file.
    with open(threshold_path, 'r', newline='') as threshold_file:
        threshold_rows = list(csv.reader(threshold_file))
    threshold = float(threshold_rows[0][0])

    # Predicted labels: 0 above the threshold, 1 otherwise.
    y_pred = np.where(np.asarray(y_output, dtype=float) > threshold, 0, 1)

    # labels=[0, 1] keeps the matrix 2x2 even when only one class occurs
    # (e.g. no score above the threshold); otherwise the unpacking below fails.
    tn, fp, fn, tp = metrics.confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    # A ratio with a zero denominator is reported as 0.0 instead of nan.
    precision = tp/(tp+fp) if (tp+fp) else 0.0
    recall = tp/(tp+fn) if (tp+fn) else 0.0
    f1 = 2*(precision*recall)/(precision+recall) if (precision+recall) else 0.0

    print("True Negatives: ", tn)
    print("False Positives: ", fp)
    print("False Negatives: ", fn)
    print("True Positives: ", tp)

    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1 score: ", f1)

In [15]:
# One entry per model: heading to print, score file, threshold file.
# NOTE(review): these files live under ../results/<model>/ -- presumably copied
# there by hand from the files eval_kitsune writes; confirm.
metric_reports = [
    ("Evaluation metrics for Kitsune: ",
     '../results/Kitsune/kitsune_score.csv',
     '../results/Kitsune/kitsune_threshold.csv'),
    ("Evaluation metrics for Keyed Kitsune: ",
     '../results/KeyedKitsune/kitsune_score.csv',
     '../results/KeyedKitsune/kitsune_threshold.csv'),
]

for report_no, (heading, scores_path, threshold_path) in enumerate(metric_reports):
    if report_no > 0:
        # blank line between consecutive reports
        print()
    print(heading)
    evaluation_metrics(scores_path, threshold_path)