In [1]:
# imports and functions, does nothing

from functools import partial
from IPython.display import Image, display, Video, Image, HTML
from matplotlib import rcParams
from prettytable import PrettyTable
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.utils.multiclass import unique_labels
import constants as cst
import glob
import json
import sys
sys.path.append('../..')
import lib.plot_builder as plot_builder
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas
import pathlib
import random
import sklearn
import random
import matplotlib
matplotlib.rcParams.update({'font.size': 14})

def print_table(data, fields=None):
    t = PrettyTable()
    for row in data:
        t.add_row(row)
    t.align = 'l'
    if fields != None:
        t.field_names = fields
    print(t)

def read_dataset(f):
    acc = None
    precision = None
    recall = None
    f1score = None
    with open(f) as f2:
        data = json.load(f2)
        acc = round(100*data['score']['accuracy'][0], 1)
        precision = round(100*data['score']['precision'][0], 1)
        recall = round(100*data['score']['recall'][0], 1)
        f1score = round(100*data['score']['f1score'][0], 1)

    cm = f.replace('datasets/', 'plots/').replace('.json', '-cm.png')
    fi = f.replace('datasets/', 'plots/').replace('.json', '-fi.png')

    return acc, precision, recall, f1score, cm, fi

def img(path, width=400):
    rnd = random.randint(0,2e9)
    return f"""<img src="{path}?nocache={rnd}" style="width:{width}px; "></img>"""

def latex_table(table, header):
    header_bold = ["\\textbf{"+t+"}" for t in header]
    table2 = [header_bold]
    table2.extend(table)
    latex_table = """\\begin{tabular}{lrrrr}
"""
    rows = [" & ".join(map(str,row)) for row in table2]
    latex_table += "".join(["    " + row + " \\\\\n" for row in rows])
    latex_table += """\\end{tabular}"""

    return latex_table

def dataset_name_to_friendly_name(name):
    root_dataset = name.replace('-netflow1000', '').replace('-netflow100', '').replace('-netflow10', '').replace('-netflows', '').replace('-netflow', '').replace('-defended', '').replace('-google', '').replace('-dummies', '').replace('-notime', '').replace('-nototsize', '').replace('-nosizetime', '').replace('-nosize', '').replace('-15169-view', '')
    nicename = name.replace('-netflow1000', "NF 0.1%").replace('-netflow100', "NF   1%").replace('-netflow10', "NF  10%").replace('-netflow', "NF 100%").replace('/', ' ').replace('-defended', ' (defended)').replace('-google', '(Google dest filter)').replace('-dummies', '(dummies)').replace('-notime', '(no times)').replace('-nototsize', '(no total sizes)').replace('-nosizetime', '(no size/times)').replace('-nosize', '(no sizes)').replace('-15169-view', '(Google view)')
    variant = nicename.replace(root_dataset, '')
    return root_dataset, nicename, variant

def find_pos(name, features):
    i = 0
    while i<len(features):
        if features[i].strip() == name:
            return i
        i += 1
    print("Couldn't find", name)
    sys.exit(0)

In [2]:
# loads datasets of interest
files = glob.glob("datasets/*.json")
files = [f for f in files]
files.sort()

In [3]:
# Print the table summary with Dataset + Accuracy
tuples = []

for f in files:
    if "-pad" in f or "-defended" in f:
        continue
    
    dataset = f.replace('datasets/', '').replace('.json', '')
    dataset_name, _, variant_name = dataset_name_to_friendly_name(dataset)

    acc, _, _, _, _, _ = read_dataset(f)
    tuples.append([dataset_name, variant_name, acc])

print_table(tuples, fields=["Dataset", "Variant", "Accuracy"])

+-----------------------------+-------------------------+----------+
| Dataset                     | Variant                 | Accuracy |
+-----------------------------+-------------------------+----------+
| quic-100p-338-ad+de         |                         | 94.1     |
| quic-100p-338-adblock       |                         | 93.9     |
| quic-100p-338-decentraleyes |                         | 91.9     |
| quic-100p-338               | (dummies)               | 65.0     |
| quic-100p-3382              | quic-100p-338(dummies)2 | 74.6     |
| quic-100p-338               | (no sizes)              | 60.7     |
| quic-100p-338               | (no times)              | 92.4     |
| quic-100p-338               |                         | 91.6     |
+-----------------------------+-------------------------+----------+


In [4]:
# plot all CM+FI from the datasets
def show(f):

    dataset2 = f.replace('datasets/', '').replace('.json', '')
    acc2, _, _, _, cm2, fi2 = read_dataset(f)

    display(HTML(f"""
    <div class="row" style="text-align:left">
            <table>
            <td style="text-align: left">
            <h2>{dataset2}</h2>
            <h3>Accuracy {acc2}%</h3>
            """ + img(cm2) + img(fi2) + f"""
            <td>
            </td>
            </tr>
            </table>
    </div>
    """))

for f in files:
    if "-netflow" in f:
        continue
    if "-defended" in f:
        continue

    print(f)
    #show(f)

display(HTML('<h1>Fingerprinting on HARs</h1>'))
show("datasets/quic-100p-338.json")
show("datasets/quic-100p-338-nosize.json")
show("datasets/quic-100p-338-notime.json")
show("datasets/quic-100p-338-dummies.json")
show("datasets/quic-100p-338-dummies2.json")

datasets/quic-100p-338-ad+de.json
datasets/quic-100p-338-adblock.json
datasets/quic-100p-338-decentraleyes.json
datasets/quic-100p-338-dummies.json
datasets/quic-100p-338-dummies2.json
datasets/quic-100p-338-nosize.json
datasets/quic-100p-338-notime.json
datasets/quic-100p-338.json


In [8]:
#!/usr/bin/python3

import src.features_from_hars as kffeatures
import src.attack as attack
import importlib
importlib.reload(kffeatures)
importlib.reload(attack)

overall_results = []

for load in [20]:

    path = "../../cf-clusters-datasets/just_hars/quic-100p-338-40-loops"
    npy_file = "quic-100p-338-dummies2.npy"
    json_file = npy_file.replace('.npy', ".json")
    defense_costs = kffeatures.build(path+"/quic-100p-338-40-loops-har.npy", npy_file, defense=lambda trace: kffeatures.defend_smart_dummies(trace, load=load), VARIANT='nofilter')

    mean_nb_packet = round(np.mean([x[0] for x in defense_costs]))
    mean_kB = round(np.mean([x[1] for x in defense_costs]))

    results = attack.run("datasets/"+npy_file)
    plot_builder.serialize("datasets/"+json_file, results)

    acc, _, _, _, cm, fi = read_dataset("datasets/"+json_file)

    overall_results.append([load, mean_nb_packet, mean_kB/1024, acc])


    print(f"Costs: {mean_nb_packet} pkts {mean_kB/1024} kB, Accuracy {acc}")


    display(HTML(f"""
        <div class="row" style="text-align:left">
                <table>
                <td style="text-align: left">
                <h2>{dataset}</h2>
                <h3>Accuracy {acc}%</h3>
                """ + img(cm) + img(fi) + f"""
                <td>
                </td>
                </tr>
                </table>
        </div>
        """))

print(overall_results)

Load is 50
Adding fake query [1.1231634631218785, 446, 51934]
Adding fake query [2.4254390630553027, 444, 5538]
Adding fake query [0.7310777383490393, 451, 7872]
Adding fake query [0.9998059733692544, 424, 34897]
Adding fake query [1.0422402586560229, 491, 3099]
Adding fake query [0.856965072700286, 502, 42755]
Adding fake query [3.1319564441569963, 490, 26691]
Adding fake query [0.9299165178304171, 352, 2412]
Adding fake query [2.0143627736070755, 431, 27782]
Adding fake query [0.7353851656789385, 506, 33124]
Adding fake query [2.5626389691540252, 435, 21453]
Adding fake query [1.0736980588199359, 466, 23596]
Adding fake query [2.4007583684742215, 318, 6892]
Adding fake query [2.459375354487364, 421, 48]
Adding fake query [1.0688249894485038, 571, 17655]
Adding fake query [0.7709020666203409, 609, 16508]
Adding fake query [0.7237989442921944, 550, 14656]
Adding fake query [0.955031144825068, 516, 8054]
Adding fake query [3.4040433869420736, 451, 4041]
Adding fake query [0.717443346034