In [None]:
"""
Define global variables and functions.
"""

import pandas as pd
import matplotlib.pyplot as plt

# Shared plot constants
FIG_WIDTH = 3.49            # figure width used in `figsize` tuples
LINE_THICKNESS = 0.5        # line width of bar edges
GRID_LINE_THICKNESS = 0.25  # line width of grid lines

# Pandas: a value of None lifts the display limit for columns and rows
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

# Matplotlib: font family and font sizes shared by all figures
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 9
plt.rcParams['axes.titlesize'] = 8
plt.rcParams['axes.labelsize'] = 8
plt.rcParams['xtick.labelsize'] = 7
plt.rcParams['ytick.labelsize'] = 7
plt.rcParams['legend.fontsize'] = 8
plt.rcParams['figure.titlesize'] = 12


In [None]:
"""
Retrieve event signature trees.
"""

import os
from pathlib import Path
import glob
from enum import StrEnum
import json


class DeviceTypes(StrEnum):
    """
    Enum class for the device types.
    """
    PLUG   = "plug"
    LIGHT  = "light"
    CAMERA = "camera"


class ExpCases(StrEnum):
    """
    Enumerates the experimental cases,
    i.e. the pairs device-app.
    """
    ## Power plugs
    # TP-Link HS110
    TPLINK_PLUG = "TpLinkPlug"
    TPLINK_PLUG_TAPO = "TpLinkPlugTapo"
    TPLINK_PLUG_SMARTTHINGS = "TpLinkPlugSmartThings"
    # SmartThings Outlet
    SMARTTHINGS_OUTLET = "SmartThingsOutlet"
    # Tapo P110
    TAPO_PLUG = "TapoPlug"
    TAPO_PLUG_SMARTTHINGS = "TapoPlugSmartThings"
    # WOOX plug (Tuya)
    TUYA_PLUG = "TuyaPlug"
    ## Cameras
    # Xiaomi
    XIAOMI_CAMERA = "XiaomiCamera"
    # Tapo C200
    TAPO_CAMERA = "TapoCamera"
    TAPO_CAMERA_SMARTTHINGS = "TapoCameraSmartThings"
    # D-Link
    DLINK_CAMERA = "DLinkCamera"
    ## Light bulbs
    # Philips Hue
    HUE_LIGHT = "HueLight"
    HUE_LIGHT_ESSENTIALS = "HueLightEssentials"
    HUE_LIGHT_SMARTTHINGS = "HueLightSmartThings"
    # Alecto (Tuya)
    TUYA_LIGHT = "TuyaLight"
    # Tapo L530E
    TAPO_LIGHT = "TapoLight"
    TAPO_LIGHT_SMARTTHINGS = "TapoLightSmartThings"


class DictKeysMetrics(StrEnum):
    """
    Enum class for the keys of the metrics dictionary.
    """
    METRICS                   = "metrics"
    FILE_TREE                 = "file_tree"
    N_FIRST_LVL_NODES         = "n_first_lvl_nodes"
    N_TOTAL_NODES             = "n_total_nodes"
    MAX_DEPTH                 = "max_depth"
    MAX_UNIQUE_DEPTH          = "max_unique_depth"
    N_UNIQUE_NODES            = "n_unique_nodes"
    NODE_DISCOVERY_DEPTH      = "pdd"
    FIRST_LEVEL_NODE_COVERAGE = "flnc"
    FIRST_LEVEL_NODE_LOSS     = "flnl"
    ROBUSTNESS_SCORE          = "robustness_score"
    DOMAIN_NAMES_LVL_1        = "domain_names_lvl_1"
    DOMAIN_NAMES_HIDDEN       = "domain_names_hidden"
    DNS_SERVERS_LVL_1         = "dns_servers_lvl_1"
    DNS_SERVERS_HIDDEN        = "dns_servers_hidden"


## Paths
BASE_DIR = os.getcwd()
DEVICES_DIR = os.path.join(BASE_DIR, "devices")

# Collect every single-event tree matching
# <DEVICES_DIR>/*/<case>/node_pruning/*/tree.json
list_trees = []
for case in ExpCases:
    pattern = os.path.join(DEVICES_DIR, "*", case.value, "node_pruning", "*", "tree.json")
    list_trees.extend(glob.glob(pattern))

# Second parent of each tree file, i.e. the "node_pruning" directory of the
# experimental case (one entry per tree, hence duplicated per event)
list_event_dirs = [Path(tree).parents[1] for tree in list_trees]
# Names of the aggregated (multi-event) trees that per-event plots skip
fields_to_skip = ["all_events", "full", "raw", "no_boot", "device"]

# Each experimental case directory must be processed only once,
# whatever its number of events; first-seen order is preserved.
list_device_dirs = list(dict.fromkeys(os.path.dirname(event_dir) for event_dir in list_event_dirs))


# Build mapping of device to event signature trees:
# d[<device>]["METRICS"][<event>]["FILE_TREE"] = path of the tree JSON file
d = {}
for device_dir in list_device_dirs:
    device = os.path.basename(device_dir)
    d[device] = {DictKeysMetrics.METRICS.name: {}}
    trees_device = d[device][DictKeysMetrics.METRICS.name]

    ## Get list of event signature trees

    # All events
    list_json_file_tree_all_events = glob.glob(os.path.join(device_dir, "tree.all_events.json"))
    for json_file_tree in list_json_file_tree_all_events:
        trees_device["all_events"] = {DictKeysMetrics.FILE_TREE.name: json_file_tree}

    # Full device profile
    list_json_file_tree_full = glob.glob(os.path.join(device_dir, "node_pruning", "tree.device.json"))
    for json_file_tree in list_json_file_tree_full:
        trees_device["device"] = {DictKeysMetrics.FILE_TREE.name: json_file_tree}

    # Single events: the event name is the directory holding tree.json
    list_json_file_tree_events = glob.glob(os.path.join(device_dir, "node_pruning", "*", "tree.json"))
    for json_file_tree in list_json_file_tree_events:
        event = Path(json_file_tree).parent.name
        trees_device[event] = {DictKeysMetrics.FILE_TREE.name: json_file_tree}

# Print tree JSON file paths
print(f"Number of unique events: {sum(1 for _, metrics in d.items() for _ in metrics[DictKeysMetrics.METRICS.name])}")
print(json.dumps(d, indent=2))


In [None]:
"""
Add classifiers to the dictionary.
"""

from pprint import pprint
from enum import Enum
import json
import yaml


class DictKeysMetadata(Enum):
    """
    Classifier keys of the devices' dictionary.

    Member names are used as keys of the dictionary entries.
    """

    METADATA = "metadata"
    DEVICE_TYPE = "device-type"
    MANUFACTURER = "manufacturer"
    APP = "app"


class Apps(Enum):
    """Companion apps covered by the experiments."""

    SMARTTHINGS = "SmartThings"
    TUYA = "Tuya"
    OFFICIAL = "official"
    OTHER = "other"


# Load the devices' metadata once: the file is the same for every device
with open(os.path.join(BASE_DIR, "devices", "devices.yaml"), "r") as f:
    metadata_devices = yaml.safe_load(f)

# Attach the classifiers to each device entry
for device in d:
    metadata_device = metadata_devices[device]
    d[device][DictKeysMetadata.METADATA.name] = {
        DictKeysMetadata.DEVICE_TYPE.name: metadata_device["device-type"],
        DictKeysMetadata.MANUFACTURER.name: metadata_device["manufacturer"],
        # Devices without an explicit app are attributed to the official one
        DictKeysMetadata.APP.name: metadata_device.get("app", Apps.OFFICIAL.value)
    }


# Print devices' dictionary
pprint(d)
#print(json.dumps(d, indent=2))


In [None]:
"""
Domain names-related functions.
"""

from ipaddress import ip_address


def is_domain_name(hostname: str) -> bool:
    """
    Tell whether a hostname is a domain name rather than an IP address.

    Args:
        hostname (str): Hostname to check.
    Returns:
        bool: False if the hostname parses as an IP address;
              otherwise True only if it contains a dot.
    """
    try:
        ip_address(hostname)
        # Parsed successfully: this is an IP address
        return False
    except ValueError:
        pass

    return "." in hostname


def is_supported_field(protocol: str, field: str) -> bool:
    """
    Tell whether a protocol field may hold a domain name.

    The supported fields are the source and destination of IPv4 and IPv6,
    and the DNS query name.

    Args:
        protocol (str): protocol
        field (str): protocol field
    Returns:
        bool: True if the field is supported, False otherwise.
    """
    if protocol in ("ipv4", "ipv6"):
        return field in ("src", "dst")
    return protocol == "dns" and field == "qname"


def get_domain_names(policy: dict) -> set[str]:
    """
    Collect the domain names found in the supported fields of a policy.

    Args:
        policy (dict): Policy to analyze; its "protocols" entry maps
                       each protocol to a dictionary of field values.
    Returns:
        set[str]: Set of domain names.
    """
    names = set()

    for protocol, fields in policy["protocols"].items():
        for field, value in fields.items():
            # Only some fields can carry a hostname
            if not is_supported_field(protocol, field):
                continue

            # A field holds either a single value or a list of values
            candidates = value if isinstance(value, list) else [value]
            for candidate in candidates:
                if is_domain_name(candidate):
                    names.add(candidate)

    return names


In [None]:
"""
DNS servers-related functions.
"""


def contains_dns_query(policy: dict) -> bool:
    """
    Tell whether a policy describes a DNS query.

    Args:
        policy (dict): Policy to analyze.
    Returns:
        bool: True if the policy has DNS protocol data that is not flagged
              as a response ("is_response" absent or falsy), False otherwise.
    """
    protocols = policy["protocols"]

    if "dns" in protocols:
        # DNS data is a query unless explicitly marked as a response
        return not protocols["dns"].get("is_response", False)

    # No DNS protocol data at all
    return False


def get_dns_servers(policy: dict) -> set[str]:
    """
    Collect the DNS servers targeted by a policy.

    The servers are the IPv4 and IPv6 destinations of a policy
    which pertains to a DNS query.

    Args:
        policy (dict): Policy to analyze.
    Returns:
        set[str]: set of DNS servers, empty if the policy is not a DNS query
    """
    servers = set()

    # Only DNS queries designate a DNS server
    if not contains_dns_query(policy):
        return servers

    protocols = policy["protocols"]
    for layer in ("ipv4", "ipv6"):
        destination = protocols.get(layer, {}).get("dst", [])
        # The destination is either a single value or a list of values
        if not isinstance(destination, list):
            servers.add(destination)
            continue
        servers.update(destination)

    return servers


In [None]:
"""
Compute metrics over event signature trees.
"""

from pprint import pprint
import json
from collections import deque
from treelib import Tree
from utils.tree import build_tree
from utils.heuristic import get_node_flows, compare_policies


def _record_by_level(result: dict, depth: int, values: set, key_lvl_1: str, key_hidden: str) -> None:
    """
    Store values under the first-level key (depth 1) or the hidden key (depth > 1).

    Args:
        result (dict): Metrics dictionary, updated in place.
        depth (int): Depth of the node the values come from.
        values (set): Values found in the node's policy.
        key_lvl_1 (str): Result key of the first-level values.
        key_hidden (str): Result key of the hidden values.
    """
    if values:
        if depth == 1:
            result[key_lvl_1] = result.get(key_lvl_1, set()).union(values)
        elif depth > 1:
            result[key_hidden] = result.get(key_hidden, set()).union(values)

    # A value already present at the first level is not hidden
    if key_hidden in result:
        result[key_hidden] = result[key_hidden].difference(result.get(key_lvl_1, set()))


def compute_metrics(tree: Tree, queue: deque, unique_policies: list, result: dict | None = None) -> dict:
    """
    Traverse the event signature tree in queue order and accumulate
    metrics over the nodes whose policy has not been seen before.

    The queue is consumed iteratively, so the traversal is not bounded by
    the interpreter's recursion limit. The children of a node are visited
    only if the node's policy is new; the root node ("0_root") carries no
    policy and only contributes its children.

    Args:
        tree (treelib.Tree): Complete event signature tree.
        queue (deque): Queue of (depth, node identifier) pairs to visit.
                       Consumed by the traversal.
        unique_policies (list): List of already counted unique policies.
                                Extended in place.
        result (dict): Current computed metrics, updated in place.
                       A new dictionary is created if omitted.
    Returns:
        dict: Dictionary containing the computed metrics,
              keyed by the names of the DictKeysMetrics members.
    """
    # A dictionary used as default value would be shared between calls
    if result is None:
        result = {}

    key_max_depth = DictKeysMetrics.MAX_UNIQUE_DEPTH.name
    key_n_unique = DictKeysMetrics.N_UNIQUE_NODES.name
    key_robustness = DictKeysMetrics.ROBUSTNESS_SCORE.name
    key_discovery = DictKeysMetrics.NODE_DISCOVERY_DEPTH.name

    while queue:
        depth, node_id = queue.popleft()

        if node_id != "0_root":
            # Get node (meta)data; skip identifiers unknown to the tree
            node = tree.get_node(node_id)
            if node is None:
                continue

            # The node's policy is the last of the flows returned for it
            policy = get_node_flows(node)[-1]

            # Already counted policy: neither the node nor its subtree is visited
            if any(compare_policies(policy, p) for p in unique_policies):
                continue
            unique_policies.append(policy)

            # Maximum tree depth of unique nodes
            result[key_max_depth] = max(result.get(key_max_depth, 0), depth)

            # Number of unique policies
            result[key_n_unique] = result.get(key_n_unique, 0) + 1

            # Robustness score: number of unique policies below the first level
            if depth == 1:
                result[key_robustness] = result.get(key_robustness, 0)
            elif depth > 1:
                result[key_robustness] = result.get(key_robustness, 0) + 1

            # Number of unique policies discovered at each depth
            policy_discovery_depth = result.setdefault(key_discovery, {})
            policy_discovery_depth[depth] = policy_discovery_depth.get(depth, 0) + 1

            # Domain names
            _record_by_level(
                result, depth, get_domain_names(policy),
                DictKeysMetrics.DOMAIN_NAMES_LVL_1.name,
                DictKeysMetrics.DOMAIN_NAMES_HIDDEN.name
            )

            # DNS resolvers
            _record_by_level(
                result, depth, get_dns_servers(policy),
                DictKeysMetrics.DNS_SERVERS_LVL_1.name,
                DictKeysMetrics.DNS_SERVERS_HIDDEN.name
            )

        # Add children to queue (root node, or node with a new policy)
        for child in tree.children(node_id):
            queue.append((depth + 1, child.identifier))

    return result



### COMPUTE METRICS ###

for device, device_entry in d.items():
    metrics_device = device_entry[DictKeysMetrics.METRICS.name]
    for metrics in metrics_device.values():
        # Parse the JSON file into an event signature tree
        tree_json_file = metrics[DictKeysMetrics.FILE_TREE.name]
        with open(tree_json_file, "r") as f:
            tree_json = json.load(f)
        tree = build_tree(Tree(), tree_json)

        ## Metrics read directly from the tree

        first_level_nodes = tree.children("0_root")
        n_first_level_nodes = len(first_level_nodes)
        metrics.update({
            # Number of first level policies
            DictKeysMetrics.N_FIRST_LVL_NODES.name: n_first_level_nodes,
            # Total number of policies, root node excluded
            DictKeysMetrics.N_TOTAL_NODES.name: tree.size() - 1,
            # Maximum tree depth
            DictKeysMetrics.MAX_DEPTH.name: tree.depth(),
        })

        ## Metrics requiring a traversal, which starts from the root at depth 0

        queue = deque([(0, "0_root")])
        unique_policies = []
        computed_metrics = compute_metrics(tree, queue, unique_policies, {})
        metrics.update(computed_metrics)

        ## Ratios over the number of unique policies

        n_unique_policies = metrics[DictKeysMetrics.N_UNIQUE_NODES.name]

        # First-Level Node Coverage
        flnc = n_first_level_nodes / n_unique_policies
        metrics[DictKeysMetrics.FIRST_LEVEL_NODE_COVERAGE.name] = flnc

        # First-Level Node Loss
        flnl = (n_unique_policies - n_first_level_nodes) / n_unique_policies
        metrics[DictKeysMetrics.FIRST_LEVEL_NODE_LOSS.name] = flnl


pprint(d, indent=2)


In [None]:
"""
Plot count of discovered unique flow IDs.
"""

from copy import deepcopy
import matplotlib.pyplot as plt

### DATA ###

# Empty x & y vectors, copied once per event category
vectors = {
    "list_device_events": [],
    "list_first_level_ids": [],
    "list_hidden_ids": [],
}
vectors_per_event = {category: deepcopy(vectors) for category in ("boot", "others")}

# Fill the vectors and the aggregate counters
max_total_ids = sum_hidden_ids = sum_total_ids = 0
for device, device_entry in d.items():
    metrics_device = device_entry[DictKeysMetrics.METRICS.name]
    for event, metrics in metrics_device.items():
        # Aggregated trees do not correspond to a single event
        if event in fields_to_skip:
            continue

        event_category = "boot" if event == "boot" else "others"
        vectors_event = vectors_per_event[event_category]
        vectors_event["list_device_events"].append(f"{device}-{event}")

        # First-level IDs, and hidden IDs as the remaining unique ones
        n_first_level_ids = metrics[DictKeysMetrics.N_FIRST_LVL_NODES.name]
        vectors_event["list_first_level_ids"].append(n_first_level_ids)
        n_unique_ids = metrics[DictKeysMetrics.N_UNIQUE_NODES.name]
        n_hidden_ids = n_unique_ids - n_first_level_ids
        vectors_event["list_hidden_ids"].append(n_hidden_ids)

        # Update aggregate values
        max_total_ids = max(max_total_ids, n_unique_ids)
        sum_hidden_ids += n_hidden_ids
        sum_total_ids += n_unique_ids

print(f"Sum of hidden flow IDs: {sum_hidden_ids}")
print(f"Sum of total flow IDs: {sum_total_ids}")
print(f"Percentage: {sum_hidden_ids / sum_total_ids * 100:.2f}%")

# Boot events come first, followed by all the other events
vectors_boot, vectors_others = vectors_per_event["boot"], vectors_per_event["others"]
list_device_events = vectors_boot["list_device_events"] + vectors_others["list_device_events"]
list_first_level_ids = vectors_boot["list_first_level_ids"] + vectors_others["list_first_level_ids"]
list_hidden_ids = vectors_boot["list_hidden_ids"] + vectors_others["list_hidden_ids"]

# Mapping of each device-event pair to its ID on the x-axis
print("Device-event pairs:")
for i, device_event in enumerate(list_device_events, start=1):
    print(f"{device_event}: {i}")

# Initialize plot
figsize = (3.4, 1.7)
fig, ax = plt.subplots(figsize=figsize)

# Stacked bars: hidden flow IDs on top of first-level ones
n_events = len(list_first_level_ids)
x = range(1, n_events + 1)
bar_edges = {"edgecolor": 'black', "linewidth": LINE_THICKNESS}
ax.bar(x, list_first_level_ids, label="First-Level Flow IDs", color='white', **bar_edges)
ax.bar(x, list_hidden_ids, bottom=list_first_level_ids, label="Hidden Flow IDs", color='black', **bar_edges)

# Plot metadata
ax.grid(axis="y", linewidth=GRID_LINE_THICKNESS)
#ax.set_xlabel("Event ID")
ax.set_ylabel("Count of Unique Flow IDs")
ax.legend()
ax.set_xlim(0, n_events + 1)
ax.set_xticks(x)
# Label only the first event and every fifth one
ax.set_xticklabels([i if i == 1 or i % 5 == 0 else "" for i in x])
ax.set_yticks(range(0, max_total_ids + 1, 2))
fig.tight_layout()
plt.show()

# Save figure to paper repository
#save_to_paper(fig, f"count_flow-id.pdf")


In [None]:
"""
Plot Robustness Score for all device events.
"""

from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt


### DATA ###

# Initialize robustness score vector, one copy per event category
vectors = {
    "list_device_events": [],
    "list_robustness": []
}
vectors_per_event = {
    "boot": deepcopy(vectors),
    "others": deepcopy(vectors)
}

# Extract Robustness Score
for device in d:
    metrics_device = d[device][DictKeysMetrics.METRICS.name]
    for event, metrics in metrics_device.items():
        # Skip multiple events
        if event in fields_to_skip:
            continue

        event_category = "boot" if event == "boot" else "others"
        vectors_event = vectors_per_event[event_category]
        vectors_event["list_device_events"].append(f"{device}-{event}")
        vectors_event["list_robustness"].append(metrics[DictKeysMetrics.ROBUSTNESS_SCORE.name])

# Boot events come first, followed by all the other events
list_device_events = vectors_per_event["boot"]["list_device_events"] + vectors_per_event["others"]["list_device_events"]
list_robustness = vectors_per_event["boot"]["list_robustness"] + vectors_per_event["others"]["list_robustness"]
mean_robustness = np.mean(list_robustness)
max_robustness  = max(list_robustness)


### PLOT ###

# Initialize plot
figsize = (3.4, 1.4)
fig, ax = plt.subplots(figsize=figsize)

## Plot robustness score
# Boot event: first event IDs
vectors_boot = vectors_per_event["boot"]
x = range(1, len(vectors_boot["list_device_events"]) + 1)
ax.scatter(x, vectors_boot["list_robustness"], label="Boot events", color='black', marker='o', s=10)
# Other events: event IDs following the boot events
vectors_others = vectors_per_event["others"]
min_x = len(vectors_boot["list_device_events"]) + 1
x = range(min_x, min_x + len(vectors_others["list_device_events"]))
ax.scatter(x, vectors_others["list_robustness"], label="Other events", color='black', marker='x', s=15)

# Plot mean
print(f"Mean Robustness Score: {mean_robustness}")
ax.axhline(mean_robustness, color='black', linestyle='--', label=f'Mean = {mean_robustness:.2f}', linewidth=1)

# Plot metadata
n_events = len(list_device_events)
ax.grid(linewidth=GRID_LINE_THICKNESS)
ax.set_xlim(0, n_events + 1)
# Ticks are sized from this cell's own data,
# not from a list left over by another cell
ax.set_xticks(range(1, n_events + 1))
# Label only the first event and every fifth one
ax.set_xticklabels([i if i == 1 or i % 5 == 0 else "" for i in range(1, n_events + 1)])
# The range end is exclusive: + 1 keeps the tick of an even maximum score
ax.set_yticks(range(0, max_robustness + 1, 2))
#ax.set_xlabel("Event ID")
ax.set_ylabel("Robustness Score")
ax.legend()
fig.tight_layout()
plt.show()

# Save figure to paper repository
#save_to_paper(fig, f"robustness_score.pdf")


In [None]:
"""
Plot Robustness score as a stripplot.
"""

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

MEAN_LINE_WIDTH     = 0.5  # length of the mean marker, in x-axis units
MEAN_LINE_THICKNESS = 2    # line width of the mean marker


### DATA ###

# One subplot per classifier, in the order of the metadata tuples below
classifiers = [DictKeysMetadata.DEVICE_TYPE, DictKeysMetadata.APP, DictKeysMetadata.MANUFACTURER]

# Extract Robustness Score:
# "<device>-<event>" -> ((device type, app, manufacturer), robustness score)
robustness = {}
for device in d:
    device_type = d[device][DictKeysMetadata.METADATA.name][DictKeysMetadata.DEVICE_TYPE.name]
    app = d[device][DictKeysMetadata.METADATA.name][DictKeysMetadata.APP.name]
    manufacturer = d[device][DictKeysMetadata.METADATA.name][DictKeysMetadata.MANUFACTURER.name]
    metrics_device = d[device][DictKeysMetrics.METRICS.name]
    for event, metrics in metrics_device.items():
        # Skip multiple events
        if event in fields_to_skip:
            continue

        device_event = f"{device}-{event}"
        robustness[device_event] = ((device_type, app, manufacturer), metrics[DictKeysMetrics.ROBUSTNESS_SCORE.name])

# Sort by metadata: for each classifier,
# the entries of the most populated groups come first
list_robustness = []
for i in range(len(classifiers)):
    # Count the entries of each group once, instead of once per sorted item
    group_sizes = {}
    for metadata, _ in robustness.values():
        group_sizes[metadata[i]] = group_sizes.get(metadata[i], 0) + 1
    robustness_sorted = dict(sorted(robustness.items(), key=lambda item: group_sizes[item[1][0][i]], reverse=True))
    list_robustness.append(robustness_sorted)


### PLOT ###

# Initialize plot
fig = plt.figure(figsize=(3.4,2))
gs = GridSpec(1, 3, width_ratios=[1, 4/3, 2])
gs.update(wspace=-1)

ax1 = fig.add_subplot(gs[0])
ax1.set_ylabel("Robustness Score")
ax2 = fig.add_subplot(gs[1], sharey=ax1)  # Share y-axis with ax1
ax3 = fig.add_subplot(gs[2], sharey=ax1)  # Share y-axis with ax1
axes = [ax1, ax2, ax3]

# The loop variable has its own name, so that the `robustness` dictionary is not overwritten
for i, robustness_sorted in enumerate(list_robustness):

    # Groups of the classifier, in order of first appearance
    groups = list(dict.fromkeys([metadata[i] for metadata, _ in robustness_sorted.values()]))
    for j, group in enumerate(groups, start=1):
        # Get x and y axes data
        y = [score for metadata, score in robustness_sorted.values() if metadata[i] == group]
        x = [j] * len(y)

        # Plot data
        axes[i].scatter(x, y, alpha=0.6, color="black", marker="x")

        if len(y) > 1:
            # Add mean line, labelled only once for the legend
            mean = np.mean(y)
            x_start_mean = j - MEAN_LINE_WIDTH / 2
            x_end_mean = j + MEAN_LINE_WIDTH / 2
            axes[i].plot([x_start_mean, x_end_mean], [mean, mean], color='red', linewidth=MEAN_LINE_THICKNESS, label="Mean" if j == 1 else None)

    # Add legend for mean on 3rd subplot, once all its groups are plotted
    if i == 2 and groups:
        axes[i].legend(handlelength=MEAN_LINE_WIDTH * 2, bbox_to_anchor=(0.6, 0.9))


    # Replace categorical value "SmartThings" with "ST" for brevity
    # try:
    #     idx_st = groups.index(Apps.SMARTTHINGS.value)
    #     groups[idx_st] = "ST"
    # except ValueError:
    #     pass

    # Plot metadata
    classifier = classifiers[i].name.lower()
    axes[i].grid(axis='y')
    axes[i].set_xticks(range(1, len(groups) + 1))
    axes[i].set_xticklabels(groups, rotation=45, ha="right")
    max_robustness = max([score for _, score in robustness_sorted.values()])
    axes[i].set_yticks(range(0, max_robustness + 1, 2))
    xlabel = "Device category" if classifier == "device_type" else classifier.capitalize()
    axes[i].set_xlabel(xlabel)
    # Only the first subplot shows the labels of the shared y-axis
    if i != 0:
        plt.setp(axes[i].get_yticklabels(), visible=False)

#fig.suptitle("Robustness Score")
plt.subplots_adjust(hspace=0)
fig.align_xlabels(axes)
gs.tight_layout(fig)
plt.show()

# Save figure to paper repository
#save_to_paper(fig, "robustness_grouped.pdf")


In [None]:
"""
Domain names, first-level and hidden,
for all device events.
"""

from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt

### DATA ###

# Initialize x & y vectors, one copy per event category
vectors = {
    "list_device_events": [],
    "list_first_level_names": [],
    "list_first_level_resolvers": [],
    "list_hidden_names": [],
    "list_hidden_resolvers": []
}
vectors_per_event = {
    "boot": deepcopy(vectors),
    "others": deepcopy(vectors)
}

# Retrieve data
max_total_names = 0
n_total_resolvers   = 0
max_total_resolvers = 0
for device in d:
    metrics_device = d[device][DictKeysMetrics.METRICS.name]
    for event, metrics in metrics_device.items():
        # Skip multiple events
        if event in fields_to_skip:
            continue

        event_category = "boot" if event == "boot" else "others"

        vectors_event = vectors_per_event[event_category]

        # Device-event pair
        vectors_event["list_device_events"].append(f"{device}-{event}")

        # The sets below are absent from the metrics when nothing was found,
        # hence the empty set as default value

        # First-level domain names
        n_first_level_names = len(metrics.get(DictKeysMetrics.DOMAIN_NAMES_LVL_1.name, set()))
        vectors_event["list_first_level_names"].append(n_first_level_names)

        # First-level domain resolvers
        n_first_level_resolvers = len(metrics.get(DictKeysMetrics.DNS_SERVERS_LVL_1.name, set()))
        vectors_event["list_first_level_resolvers"].append(n_first_level_resolvers)

        # Hidden domain names
        n_hidden_names = len(metrics.get(DictKeysMetrics.DOMAIN_NAMES_HIDDEN.name, set()))
        vectors_event["list_hidden_names"].append(n_hidden_names)

        # Hidden domain resolvers
        n_hidden_resolvers = len(metrics.get(DictKeysMetrics.DNS_SERVERS_HIDDEN.name, set()))
        vectors_event["list_hidden_resolvers"].append(n_hidden_resolvers)

        # Update aggregated values
        max_total_names = max(max_total_names, n_first_level_names + n_hidden_names)
        n_total_resolvers += n_first_level_resolvers + n_hidden_resolvers
        max_total_resolvers = max(max_total_resolvers, n_first_level_resolvers + n_hidden_resolvers)

# Boot events come first, followed by all the other events
list_first_level_names = vectors_per_event["boot"]["list_first_level_names"] + vectors_per_event["others"]["list_first_level_names"]
list_first_level_resolvers = vectors_per_event["boot"]["list_first_level_resolvers"] + vectors_per_event["others"]["list_first_level_resolvers"]
list_hidden_names = vectors_per_event["boot"]["list_hidden_names"] + vectors_per_event["others"]["list_hidden_names"]
list_hidden_resolvers = vectors_per_event["boot"]["list_hidden_resolvers"] + vectors_per_event["others"]["list_hidden_resolvers"]

#pprint(vectors_per_event)


### PLOT ###

# Bar width, positions and colors, shared by both figures.
# Each event gets two side-by-side stacked bars: names, then resolvers.
bar_width = 0.3
bar_positions = [bar_width/2 * pos for pos in [-1, 1]]
bar_colors = ["black", "gray", "white", "white"]


### Boot events only

# Initialize plot
figsize = (FIG_WIDTH, 1)
fig, ax = plt.subplots(figsize=figsize)
ax.grid(axis='y')

# Server names
list_first_level_names_boot = vectors_per_event["boot"]["list_first_level_names"]
list_hidden_names_boot = vectors_per_event["boot"]["list_hidden_names"]
idx_servers = np.arange(len(list_first_level_names_boot))
ax.bar(idx_servers + bar_positions[0], list_first_level_names_boot, width=bar_width, color=bar_colors[0], edgecolor='black', linewidth=LINE_THICKNESS, label="First-Level server names")
ax.bar(idx_servers + bar_positions[0], list_hidden_names_boot, width=bar_width, color=bar_colors[1], edgecolor='black', linewidth=LINE_THICKNESS, bottom=list_first_level_names_boot, label="Hidden server names")

# DNS resolvers
list_first_level_resolvers_boot = vectors_per_event["boot"]["list_first_level_resolvers"]
list_hidden_resolvers_boot = vectors_per_event["boot"]["list_hidden_resolvers"]
idx_resolvers = np.arange(len(list_first_level_resolvers_boot))
ax.bar(idx_resolvers + bar_positions[1], list_first_level_resolvers_boot, width=bar_width, color=bar_colors[2], edgecolor='black', linewidth=LINE_THICKNESS, label="First-Level DNS resolvers")
ax.bar(idx_resolvers + bar_positions[1], list_hidden_resolvers_boot, width=bar_width, color=bar_colors[3], edgecolor='black', hatch="////", linewidth=LINE_THICKNESS, bottom=list_first_level_resolvers_boot, label="Hidden DNS resolvers")

## Plot metadata
# x-axis
ax.set_xticks(idx_servers)
ax.set_xticklabels(idx_servers)
labels_devices = [event.replace("-boot", "") for event in vectors_per_event["boot"]["list_device_events"]]
#ax.set_xticklabels(labels_devices, rotation=45, ha="right")  # Uncomment to set x-axis ticks to device names
#ax.invert_yaxis()
# y-axis: the maxima are computed over all events, not only the boot ones
ax.set_ylabel("Count")
max_y = max(max_total_names, max_total_resolvers)
ax.set_yticks(range(0, max_y + 1, 2))

# Show figure
fig.tight_layout()
plt.show()

# Save figure to paper repository
#save_to_paper(fig, "count_dns_boot.pdf")

## Save legend as standalone figure, built from the handles of the plot above
figsize = (FIG_WIDTH, 0.4)
fig_legend = plt.figure(figsize=figsize)
legend = fig_legend.legend(*ax.get_legend_handles_labels(), loc="center", ncol=2)
fig_legend.canvas.draw()
fig_legend.tight_layout()
#save_to_paper(fig_legend, "count_dns_boot_legend.pdf")


### All events

# Initialize plot
figsize = (FIG_WIDTH, 6)
fig, ax = plt.subplots(figsize=figsize)
ax.grid(axis="x")

## Plot horizontal stacked bars, one pair per event ID (starting from 1)
x = 1 + np.arange(len(list_first_level_names))
# Domain names
ax.barh(x + bar_positions[0], list_first_level_names, height=bar_width, color=bar_colors[0], edgecolor="black", linewidth=LINE_THICKNESS, label="First-Level domain names")
ax.barh(x + bar_positions[0], list_hidden_names, height=bar_width, color=bar_colors[1], edgecolor="black", linewidth=LINE_THICKNESS, left=list_first_level_names, label="Hidden domain names")
# DNS resolvers
ax.barh(x + bar_positions[1], list_first_level_resolvers, height=bar_width, color=bar_colors[2], edgecolor="black", linewidth=LINE_THICKNESS, label="First-Level DNS resolvers")
ax.barh(x + bar_positions[1], list_hidden_resolvers, height=bar_width, color=bar_colors[3], hatch="////", edgecolor="black", linewidth=LINE_THICKNESS, left=list_first_level_resolvers, label="Hidden DNS resolvers")

## Plot metadata
# Event ID: label only the first event and every fifth one
ax.set_ylabel("Event ID")
ax.set_ylim(0.5, len(list_first_level_names) + 0.5)
ax.set_yticks(x)
ax.set_yticklabels([i if i == 1 or i % 5 == 0 else "" for i in range(1, len(list_first_level_names) + 1)])
# Count
ax.set_xlabel("Count")
max_count = max(max_total_names, max_total_resolvers)
ax.set_xticks(range(0, max_count + 1, 1))
ax.invert_yaxis()
ax.legend(bbox_to_anchor=(0.41, 0.1))

# Show figure
fig.tight_layout()
plt.show()

# Save figure to paper repository
#save_to_paper(fig, f"count_dns_all.pdf")


In [None]:
"""
Count pruned nodes per depth.
Only for TP-Link HS110's toggle event.
"""

from pathlib import Path
import json
from pprint import pprint


def is_pruned(node: dict) -> bool:
    """
    Tell whether a node is pruned, i.e. has no children.

    Args:
        node (dict): Node to check.
    Returns:
        bool: True if the node's "children" entry is missing or empty,
              False otherwise.
    """
    children = node.get("children", [])
    return len(children) == 0


def compute_pruned_nodes_per_depth(node: dict, data: dict) -> dict:
    """
    Recursively count the pruned and not pruned nodes per depth,
    over the given node and its descendants.

    The depth of a node is the first element of its "data" entry;
    a node without it is ignored, together with its descendants.

    Args:
        node (dict): Node to process.
        data (dict): Accumulator mapping each depth to the tuple
                     (count of not pruned nodes, count of pruned nodes).
                     Updated in place.
    Returns:
        dict: Updated accumulator.
    """
    node_data = node.get("data", [])
    try:
        depth = node_data[0]
    except IndexError:
        # No depth information: nothing to count
        return data

    # Count the node itself
    not_pruned_count, pruned_count = data.get(depth, (0, 0))
    if is_pruned(node):
        pruned_count += 1
    else:
        not_pruned_count += 1
    data[depth] = (not_pruned_count, pruned_count)

    # Each child is a dictionary whose first key is the child's identifier
    for child in node.get("children", []):
        child_id = list(child.keys())[0]
        compute_pruned_nodes_per_depth(child[child_id], data)

    return data



### DATA ###

# Load event's JSON tree
device = "TpLinkPlug"
event  = "toggle"
tree_json_path = Path(d[device][DictKeysMetrics.METRICS.name][event][DictKeysMetrics.FILE_TREE.name])
# Explicit encoding, so that decoding does not depend on the platform locale
with open(tree_json_path, "r", encoding="utf-8") as f:
    dict_tree = json.load(f)

#print(json.dumps(dict_tree, indent=2))

# Read event signature tree:
# tally the nodes of every first-level subtree into `data`,
# which maps a depth to the tuple (n_not_pruned, n_pruned)
data = {}
for child in dict_tree["0_root"]["children"]:
    # Only the first key of each child mapping is used.
    # Named `node_id` to avoid rebinding the builtin `id` at notebook scope.
    node_id = list(child.keys())[0]
    compute_pruned_nodes_per_depth(child[node_id], data)

print(data)


### PLOT ###

# Split the per-depth tallies into the series needed by the stacked bars
x            = list(data)
y_not_pruned = [n_not_pruned for n_not_pruned, _ in data.values()]
y_pruned     = [n_pruned for _, n_pruned in data.values()]

# Initialize plot
figsize = (2, 3)
fig, ax = plt.subplots(figsize=figsize)

# Stacked bars: pruned counts are drawn on top of the not-pruned ones
bar_style = {"edgecolor": "black", "linewidth": LINE_THICKNESS}
ax.bar(x, y_not_pruned, label="Not pruned", color="white", **bar_style)
ax.bar(x, y_pruned, bottom=y_not_pruned, label="Pruned", color="black", **bar_style)

# Metadata
ax.set_xlabel("Event signature tree depth")
ax.set_ylabel("Flow ID count")
ax.set_xticks(x)
ax.grid(axis="y", linewidth=GRID_LINE_THICKNESS)
ax.legend()

# Show figure
fig.tight_layout()
plt.show()


In [None]:
"""
Count pruned nodes per depth,
for all events.
"""

from pathlib import Path
import json
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter


# Deepest tree level taken into account: depths 1..DEPTH_MAX are tallied and plotted,
# and the plot styles further down list one color/hatch entry per depth.
DEPTH_MAX = 4


def is_pruned(node: dict) -> bool:
    """
    Tell whether a tree node counts as pruned.

    A node is treated as pruned when it has no "children" entry,
    or when that entry is empty.

    Args:
        node (dict): Node to inspect.
    Returns:
        bool: True if the node has no children, False otherwise.
    """
    children = node.get("children", [])
    return not len(children)


def compute_pruned_nodes_per_depth(node: dict, data: dict) -> dict:
    """
    Walk the subtree rooted at the given node and count,
    for every depth, how many nodes are pruned.

    The depth of a node is the first element of its "data" list.
    A node whose "data" list is missing or empty is ignored,
    and its children are not visited.
    Every visited depth gets an entry, even when its count stays at zero.

    Args:
        node (dict): Root of the subtree to process.
        data (dict): Accumulator mapping a depth to its count of pruned nodes;
                     updated in place.
    Returns:
        dict: The same accumulator, updated.
    """
    node_data = node.get("data", [])
    try:
        depth = node_data[0]
    except IndexError:
        # No depth information: nothing to count for this subtree
        return data

    count = data.get(depth, 0)
    data[depth] = count + 1 if is_pruned(node) else count

    # Only the first key of each child mapping is followed
    for child in node.get("children", []):
        first_key = list(child.keys())[0]
        compute_pruned_nodes_per_depth(child[first_key], data)

    return data



### DATA ###

# Per-category accumulators.
# Besides the names of its events, every category holds one list per depth
# (integer keys 1..DEPTH_MAX). The lists are created up front so that a
# category without any event still exposes every depth key to the plot code.
dict_categories = {
    category: {
        "list_device_events": [],
        **{depth: [] for depth in range(1, DEPTH_MAX + 1)}
    }
    for category in ("boot", "others")
}

# Load failed events data
path_json_failed = Path(DEVICES_DIR, "failed_events.json")
# Explicit encoding, so that decoding does not depend on the platform locale
with open(path_json_failed, "r", encoding="utf-8") as f:
    data_failed = json.load(f)


# Compute count of pruned nodes per depth
for device in d:
    metrics_device = d[device][DictKeysMetrics.METRICS.name]
    for event, metrics in metrics_device.items():
        # Skip the entries listed in fields_to_skip
        if event in fields_to_skip:
            continue

        category = "boot" if event == "boot" else "others"
        dict_category = dict_categories[category]
        dict_category["list_device_events"].append(f"{device}-{event}")

        # Load event's JSON tree
        tree_json_path = Path(metrics[DictKeysMetrics.FILE_TREE.name])
        with open(tree_json_path, "r", encoding="utf-8") as f:
            dict_tree = json.load(f)

        # Compute count of pruned nodes
        data = {}
        for child in dict_tree["0_root"]["children"]:
            # Only the first key of each child mapping is used.
            # Named `node_id` to avoid rebinding the builtin `id` at notebook scope.
            node_id = list(child.keys())[0]
            compute_pruned_nodes_per_depth(child[node_id], data)

        # Get failed events iteration count for this event
        # (keys are depths as strings, as loaded from the JSON file)
        failed_per_depth = data_failed.get(f"{device}-{event}", {})

        # Record, for every depth, the pruned count minus the failed count
        for depth in range(1, DEPTH_MAX + 1):
            n_pruned = data.get(depth, 0)
            n_failed = failed_per_depth.get(str(depth), 0)
            dict_category[depth].append(n_pruned - n_failed)


# Boot events first, then all the others: same order as the per-depth lists are concatenated when plotting
list_device_events = dict_categories["boot"]["list_device_events"] + dict_categories["others"]["list_device_events"]



### PLOT ###

# Initialize plot
figsize = (FIG_WIDTH, 2.2)
fig, ax = plt.subplots(figsize=figsize)

# Styles, one entry per depth (index depth - 1)
colors  = ["white", "white", "gray", "black"]
hatches = ["////", "", "", ""]

# Plot data: one stacked bar per device event, one segment per depth
n_events = len(list_device_events)
x = range(1, n_events + 1)
bottom = np.zeros(n_events)
max_x = 0  # Height of the tallest stack so far (a y value, despite the name)
for depth in range(1, DEPTH_MAX + 1):
    list_n_pruned = dict_categories["boot"][depth] + dict_categories["others"][depth]
    # `bottom` is all zeros for the first depth, so every depth can be drawn the same way
    ax.bar(x, list_n_pruned, bottom=bottom, label=f"depth={depth}", color=colors[depth-1], hatch=hatches[depth-1], edgecolor="black", linewidth=LINE_THICKNESS)

    # Build a new array instead of updating in place, so the one handed to ax.bar() is never modified
    bottom = bottom + np.array(list_n_pruned)
    max_x = max(max_x, max(bottom))

# Metadata
ax.grid(axis="y", linewidth=GRID_LINE_THICKNESS)
#ax.set_xlabel("Event ID")
ax.set_ylabel("Count of pruned Flow IDs")
ax.legend()
# Limits, ticks and tick labels are all derived from the plotted events,
# so the number of labels always matches the number of ticks
ax.set_xlim(0, n_events + 1)
ax.set_xticks(x)
# Only label the first event and every fifth one
ax.set_xticklabels([i if i == 1 or i % 5 == 0 else "" for i in x])
ax.set_ylim(1, max_x + 100)

# Log scale y-axis, with plain (non-scientific) tick labels
ax.set_yscale("log")
ax.yaxis.set_major_formatter(ScalarFormatter())
ax.ticklabel_format(style='plain', axis='y')
ax.yaxis.get_major_formatter().set_useOffset(False)

# Show or save figure
fig.tight_layout()
plt.show()
#save_to_paper(fig, "count_pruned.pdf")


In [None]:
"""
Load data from other works:
- PingPong
- BehavIoT
- MUDgee
- Blocking without Breaking
"""

import os
import glob
import json
from pprint import pprint

# Paths
# Directory holding the data of the other works;
# load_data_others() reads the sub-directory named after the requested work.
path_others = os.path.join(BASE_DIR, "others")


def load_data_others(name: str) -> dict:
    """
    Read the JSON trees stored for another work.

    Every "*.json" file found in the work's directory (under `path_others`)
    is expected to be named "<device>.<event>...json". Files whose event part
    is "no_boot" or "raw" are skipped.

    Args:
        name (str): Name of the work, i.e. of its sub-directory.
    Returns:
        dict: One entry per "<device>-<event>", holding
              "policies" (policy name -> first item of the second element
              of the policy's "data") and "count" (number of policies read).
    """
    dir_work = os.path.join(path_others, name)
    result = {}

    for path_json in glob.glob(os.path.join(dir_work, "*.json")):
        name_parts = os.path.basename(path_json).split(".")
        device, event = name_parts[0], name_parts[1]
        if event in ("no_boot", "raw"):
            continue

        with open(path_json, "r") as f:
            tree = json.load(f)

        # Collect the first-level children of the root
        policies = {}
        n_policies = 0
        for policy in tree["0_root"]["children"]:
            policy_name = list(policy.keys())[0]
            policies[policy_name] = policy[policy_name]["data"][1][0]
            n_policies += 1

        result[f"{device}-{event}"] = {
            "policies": policies,
            "count": n_policies
        }

    return result


# Load the data of PingPong, BehavIoT and MUDgee,
# printing each result right after it has been loaded
loaded_works = {}
for work_name in ("pingpong", "behaviot", "mudgee"):
    loaded_works[work_name] = load_data_others(work_name)
    pprint(loaded_works[work_name], indent=2)

dict_pingpong = loaded_works["pingpong"]
dict_behaviot = loaded_works["behaviot"]
dict_mudgee   = loaded_works["mudgee"]


In [None]:
"""
Compare our work with PingPong and BehavIoT,
at the event level.
"""

from pprint import pprint
import numpy as np


### DATA ###

set_device_events_pingpong = set(dict_pingpong)
set_device_events_behaviot = set(dict_behaviot)

# Metric keys stored for every device event
key_first_lvl = DictKeysMetrics.N_FIRST_LVL_NODES.name
key_unique    = DictKeysMetrics.N_UNIQUE_NODES.name

# Device events compared for our work
list_device_events = [
    "TpLinkPlug-toggle",
    "SmartThingsOutlet-boot",
    "SmartThingsOutlet-toggle",
    "DLinkCamera-stream",
    "HueLight-toggle"
]

# For the other works, both metrics are set to the number of loaded policies
values = {
    "ours": {},
    "PingPong": {
        device_event: {key_first_lvl: entry["count"], key_unique: entry["count"]}
        for device_event, entry in dict_pingpong.items()
    },
    "BehavIoT": {
        device_event: {key_first_lvl: entry["count"], key_unique: entry["count"]}
        for device_event, entry in dict_behaviot.items()
    },
}

# Ours: read both metrics from the metrics of each selected device event
for device_event in list_device_events:
    device, event = device_event.split("-")
    device_event_metrics = d[device][DictKeysMetrics.METRICS.name][event]
    values["ours"][device_event] = {
        key: device_event_metrics[key] for key in (key_first_lvl, key_unique)
    }

#pprint(values, indent=2)


### PLOT ###

# Figure with a fixed size
# (use the commented call instead to let matplotlib set the size automatically)
figsize = (FIG_WIDTH, 2)
fig, ax = plt.subplots(figsize=figsize)
#fig, ax = plt.subplots()
ax.grid(axis='x', linewidth=GRID_LINE_THICKNESS)

# Bar width, positions and colors: one entry per work
bar_width = 0.2
bar_positions = [bar_width/2 * pos for pos in [-2, 0, 2]]
bar_colors = ["black", "gray", "white"]
idx_devices = np.arange(len(list_device_events))

# Rows are filled in reverse order of list_device_events,
# and the tick labels are reversed the same way
yticklabels = list(reversed(list_device_events))
key_unique_nodes = DictKeysMetrics.N_UNIQUE_NODES.name

# Plot bars, one group per device event and one bar per work
max_value = 0
for i, work in enumerate(values):
    values_work = [values[work].get(device_event, {}).get(key_unique_nodes, 0) for device_event in yticklabels]
    label = work if work != "ours" else "This work"
    ax.barh(idx_devices + bar_positions[i], values_work, bar_width, label=label, color=bar_colors[i], edgecolor="black", linewidth=LINE_THICKNESS)
    max_value = max(max(values_work), max_value)

# Labels and title
ax.set_xlabel('Number of unique flow IDs')
#ax.set_title('Number of unique flow IDs discovered by each work')
ax.set_yticks(idx_devices)
ax.set_yticklabels(yticklabels)
# One tick per integer, labelled only on even values
xticks = range(0, max_value + 1)
ax.set_xticks(xticks)
ax.set_xticklabels([tick if tick % 2 == 0 else "" for tick in xticks])
ax.legend(reverse=True)

# Show plot
fig.tight_layout()
plt.show()

# Save plot
#save_to_paper(fig, "comparison_events.pdf")


In [None]:
"""
Compare our work with MUDgee,
at the device level.
"""

from pprint import pprint


devices_mudgee = set(dict_mudgee)

# Metric keys stored for every device
keys_counts = (
    DictKeysMetrics.N_FIRST_LVL_NODES.name,
    DictKeysMetrics.N_UNIQUE_NODES.name
)


def _device_level_counts(device: str) -> dict:
    """
    Read both metrics from the "device" entry of the given device's metrics.

    Args:
        device (str): Name of the device.
    Returns:
        dict: The two metrics of the device.
    """
    device_metrics = d[device][DictKeysMetrics.METRICS.name]["device"]
    return {key: device_metrics[key] for key in keys_counts}


## Populate values

values = {
    # Our plugs
    "ours": {
        device: _device_level_counts(device)
        for device in ["TpLinkPlug", "SmartThingsOutlet"]
    },
    # MUDgee: keyed by the device part of the loaded name,
    # both metrics being set to the number of loaded policies
    "MUDgee": {
        device_event.split("-")[0]: dict.fromkeys(keys_counts, entry["count"])
        for device_event, entry in dict_mudgee.items()
    }
}

# Our Hue Light
pprint(d["HueLight"][DictKeysMetrics.METRICS.name])
values["ours"]["HueLight"] = _device_level_counts("HueLight")

pprint(values)


## Plot

# Metadata
devices = list(values["ours"].keys())
# Bar width and positions, one entry per work
bar_width = 0.2
bar_positions = [bar_width/2 * pos for pos in [-1, 1]]
colors_bars  = ["black", "white"]
idx_devices = np.arange(len(devices))
# Figure with a fixed size
# (use the commented call instead to let matplotlib set the size automatically)
figsize = (FIG_WIDTH, 1.25)
fig, ax = plt.subplots(figsize=figsize)
#fig, ax = plt.subplots()
# Same grid line thickness as the other figures
ax.grid(axis='x', linewidth=GRID_LINE_THICKNESS)

# Plot bars, one group per device and one bar per work.
# Rows are filled in reverse order of `devices`, matching the reversed tick labels below.
max_value = 0
for i, (work, data_device) in enumerate(values.items()):
    values_work = [data_device.get(device, {}).get(DictKeysMetrics.N_UNIQUE_NODES.name, 0) for device in reversed(devices)]
    label = "This work" if work == "ours" else work
    ax.barh(idx_devices + bar_positions[i], values_work, bar_width, label=label, color=colors_bars[i], edgecolor="black", linewidth=LINE_THICKNESS)
    max_value = max(max_value, max(values_work))

# Labels and title
ax.set_xlabel('Number of unique flow IDs')
#ax.set_title('Number of unique flow IDs discovered by each work')
ax.set_yticks(idx_devices)
labels_devices = list(reversed(devices))
ax.set_yticklabels(labels_devices)
ax.set_xticks(range(0, max_value + 1, 2))
ax.legend(reverse=True, bbox_to_anchor=(0.6, 0.5))

# Show plot
fig.tight_layout()
plt.show()

# Save plot
#save_to_paper(fig, "comparison_full.pdf")


In [None]:
"""
Compare our work with both
the event-level works (PingPong & BehavIoT)
and the device-level work (MUDgee).
"""


from pprint import pprint
import numpy as np


### DATA ###

set_device_events_pingpong = set(dict_pingpong)
set_device_events_behaviot = set(dict_behaviot)
set_device_events_mudgee   = set(dict_mudgee)

# Metric keys stored for every entry
keys_counts = (
    DictKeysMetrics.N_FIRST_LVL_NODES.name,
    DictKeysMetrics.N_UNIQUE_NODES.name
)


## Populate values

# Other works: both metrics are set to the number of loaded policies
values_events = {
    "ours":     {},
    "PingPong": {
        device_event: dict.fromkeys(keys_counts, entry["count"])
        for device_event, entry in dict_pingpong.items()
    },
    "BehavIoT": {
        device_event: dict.fromkeys(keys_counts, entry["count"])
        for device_event, entry in dict_behaviot.items()
    }
}
values_devices = {
    "ours":     {},
    "MUDgee":   {
        device_event: dict.fromkeys(keys_counts, entry["count"])
        for device_event, entry in dict_mudgee.items()
    }
}


## Ours

list_events = [
    "TpLinkPlug-toggle",
    "SmartThingsOutlet-boot",
    "SmartThingsOutlet-toggle",
    "DLinkCamera-stream",
    "HueLight-toggle"
]
list_devices = [
    "TpLinkPlug-device",
    "SmartThingsOutlet-device",
    "HueLight-device"
]

# Event-level entries first, then device-level ones;
# the part after the dash selects the entry read from the device's metrics
for list_names, values_ours in ((list_events, values_events["ours"]), (list_devices, values_devices["ours"])):
    for device_event in list_names:
        device, event = device_event.split("-")
        device_event_metrics = d[device][DictKeysMetrics.METRICS.name][event]
        values_ours[device_event] = {key: device_event_metrics[key] for key in keys_counts}

pprint(values_events,  indent=2)
pprint(values_devices, indent=2)


### PLOT ###

# Figure with a fixed size
# (use the commented call instead to let matplotlib set the size automatically)
figsize = (FIG_WIDTH, 2)
fig, ax = plt.subplots(figsize=figsize)
#fig, ax = plt.subplots()
ax.grid(axis='x', linewidth=GRID_LINE_THICKNESS)


## Event-level

# Bar width and positions, one entry per work
bar_width = 0.2
bar_positions = [bar_width/2 * pos for pos in [-2, 0, 2]]
bar_colors = ["black", "gray", "white"]
# Event rows are placed above the device rows
idx_events = np.arange(len(list_events)) + len(list_devices)

# Plot bars; rows are filled in reverse order of list_events
max_value = 0
for i, (work, data_event) in enumerate(values_events.items()):
    values_work = [data_event.get(event, {}).get(DictKeysMetrics.N_UNIQUE_NODES.name, 0) for event in reversed(list_events)]
    label = "This work" if work == "ours" else work
    ax.barh(idx_events + bar_positions[i], values_work, bar_width, label=label, color=bar_colors[i], edgecolor="black", linewidth=LINE_THICKNESS)
    max_value = max(max_value, max(values_work))


## Device-level

# Bar width and positions, one entry per work
bar_width = 0.2
bar_positions = [bar_width/2 * pos for pos in [-1, 1]]
bar_colors  = ["black", "white"]
bar_hatches = ["", "////"]
idx_devices = np.arange(len(list_devices))

# Plot bars; rows are filled in reverse order of list_devices
for i, (work, data_devices) in enumerate(values_devices.items()):
    values_work = [data_devices.get(device, {}).get(DictKeysMetrics.N_UNIQUE_NODES.name, 0) for device in reversed(list_devices)]
    # Our work already has a legend entry from the event-level bars
    label = None if work == "ours" else work
    ax.barh(idx_devices + bar_positions[i], values_work, bar_width, label=label, color=bar_colors[i], edgecolor="black", hatch=bar_hatches[i], linewidth=LINE_THICKNESS)
    max_value = max(max_value, max(values_work))


# Labels and title
ax.set_xlabel('Number of unique Flow IDs')
#ax.set_title('Number of unique Flow IDs discovered by each work')
ax.set_yticks(np.concatenate((idx_devices, idx_events)))
# Materialized as a list (not a one-shot iterator): reversed devices first, then reversed events,
# i.e. the same order as the tick positions above
yticklabels = list(reversed(list_events + list_devices))
ax.set_yticklabels(yticklabels)
# One tick per integer, labelled only on even values
ax.set_xticks(range(0, max_value + 1))
ax.set_xticklabels([i if i % 2 == 0 else "" for i in range(0, max_value + 1)])
ax.legend()

# Show plot
fig.tight_layout()
plt.show()

# Save plot
#save_to_paper(fig, "comparison_all.pdf")

