In [None]:
%load_ext nb_black

import os

# Whether to produce PDF output for LaTeX (on when the EXPORT_LATEX environment
# variable is set to a non-empty value)
LATEX = bool(os.getenv("EXPORT_LATEX"))

# Import experimental data

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Default seaborn palette; `cmap` is also indexed directly for reference lines
palette = "deep"
cmap = sns.color_palette(palette, as_cmap=True)
sns.set_palette(cmap)

# For use in Latex document: show the plot palette and the "dark" palette
if LATEX:
    for item in (
        "Color map (plots)",
        cmap,
        "Color map (text)",
        sns.color_palette("dark", as_cmap=True),
    ):
        display(item)

# Naming conventions
measurement_dir = "Measurements/privatedrop"
receiver_str = "receiver"
sender_str = "sender"

# Experiments: every sub-directory of `measurement_dir` is one experiment.
# os.listdir() returns entries in arbitrary order, so they are sorted to make the
# row order of the combined data frame reproducible across runs and machines.
experiments = [
    path
    for path in (
        os.path.join(measurement_dir, entry)
        for entry in sorted(os.listdir(measurement_dir))
    )
    if os.path.isdir(path)
]

# Timestamp columns of the PSI computation steps; they appear in both lists below
_psi_step_columns = [
    "StartCalculatingV",
    "CalculatedV",
    "StartIntersecting",
    "Intersected",
    "StartCalculatingZ",
    "CalculatedZ",
    "StartCalculatingPOK",
    "CalculatedPOK",
    "PSICompleted",
]

# Timestamp columns of the receiver
date_columns_receiver = [
    "startTime",
    "StartVerifying",
    "Verified",
    *_psi_step_columns,
    "ReceivedStartPSIRequest",
    "SentStartPSIResponse",
    "ReceivedFinishPSIRequest",
    "SentFinishPSIResponse",
    "ReceivedDiscoverRequest",
    "SentDiscoverResponse",
    "UploadFinished",
]
# Timestamp columns of the sender
date_columns_sender = [
    "startTime",
    *_psi_step_columns,
    "StartedBrowsingBonjour",
    "DiscoveredReceiverBonjour",
    "SendingStartPSIRequest",
    "ReceivedStartPSIResponse",
    "SendingFinishPSIRequest",
    "ReceivedFinishPSIResponse",
    "SendingDiscoverRequest",
    "ReceivedDiscoverResponse",
    "StartsSendingFile",
    "UploadFinished",
    "DiscoverCompleted",
]
# Union of both lists without duplicates (the order of the result is arbitrary)
date_columns = list({*date_columns_receiver, *date_columns_sender})


def columns_to_datetime(df: pd.DataFrame):
    """
    Parse every column listed in `date_columns` with `pd.to_datetime`.

    Parsing uses errors="coerce", so unparsable values do not raise.
    The frame is modified in place and returned as well.
    """
    parsed = df[date_columns].apply(pd.to_datetime, errors="coerce")
    df[date_columns] = parsed
    return df


def read_csv(file: str):
    """
    Read one measurement CSV and tag its rows with a "Role" column.

    The role is `sender_str` when that string occurs anywhere in `file` (the path
    exactly as passed in) and `receiver_str` otherwise.
    """
    frame = pd.read_csv(file)
    if sender_str in file:
        frame["Role"] = sender_str
    else:
        frame["Role"] = receiver_str
    return frame


def generate_df(exp_dir: str):
    """
    Load all CSV files of one experiment directory into a single data frame.

    Every file is read with `read_csv` (which adds the "Role" column) and tagged
    with a "Scenario" column holding the base name of `exp_dir`.

    :param exp_dir: path of the experiment directory
    :return: concatenation of all per-file frames, or an empty frame when the
        directory contains no ".csv" file
    """
    # os.listdir() has no defined order; sorting makes the row order reproducible
    csv_files = [
        os.path.join(exp_dir, name)
        for name in sorted(os.listdir(exp_dir))
        if name.endswith(".csv")
    ]
    if not csv_files:
        return pd.DataFrame()

    scenario = os.path.basename(exp_dir)
    frames = []
    for path in csv_files:
        frame = read_csv(path)
        frame["Scenario"] = scenario
        frames.append(frame)
    return pd.concat(frames)


def calculate_durations(df):
    """
    Calculate all durations required for analysis

    Every duration is stored as a new column holding "end column - start column".
    `df` is extended in place and returned.
    """

    def add_differences(spans):
        # spans: iterable of (new column, end column, start column)
        for name, end, start in spans:
            df[name] = df[end] - df[start]

    df["contactsTotal"] = df["NumberOfContacts"]
    df["idsTotal"] = df["NumberOfIDs"]

    add_differences(
        [
            # Bonjour
            ("durationBonjour", "DiscoveredReceiverBonjour", "StartedBrowsingBonjour"),
            # AirDrop authentication
            ("durationDiscover", "ReceivedDiscoverResponse", "SendingDiscoverRequest"),
            # PSI authentication
            ("durationPSITotal", "PSICompleted", "SendingStartPSIRequest"),
            ("durationStartPSI_r", "SentStartPSIResponse", "ReceivedStartPSIRequest"),
            ("durationFinishPSI_r", "SentFinishPSIResponse", "ReceivedFinishPSIRequest"),
            ("durationStartPSI", "ReceivedStartPSIResponse", "SendingStartPSIRequest"),
            ("durationFinishPSI", "ReceivedFinishPSIResponse", "SendingFinishPSIRequest"),
        ]
    )

    # Pure transmission delays: subtract the receiver-side "_r" duration from the
    # request/response duration seen by the sender. Receiver rows are matched to
    # sender rows purely by position (n-th receiver row -> n-th sender row), so both
    # roles must have the same number of rows.
    is_sender = df["Role"] == "sender"
    is_receiver = df["Role"] == "receiver"
    for sender_col, receiver_col in (
        ("durationStartPSI", "durationStartPSI_r"),
        ("durationFinishPSI", "durationFinishPSI_r"),
    ):
        round_trip = df.loc[is_sender, sender_col]
        on_receiver = df.loc[is_receiver].set_index(round_trip.index)[receiver_col]
        df.loc[is_sender, sender_col] = round_trip - on_receiver

    add_differences(
        [
            # Micro benchmarks (only for PSI)
            ("ZTimes", "CalculatedZ", "StartCalculatingZ"),
            ("POKTimes", "CalculatedPOK", "StartCalculatingPOK"),
            ("VerifyingTimes", "Verified", "StartVerifying"),
            ("VTimes", "CalculatedV", "StartCalculatingV"),
            ("IntersectionTimes", "Intersected", "StartIntersecting"),
            # File upload
            ("durationFileUpload", "UploadFinished", "StartsSendingFile"),
            # Total time
            ("durationTotal", "UploadFinished", "StartedBrowsingBonjour"),
            # Bonjour + Authentication
            ("durationTotalNoFile", "StartsSendingFile", "StartedBrowsingBonjour"),
            # Authentication
            ("durationAuthentication", "StartsSendingFile", "DiscoveredReceiverBonjour"),
        ]
    )

    return df


# Duration columns of the PSI micro benchmarks (created by calculate_durations)
columns_crypto = ["ZTimes", "POKTimes", "VerifyingTimes", "VTimes", "IntersectionTimes"]

# Load every experiment, parse the timestamps and derive the durations
dfs = [generate_df(exp) for exp in experiments]
df = pd.concat(dfs, ignore_index=True)
df = columns_to_datetime(df)
df = calculate_durations(df)
# Add some custom columns
df["Protocol"] = df["Protocol"].replace({"psi": "PrivateDrop", "original": "AirDrop"})
# regex=False: both patterns are literal text; being explicit keeps the result
# independent of the pandas version's default for `regex`
df["SenderReceiver"] = df["Scenario"].str.replace("-airdrop", "", regex=False)
df["SenderReceiverVertical"] = df["SenderReceiver"].str.replace(
    "→", "\n", regex=False
)  # To be used as xtickslabels

# Currently, we are only interested in the sender-side measurements.
# .copy() makes `df` an independent frame, so adding or overwriting columns below
# (and in later cells) does not trigger pandas' SettingWithCopyWarning.
df = df[df["Role"] == sender_str].copy()

# Convert to milliseconds. Dividing by a Timedelta yields floats and maps missing
# durations (NaT) to NaN, whereas astype(np.int64) would turn NaT into a
# meaningless integer that distorts medians and quantiles.
df["durationAuthentication"] = df["durationAuthentication"] / pd.Timedelta(
    milliseconds=1
)

## Plot settings

In [None]:
# Settings applied to every figure
rc_settings = {
    "hatch.linewidth": 0.5,
    "lines.linewidth": 1,
    "patch.force_edgecolor": True,
}
if LATEX:
    # Additional settings used only for the LaTeX/PDF export
    rc_settings.update(
        {
            "text.usetex": True,
            "font.family": "serif",
            "font.serif": ["Times"],
            "font.size": 9,
            "figure.autolayout": True,  # tight layout
        }
    )
plt.rcParams.update(rc_settings)

## Impact of set sizes via cable (Figure 6)

In [None]:
# Calculate percentiles
group = df[df["Scenario"] == "macOS→iOS Cable"].groupby(
    ["NumberOfContacts", "NumberOfIDs"]
)["durationAuthentication"]
median = group.median()
ymin = median - group.quantile(0.05)
ymax = group.quantile(0.95) - median
# Stack lower/upper errors as (2, n_contacts, n_ids) and reorder the axes to
# (n_ids, 2, n_contacts), i.e. one (lower, upper) pair of rows per plotted column
yerr = np.array([np.array(ymin.unstack(1)), np.array(ymax.unstack(1))])
yerr = yerr.swapaxes(0, 1).swapaxes(0, 2)  # order axes properly
# Barplot incl. error bars
median_unstacked = median.unstack(1)
median_unstacked.index = median_unstacked.index.astype(
    str
)  # So xticks are spaced uniformly
ax = median_unstacked.plot.bar(
    figsize=(3.335, 2.1) if LATEX else None,
    color="lightgrey",
    edgecolor="k",
    yerr=yerr,
    zorder=3,
)

# Hatch the bars: every series (one per NumberOfIDs column) gets its own pattern.
# The number of series is taken from the plotted frame, not from the whole `df`,
# so other scenarios with different NumberOfIDs values cannot shift the patterns.
# The original code assumes ax.patches is ordered series by series; kept as is.
bars = ax.patches
hatch = "//"
n_series = median_unstacked.shape[1]
bars_per_series = len(bars) // n_series
for position, bar in enumerate(bars):
    bar.set_hatch(hatch * (position // bars_per_series))

leg1 = ax.legend(
    title="Number of identifiers $m$", loc="lower center", ncol=3
)  # store legend for later

# Plot reference line
airdrop = df[df["Scenario"] == "macOS→iOS-airdrop Cable"][
    "durationAuthentication"
].median()
line = plt.axhline(y=airdrop, label="AirDrop (median)", color=cmap[2], zorder=10)

# Add additional legend (for reference line)
ax.legend([line], ["AirDrop baseline (median)"], loc="upper center")
ax.add_artist(leg1)
# Horizontal positioning
plt.xticks(rotation=0)
plt.grid(axis="y", zorder=0)
# Set axes labels
ax.set_ylabel("Authentication delay [ms]")
ax.set_xlabel("Number of address book entries $n$")

# Save figure
if LATEX:
    plt.savefig("Plots/nm-cable.pdf")

## Impact of set sizes via AWDL (Figure 9)

In [None]:
# Calculate percentiles
group = df[df["Scenario"] == "macOS→iOS"].groupby(["NumberOfContacts", "NumberOfIDs"])[
    "durationAuthentication"
]
median = group.median()
ymin = median - group.quantile(0.05)
ymax = group.quantile(0.95) - median
# Stack lower/upper errors as (2, n_contacts, n_ids) and reorder the axes to
# (n_ids, 2, n_contacts), i.e. one (lower, upper) pair of rows per plotted column
yerr = np.array([np.array(ymin.unstack(1)), np.array(ymax.unstack(1))])
yerr = yerr.swapaxes(0, 1).swapaxes(0, 2)  # order axes properly
# Barplot incl. error bars
median_unstacked = median.unstack(1)
median_unstacked.index = median_unstacked.index.astype(
    str
)  # So xticks are spaced uniformly
ax = median_unstacked.plot.bar(
    figsize=(3.335, 2.5) if LATEX else None, color="lightgray", yerr=yerr, zorder=3
)

# Hatch the bars: every series (one per NumberOfIDs column) gets its own pattern.
# The number of series is taken from the plotted frame, not from the whole `df`,
# so other scenarios with different NumberOfIDs values cannot shift the patterns.
# The original code assumes ax.patches is ordered series by series; kept as is.
bars = ax.patches
hatch = "//"
n_series = median_unstacked.shape[1]
bars_per_series = len(bars) // n_series
for position, bar in enumerate(bars):
    bar.set_hatch(hatch * (position // bars_per_series))

leg1 = ax.legend(
    title="Number of identifiers $m$", loc="upper center", ncol=3
)  # store legend for later

# Plot reference line
airdrop = df[df["Scenario"] == "macOS→iOS-airdrop"]["durationAuthentication"].median()
line = plt.axhline(y=airdrop, label="AirDrop (median)", color=cmap[2], zorder=10)

# Add additional legend (for reference line)
ax.legend([line], ["AirDrop baseline (median)"], loc="lower center")
ax.add_artist(leg1)
# Horizontal positioning
plt.xticks(rotation=0)
plt.grid(axis="y", zorder=0)
# Set axes labels
ax.set_ylabel("Authentication delay [ms]")
ax.set_xlabel("Number of address book entries $n$")

# Save figure
if LATEX:
    plt.savefig("Plots/nm-awdl.pdf")

## Crypto performance (Figure 7)

In [None]:
df_psi = df[
    (df["Scenario"] == "macOS→iOS Cable") & (df["Protocol"] == "PrivateDrop")
].copy()
# Convert the micro benchmark durations to float milliseconds. Dividing by a
# Timedelta replaces `.astype(np.int64).astype(np.float)`: the `np.float` alias no
# longer exists in current NumPy, and the division maps NaT to NaN.
one_ms = pd.Timedelta(milliseconds=1)
df_psi[columns_crypto] = df_psi[columns_crypto].apply(
    lambda column: column / one_ms
)  # in ms

groups = ["NumberOfContacts", "NumberOfIDs"]
calc = df_psi.groupby(groups)[columns_crypto].mean()
# groupby() sorts its keys while unique() keeps the order of appearance, so the
# values are sorted to make tick and legend labels line up with the bars
unique = [np.sort(df_psi[g].unique()) for g in groups]

# Bar plot
ax = calc.plot.bar(stacked=True, figsize=(3.335, 2.8) if LATEX else None, zorder=2)

# Let's group together the bars by NumberOfContacts
def idx(k, step):
    """Return the x position of bar `k`, leaving one empty slot after every `step` bars."""
    i = int(k / step)
    x = i * (step + 1) + k % step
    return x


h, _ = ax.get_legend_handles_labels()  # get the handles we want to modify
step = len(unique[1])
length = len(h[0])

xticks = [idx(k, step) for k in range(length)]
xticklabels = [f"{b}\n{a}" for a in unique[0] for b in unique[1]]
xlabel = "Number of address book entries $n$"

hatch = "///"
for pa in h:
    for i, (x, rect) in enumerate(zip(xticks, pa.patches)):  # for each index
        rect.set_x(x - 0.5)
        rect.set_hatch(hatch * (i % step))  # we add hatches
        rect.set_width(1)

# Zero-sized bars that only serve as handles for the hatch legend
n = [
    ax.bar(0, 0, color="lightgray", label=title, hatch=hatch * i)
    for i, title in enumerate(unique[1])
]
l1 = ax.legend(
    n,
    unique[1],
    title=r"\# identifiers $m$",  # raw string: "\#" is not a valid escape sequence
    labelspacing=0.585,
    bbox_to_anchor=(0, 1),
    loc="lower left",
    ncol=1,
    handlelength=1.5,
)

ax.set_xlim(-1, max(xticks) + 1)

# Legend labels of the PSI operations (raw string where a backslash is used)
column_crypto_pretty = {
    "ZTimes": "$z_i$",
    "POKTimes": "$a_i, c, p$",
    "VerifyingTimes": r"$c, y_i^p, a_i \cdot z_i ^ c$",
    "VTimes": "$v_i$",
    "IntersectionTimes": "Intersect\n$u_j = v_i$",
}

# Set new x axis labels
xticks = [i + (i / (step)) + step / 2 - 0.5 for i in range(0, length, step)]
ax.set_xticks(xticks)
ax.set_xticklabels(unique[0], rotation=0)
ax.set_xlabel(xlabel)
ax.set_ylabel("Computation time [ms]")
# Pass the stacked-bar handles explicitly so the labels cannot be matched to the
# zero-sized legend bars added above
ax.legend(
    h,
    [column_crypto_pretty[s] for s in columns_crypto],
    title="PSI operation",
    ncol=2,
    bbox_to_anchor=(1, 1),
    loc="lower right",
    columnspacing=0.5,
    handlelength=1.5,
)
ax.add_artist(l1)
ax.grid(axis="y", zorder=0)

if LATEX:
    plt.savefig("Plots/nm-crypto.pdf")

## Network delay (Figure 8)

In [None]:
# Durations in float milliseconds (NaT becomes NaN). `df` is rebound to a new frame
# instead of writing into a filtered slice, which avoids SettingWithCopyWarning.
one_ms = pd.Timedelta(milliseconds=1)
df = df.assign(
    StartPSI=df["durationStartPSI"] / one_ms,
    FinishPSI=df["durationFinishPSI"] / one_ms,
    Discover=df["durationDiscover"] / one_ms,
)

# Scenario names of the AirDrop reference measurements. The cable scenario is
# spelled "macOS→iOS-airdrop Cable" where the cable baseline is computed for the
# set-size figure; the spelling previously used in this cell is accepted as well.
airdrop_awdl_scenarios = ["macOS→iOS-airdrop"]
airdrop_cable_scenarios = ["macOS→iOS-airdrop Cable", "macOS→iOS-airdrop-cable"]

df_psi = df[
    df["Scenario"].isin(["macOS→iOS", "macOS→iOS Cable"])
    & (df["Protocol"] == "PrivateDrop")
]
df_airdrop = df[
    df["Scenario"].isin(airdrop_awdl_scenarios + airdrop_cable_scenarios)
    | (df["Protocol"] == "AirDrop")
]

df_psi = df_psi[df_psi["NumberOfIDs"] == 10]
calc = df_psi.groupby(["NumberOfContacts", "Scenario"])[
    ["FinishPSI", "StartPSI"]
].median()

ax = calc.plot.bar(stacked=True, figsize=(3.335, 2.5) if LATEX else None, zorder=3)

groups = ["NumberOfContacts", "Scenario"]
# groupby() sorts its keys while unique() keeps the order of appearance, so the
# set sizes are sorted to make the tick labels line up with the bars.
# "AWDL"/"Cable" follow the sort order of the two scenario names.
unique = [np.sort(df_psi["NumberOfContacts"].unique()), ["AWDL", "Cable"]]

# Let's group together the bars by NumberOfContacts
def idx(k, step):
    """Return the x position of bar `k`, leaving one empty slot after every `step` bars."""
    i = int(k / step)
    x = i * (step + 1) + k % step
    return x


h, _ = ax.get_legend_handles_labels()  # get the handles we want to modify
step = len(unique[1])
length = len(h[0])

xticks = [idx(k, step) for k in range(length)]

hatch = "////"
for pa in h:
    for i, (x, rect) in enumerate(zip(xticks, pa.patches)):  # for each index
        rect.set_x(x - 0.5)
        rect.set_hatch(hatch * (i % step))  # we add hatches
        rect.set_width(1)

# Add invisible data to add another legend
n = [
    ax.bar(0, 0, color="lightgray", label=title, hatch=hatch * i)
    for i, title in enumerate(unique[1])
]

l1 = ax.legend(
    title="PrivateDrop",
    bbox_to_anchor=(0, 1),
    loc="lower left",
    ncol=2,
    handlelength=1.2,
    labelspacing=0.5,
)

ax.set_xlim(-1, max(xticks) + 1)

# Set new x axis labels
xticks = [i + (i / (step)) + step / 2 - 0.5 for i in range(0, length, step)]
ax.set_xticks(xticks)
ax.set_xticklabels(unique[0], rotation=0)

# Add reference AirDrop lines: median Discover duration of each AirDrop scenario.
# The previous selection combined the scenario test with `NumberOfIDs == 10` using
# `|`, which mixed rows of both scenarios into each median.
# NOTE(review): the NumberOfIDs restriction was dropped to match the other AirDrop
# baselines in this notebook — confirm this is the intended population.
airdrop_awdl = df_airdrop.loc[
    df_airdrop["Scenario"].isin(airdrop_awdl_scenarios), "Discover"
].median()
airdrop_cable = df_airdrop.loc[
    df_airdrop["Scenario"].isin(airdrop_cable_scenarios), "Discover"
].median()
line_awdl = ax.axhline(
    y=airdrop_awdl, label="Discover (AWDL)", color="black", zorder=4
)
line_cable = ax.axhline(
    y=airdrop_cable, label="Discover (Cable)", color=cmap[2], zorder=4
)
ax.legend(
    [line_awdl, line_cable],
    ["AWDL", "Cable"],
    title="AirDrop",
    bbox_to_anchor=(1, 1),
    loc="lower right",
    handlelength=1.2,
)
ax.add_artist(l1)

# Grid and axis labels (set once; an earlier duplicate y label was overwritten
# by this one anyway)
ax.grid(axis="y", zorder=0)
ax.set_ylabel("Duration [ms]")
ax.set_xlabel("Number of address book entries $n$")

if LATEX:
    plt.savefig("Plots/network-delay.pdf")

## Precomputations (Figure 10)

In [None]:
df_psi = df[(df["Scenario"] == "macOS→iOS Cable") & (df["Protocol"] == "PrivateDrop")]

# Calculate percentiles (df_psi is already restricted to the cable scenario)
group = df_psi.groupby(["NumberOfContacts"])["precomputationDuration"]
median = group.median()
ymin = median - group.quantile(0.05)
ymax = group.quantile(0.95) - median
yerr = np.array([np.array(ymin), np.array(ymax)])
# Keep the numeric set sizes for the regression below; the index itself is turned
# into strings for plotting
contacts = median.index.to_numpy(dtype=float)
# Barplot incl. error bars
median.index = median.index.astype(str)  # So xticks are spaced uniformly
ax = median.plot.bar(
    figsize=(3.335, 2.0) if LATEX else None, color="lightgray", yerr=yerr, zorder=3
)

# Horizontal positioning
plt.xticks(rotation=0)
plt.grid(axis="y", zorder=0)
# Set axes labels
ax.set_ylabel("Precomputation time [s]")
ax.set_xlabel("Number of address book entries $n$")

# Save figure
if LATEX:
    plt.savefig("Plots/precomputation.pdf")

# Calculate simple linear regression
from sklearn.linear_model import LinearRegression

# Fit on the numeric set sizes instead of the stringified index, so the fit does
# not depend on the estimator converting strings to numbers
x = contacts.reshape(-1, 1)
y = np.array(median).reshape(-1, 1)
model = LinearRegression().fit(x, y)
display(f"Linear regression slope: {model.coef_[0][0]}")