In [None]:
import numpy as np
import pandas as pd

from prv_accountant import PoissonSubsampledGaussianMechanism, PRVAccountant
from prv_accountant.dpsgd import find_noise_multiplier
from matplotlib import pyplot as plt

In [None]:
# Shared privacy target used by every experiment in this notebook.
epsilon = 4.0
delta = 1e-8
# Evenly spaced grid of 100 false-negative rates covering [0, 1] inclusive.
fnrs = np.linspace(0.0, 1.0, num=100)

In [None]:
def privacy_boundary_lo(fnr: float, eps: float, delta: float) -> float:
    """Evaluate the curve plotted as the (eps, delta)-DP reference in the FNR/FPR figures.

    Computes ``max(0, 1 - delta - fnr * e^eps, (1 - delta - fnr) * e^-eps)``.

    Only elementwise NumPy operations are used, so ``fnr`` may also be an
    array (this notebook passes the whole FNR grid); the result then has one
    value per entry.

    Args:
        fnr: False-negative rate(s) at which to evaluate the curve.
        eps: Privacy parameter epsilon.
        delta: Privacy parameter delta.

    Returns:
        The curve value(s) at ``fnr``, never below zero.
    """
    headroom = 1 - delta
    # The two linear pieces of the curve; the larger one is active at each point.
    steep_branch = headroom - fnr * np.exp(eps)
    shallow_branch = (headroom - fnr) * np.exp(-eps)
    return np.maximum(0, np.maximum(steep_branch, shallow_branch))

In [None]:
def compute_fprs(fnrs: np.ndarray, batch_size: int, dataset_size: int, epochs: int, target_epsilon: float, target_delta: float) -> np.ndarray:
    """Evaluate the composed mechanism's curve estimate at the given FNRs.

    A noise multiplier is calibrated with ``find_noise_multiplier`` for the
    target (epsilon, delta), a ``PoissonSubsampledGaussianMechanism`` with
    that noise is composed ``num_steps`` times via ``PRVAccountant``, and the
    ``(x, y)`` estimates of the composition are linearly interpolated at
    ``fnrs``.

    Args:
        fnrs: Query points handed to ``np.interp``.
        batch_size: Batch size; with ``dataset_size`` it fixes the sampling
            probability ``batch_size / dataset_size``.
        dataset_size: Number of records in the dataset.
        epochs: Number of passes over the dataset; determines ``num_steps``.
        target_epsilon: Epsilon the noise multiplier is calibrated for.
        target_delta: Delta the noise multiplier is calibrated for.

    Returns:
        The interpolated values, one per entry of ``fnrs``.

    Raises:
        ValueError: If the epsilon recomputed from the composition differs
            from ``target_epsilon`` by more than the allowed ``eps_error``.
    """
    sampling_probability = batch_size / dataset_size
    # `*` and `//` associate left to right: total samples processed, floor-divided by the batch size.
    num_steps = epochs * dataset_size // batch_size
    eps_error = 0.1
    delta_error = target_delta * 1e-3

    noise_multiplier = find_noise_multiplier(
        sampling_probability=sampling_probability,
        num_steps=num_steps,
        target_delta=target_delta,
        target_epsilon=target_epsilon,
        eps_error=eps_error,
    )
    prv = PoissonSubsampledGaussianMechanism(
        sampling_probability=sampling_probability,
        noise_multiplier=noise_multiplier,
    )
    accountant = PRVAccountant(
        prvs=prv,
        eps_error=eps_error,
        delta_error=delta_error,
        max_self_compositions=num_steps,
    )
    f_n = accountant.compute_composition(num_steps)

    # Sanity-check the calibration. The result is computed once and reused in
    # the error message, so the reported value is exactly the one that failed
    # the check (same delta_error / epsilon_error arguments).
    # NOTE(review): index 1 is presumably the point estimate between a lower
    # and an upper bound -- confirm against the accountant's API.
    eps_result = f_n.compute_epsilon(target_delta, delta_error=delta_error, epsilon_error=eps_error)
    if abs(eps_result[1] - target_epsilon) > eps_error:
        raise ValueError(f"Computed epsilon is not close to target epsilon. Computed: {eps_result} Target: {target_epsilon}")

    # NOTE(review): x/y are presumably (FNR, FPR) samples of the trade-off
    # curve, and np.interp expects x to be increasing -- confirm both.
    x, y = f_n.compute_f_estimates()
    return np.interp(fnrs, x.astype(np.float64), y.astype(np.float64))

In [None]:
# Results of every experiment, keyed by a short configuration label.
fprs = dict()

In [None]:
# Batch-size sweep: batch_size=500 (dataset size, epochs and privacy target are shared by the sweep).
fprs["bs=500"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=500,
    epochs=10,
)

In [None]:
# Batch-size sweep: batch_size=1000 (dataset size, epochs and privacy target are shared by the sweep).
fprs["bs=1k"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=1_000,
    epochs=10,
)

In [None]:
# Batch-size sweep: batch_size=2000 (dataset size, epochs and privacy target are shared by the sweep).
fprs["bs=2k"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=2_000,
    epochs=10,
)

In [None]:
# Overlay the batch-size sweep curves with the (epsilon, delta)-DP reference curve.
fig, ax = plt.subplots()
for series_key in ("bs=500", "bs=1k", "bs=2k"):
    # The legend label of each curve is the same string as its key in `fprs`.
    ax.plot(fnrs, fprs[series_key], label=series_key)
ax.plot(
    fnrs,
    privacy_boundary_lo(fnrs, epsilon, delta),
    label=f"({epsilon},{delta})-DP",
)
ax.set_xlabel('FNR')
ax.set_ylabel('FPR')
# Equal scaling on both axes.
ax.set_aspect('equal')
ax.legend()

In [None]:
# Write the batch-size sweep to a tab-separated file, one row per FNR grid point.
batch_size_table = pd.DataFrame({
    "fnr": fnrs,
    "fpr_bs_500": fprs["bs=500"],
    "fpr_bs_1000": fprs["bs=1k"],
    "fpr_bs_2000": fprs["bs=2k"],
    "fpr_dp": privacy_boundary_lo(fnrs, epsilon, delta),
})
batch_size_table.to_csv("batch_size.tsv", sep="\t", index=False)

In [None]:
# Epoch sweep: epochs=5 (batch size, dataset size and privacy target are shared by the sweep).
fprs["e=5"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=1_000,
    epochs=5,
)

In [None]:
# Epoch sweep: epochs=10 (batch size, dataset size and privacy target are shared by the sweep).
# NOTE(review): these arguments are identical to the "bs=1k" run, so this repeats that computation.
fprs["e=10"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=1_000,
    epochs=10,
)

In [None]:
# Epoch sweep: epochs=20 (batch size, dataset size and privacy target are shared by the sweep).
fprs["e=20"] = compute_fprs(
    fnrs,
    target_epsilon=epsilon,
    target_delta=delta,
    dataset_size=1_000_000,
    batch_size=1_000,
    epochs=20,
)

In [None]:
# Overlay the epoch sweep curves with the (epsilon, delta)-DP reference curve.
fig, ax = plt.subplots()
for series_key in ("e=5", "e=10", "e=20"):
    # The legend label of each curve is the same string as its key in `fprs`.
    ax.plot(fnrs, fprs[series_key], label=series_key)
ax.plot(
    fnrs,
    privacy_boundary_lo(fnrs, epsilon, delta),
    label=f"({epsilon},{delta})-DP",
)
ax.set_xlabel('FNR')
ax.set_ylabel('FPR')
# Equal scaling on both axes.
ax.set_aspect('equal')
ax.legend()

In [None]:
# Write the epoch sweep to a tab-separated file, one row per FNR grid point.
epochs_table = pd.DataFrame({
    "fnr": fnrs,
    "fpr_e_5": fprs["e=5"],
    "fpr_e_10": fprs["e=10"],
    "fpr_e_20": fprs["e=20"],
    "fpr_dp": privacy_boundary_lo(fnrs, epsilon, delta),
})
epochs_table.to_csv("epochs.tsv", sep="\t", index=False)