In [1]:
import pandas as pd
df = pd.read_csv("./data/e2e_table.csv")
display(df)

Unnamed: 0,dataset,type,acc,mean_s,sum_s,bandwidth_GB,mean_factor,bw_factor
0,CIFAR Large,l2_opt,0.8168,207.837704,33254.032704,89.71,9.8,5
1,CIFAR Large,linf_opt,0.8478,130.663239,20906.118239,411.05,6.16,24
2,CIFAR Large,plain,0.8605,21.204623,3392.739623,16.78,1.0,1
3,CIFAR Small,l2,0.6033,280.169505,28016.950505,86.91,153.09,37
4,CIFAR Small,l2_opt,0.584,53.172343,5317.234343,16.83,29.05,7
5,CIFAR Small,linf,0.6038,251.270364,25127.036364,58.34,137.3,25
6,CIFAR Small,linf_opt,0.6045,37.942172,3794.217172,58.34,20.73,25
7,CIFAR Small,plain,0.6058,1.830131,183.013131,2.38,1.0,1
8,MNIST,l2,0.9854,131.060289,20969.646289,42.99,62.61,36
9,MNIST,l2_opt,0.9741,26.269918,4203.186918,11.22,12.55,10


In [2]:
# extrapolate cifar_large rsl
# method:
#  - estimate RSL training time for 40k, 45k, 

In [3]:
import numpy as np

timings_per_round_sec = {
    'CIFAR Large': [2233, 2193, 2230, 2210],
    'Shakespeare': [6800] # estimate
}

rounds = {
            'MNIST': 160,
            'CIFAR Small': 100,
             'CIFAR Large': 160,
             'Shakespeare': 20
         }

to_extrapolate = {
    'CIFAR Large': {
        'linf_ext': {
            "timings": [2233, 2193, 2230, 2210],
            "bandwidth": df[(df['dataset'] == "CIFAR Large") & (df['type'] == 'linf_opt')]["bandwidth_GB"].item(),
            "accuracy": df[(df['dataset'] == "CIFAR Large") & (df['type'] == 'linf_opt')]["acc"].item()
        },
        'l2_ext': {
            "timings": [2277.959, 2278.978, 2253.280, 2268.361, 2262.207, 2158.015],
            "bandwidth": 612.5,
            "accuracy": 0.85
        }
    },
    'Shakespeare': {
        'linf_ext': {
            "timings": [5398, 5366, 5415, 5363],
            "bandwidth": df[(df['dataset'] == "Shakespeare") & (df['type'] == 'linf_opt')]["bandwidth_GB"].item(),
            "accuracy": df[(df['dataset'] == "Shakespeare") & (df['type'] == 'linf_opt')]["acc"].item()
        },
        'l2_ext': {
            "timings": [5667.230, 5648.221, 5613.286, 5561.957],
            "bandwidth": 229.28,
            "accuracy": df[(df['dataset'] == "Shakespeare") & (df['type'] == 'linf_opt')]["acc"].item()
        }
    }
}

for ext, rows in to_extrapolate.items():
    for bound_type, data in rows.items():
        factor_row = df[(df['dataset'] == ext) & (df['type'] == 'plain')]

        mean_s = np.mean(data["timings"])
        sum_s = mean_s * float(rounds[ext])
        bw = data['bandwidth']

        mean_factor = int(round(mean_s / factor_row['mean_s'].item(), 2))
        bw_factor = int(round(bw / factor_row['bandwidth_GB'].item(), 0))

        df_extrapolated = pd.DataFrame(
            [
                [ext, bound_type, data["accuracy"], mean_s, sum_s,
                 bw, mean_factor, bw_factor]
            ], columns=['dataset', 'type', 'acc', 'mean_s', 'sum_s',
                                          'bandwidth_GB', 'mean_factor', 'bw_factor']
        )
        df = df.append(df_extrapolated)

display(df)

Unnamed: 0,dataset,type,acc,mean_s,sum_s,bandwidth_GB,mean_factor,bw_factor
0,CIFAR Large,l2_opt,0.8168,207.837704,33254.032704,89.71,9.8,5
1,CIFAR Large,linf_opt,0.8478,130.663239,20906.118239,411.05,6.16,24
2,CIFAR Large,plain,0.8605,21.204623,3392.739623,16.78,1.0,1
3,CIFAR Small,l2,0.6033,280.169505,28016.950505,86.91,153.09,37
4,CIFAR Small,l2_opt,0.584,53.172343,5317.234343,16.83,29.05,7
5,CIFAR Small,linf,0.6038,251.270364,25127.036364,58.34,137.3,25
6,CIFAR Small,linf_opt,0.6045,37.942172,3794.217172,58.34,20.73,25
7,CIFAR Small,plain,0.6058,1.830131,183.013131,2.38,1.0,1
8,MNIST,l2,0.9854,131.060289,20969.646289,42.99,62.61,36
9,MNIST,l2_opt,0.9741,26.269918,4203.186918,11.22,12.55,10


In [5]:
header = r"""
\begin{table}
    \centering
    \resizebox{\columnwidth}{!}{%
    \begin{tabular}{lrcrrrcrr}
        \toprule
             &                && \multicolumn{3}{c}{Computation Time} & & \multicolumn{2}{c}{Bandwidth} \\
             \cline{4-6}\cline{8-9}
        Type & Acc. && Round [s] & Total [m] & Factor & & Total [MB] & Factor \\"""


mnist_header = r"""        \toprule
        \multicolumn{9}{c}{\textsc{MNIST} (19k params, rsl 5k params, 160 rounds)} \\"""

cifar_small_header = r"""        \toprule
        \multicolumn{9}{c}{\textsc{CIFAR-10 S} (62k params, rsl 12k params, 100 rounds)} \\"""

cifar_large_header = r"""        \toprule
        \multicolumn{9}{c}{\textsc{CIFAR-10 L} (273k params, 160 rounds)} \\"""

shakespeare_large_header = r"""        \toprule
        \multicolumn{9}{c}{\textsc{Shakespeare} (818k params, 20 rounds)} \\"""

footer = r"""        \bottomrule
    \end{tabular}
    }
\end{table}
"""

type_order = {
    "plain": 0,
    "l2": 1,
    "l2_ext": 2,
    "l2_opt": 3,
    "l2_opt_ext": 4,
    "linf": 5,
    "linf_ext": 6,
    "linf_opt": 7,
    "linf_opt_ext": 8
}

df['type_order'] = df['type'].map(type_order)
df = df.sort_values(by=["dataset", "type_order"])

type_print = {
    "plain": r"SA",
    "l2": r"$L_2$",
    "l2_opt": r"$L_2^{(rsl)}$",
    "linf": r"$L_\infty$",
    "linf_opt": r"$L_\infty^{(p)}$",

    "linf_ext": r"$L_\infty$",
    "l2_ext": r"$L_2$"
}

type_extrapolated = [
    "linf_ext", "l2_ext"
]

def format_mean_factor(factor):
    if factor > 1.01 and factor < 3.0:
        return f"{round(factor, 1):.1f}"
    return f"{round(factor, 0):.0f}"

def format_row(x):
    num_rounds = rounds[x.type]
    num_clients = 48
    def format_per_client_per_round(v):
        return round(float(v) / (num_rounds * num_clients), 1)
    
    if x.type in type_extrapolated:
        return fr"{type_print[x.type]} * & {round(x.acc, 2):.2f} & & {round(x.mean_s)} & {round(x.sum_s/60)} & {format_mean_factor(x.mean_factor)}x & & {format_per_client_per_round(x.bandwidth_GB)} & {x.bw_factor}x \\"

    # total_time = f"{round(x.sum_s/60)}m" if round(x.sum_s/60) > 0 else f"{round(x.sum_s)}s"
    return fr"{type_print[x.type]} & {round(x.acc, 2):.2f} & & {round(x.mean_s)} & {round(x.sum_s/60)} & {format_mean_factor(x.mean_factor)}x & & {format_per_client_per_round(x.bandwidth_GB)} & {x.bw_factor}x \\"

# PRINT THE LATEX TABLE

print(header)

print(mnist_header)


for row in df[df["dataset"]=="MNIST"].itertuples():
    print("        " + format_row(row))

print(cifar_small_header)
    
for row in df[df["dataset"]=="CIFAR Small"].itertuples():
    print("        " + format_row(row))

print(cifar_large_header)
    
for row in df[df["dataset"]=="CIFAR Large"].itertuples():
    print("        " + format_row(row))

print(shakespeare_large_header)

for row in df[df["dataset"]=="Shakespeare"].itertuples():
    print("        " + format_row(row))
    
print(footer)




\begin{table}
    \centering
    \resizebox{\columnwidth}{!}{%
    \begin{tabular}{lrcrrrcrr}
        \toprule
             &                && \multicolumn{3}{c}{Computation Time} & & \multicolumn{2}{c}{Bandwidth} \\
             \cline{4-6}\cline{8-9}
        Type & Acc. && Round [s] & Total [m] & Factor & & Total [GB] & Factor \\
        \toprule
        \multicolumn{9}{c}{\textsc{MNIST} (19k params, rsl 5k params, 160 rounds)} \\
        plain & 0.99 & & 2 & 6 & 1x & & 1.2 & 1x \\
        $L_2$ & 0.99 & & 131 & 349 & 63x & & 43.0 & 36x \\
        $L_2^{(rsl)}$ & 0.97 & & 26 & 70 & 13x & & 11.2 & 10x \\
        $L_\infty$ & 0.99 & & 122 & 325 & 58x & & 28.9 & 24x \\
        $L_\infty^{(p)}$ & 0.99 & & 20 & 53 & 9x & & 28.9 & 24x \\
        \toprule
        \multicolumn{9}{c}{\textsc{CIFAR-10 S} (62k params, rsl 12k params, 100 rounds)} \\
        plain & 0.61 & & 2 & 3 & 1x & & 2.4 & 1x \\
        $L_2$ & 0.60 & & 280 & 467 & 153x & & 86.9 & 37x \\
        $L_2^{(rsl)}$ & 0.58 & & 