In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from typing import List

In [2]:
def compute_storage(train_dataset_sizes: List[int], model_size: int, prediction_x_size: int, prediction_y_size: int = 1,
                    n_computer_train=3, n_computer_inference=3,
                    KZG_BYTES=48, PED_BYTES=32, ECDSA_SIG_BYTES=64, ECDSA_PUBLICKEY_SIZE=32, SHA256_BYTES=32):
    """Tabulate the storage, in bytes, needed by each commitment approach.

    Four approaches are covered:

    * ``"pc"``      - one ``KZG_BYTES`` commitment per object, plus a trusted
      setup of ``max(prediction_x_size, prediction_y_size) * KZG_BYTES`` bytes.
    * ``"pc_ipa"``  - one ``PED_BYTES`` commitment per object, no trusted setup.
    * ``"cerebro"`` - ``PED_BYTES`` per committed element, so the cost scales
      with the training-set sizes, ``model_size`` and the prediction sizes.
    * ``"sha3"``    - one ``SHA256_BYTES`` digest per object (note that this
      entry is sized with the ``SHA256_BYTES`` parameter).

    Args:
        train_dataset_sizes: Size of each training set; the number of entries
            determines how many training commitments, signatures and
            identities are counted.
        model_size: Number of model elements (only affects ``"cerebro"``).
        prediction_x_size: Number of elements in a prediction input.
        prediction_y_size: Number of elements in a prediction output.
        n_computer_train: Number of public keys counted for training computers.
        n_computer_inference: Number of public keys counted for inference
            computers.
        KZG_BYTES, PED_BYTES, ECDSA_SIG_BYTES, ECDSA_PUBLICKEY_SIZE,
        SHA256_BYTES: Byte sizes of the respective primitives.

    Returns:
        A dict mapping approach name to a dict of byte counts. Every approach
        has the six ``commitment_*`` / ``trusted_setup`` entries followed by
        the same ``signature_*`` and ``identity_*`` entries.
    """
    n_train_sets = len(train_dataset_sizes)

    def fixed_size_scheme(unit_bytes, setup_bytes=0):
        # Approaches whose commitment size does not depend on the size of the
        # committed object: one unit per training set, model, prediction input,
        # prediction output and randomness.
        # Author's note on the prediction input: the randomness has to be kept,
        # but then the commitment itself need not be stored, so the cost is
        # essentially the same either way.
        return {
            "commitment_train": unit_bytes * n_train_sets,
            "commitment_model": unit_bytes,
            "commitment_prediction_x": unit_bytes,
            "commitment_prediction_y": unit_bytes,
            "commitment_randomness": unit_bytes,
            "trusted_setup": setup_bytes,
        }

    # "cerebro" pays PED_BYTES for every single committed element.
    elements_per_sample = prediction_x_size + prediction_y_size
    elementwise_scheme = {
        "commitment_train": sum(PED_BYTES * elements_per_sample * n_samples
                                for n_samples in train_dataset_sizes),
        "commitment_model": PED_BYTES * model_size,
        # Open question from the author: could these be recomputed instead?
        "commitment_prediction_x": PED_BYTES * prediction_x_size,
        "commitment_prediction_y": PED_BYTES * prediction_y_size,
        "commitment_randomness": PED_BYTES,
        "trusted_setup": 0,
    }

    per_approach = {
        "pc": fixed_size_scheme(KZG_BYTES, max(prediction_x_size, prediction_y_size) * KZG_BYTES),
        "pc_ipa": fixed_size_scheme(PED_BYTES),
        "cerebro": elementwise_scheme,
        "sha3": fixed_size_scheme(SHA256_BYTES),
    }

    # Signature and identity costs do not depend on the commitment approach.
    shared_entries = {
        "signature_train": ECDSA_SIG_BYTES * n_train_sets,
        "signature_modelowner": ECDSA_SIG_BYTES,
        # The computers produce a single joint signature per phase.
        "signature_computer_train": ECDSA_SIG_BYTES,
        "signature_computer_inference": ECDSA_SIG_BYTES,
        "identity_train": ECDSA_PUBLICKEY_SIZE * n_train_sets,
        "identity_modelowner": ECDSA_PUBLICKEY_SIZE,
        "identity_computer_train": ECDSA_PUBLICKEY_SIZE * n_computer_train,
        "identity_computer_inference": ECDSA_PUBLICKEY_SIZE * n_computer_inference,
    }

    # Append the shared entries after each approach's own entries.
    return {approach: {**entries, **shared_entries}
            for approach, entries in per_approach.items()}



In [8]:
# Storage estimates per benchmark. Positional arguments are: the training-set
# sizes (one entry per training set), model_size, prediction_x_size and
# prediction_y_size.
s_adult = compute_storage([8684, 8746, 8618], 2977, 91, 1)
s_mnist = compute_storage([20012, 20057, 19931], 431080, 784, 1)
s_cifar = compute_storage([16709, 16682, 16609], 3882058, 3072, 1)

# For the QA benchmark the prediction input size is hidden_state * seq_len.
hidden_state = 768
seq_len = 128
s_stanford_dataset = 104743  # not referenced anywhere else in this cell
# three training sets of 833 entries each, i.e. approx 2500 in total
s_stanfordqa = compute_storage([833, 833, 833], 85646595, hidden_state * seq_len, 1)

# One row per dataset, one column per approach; each cell holds that
# approach's dict of byte counts.
dataset_labels = ["adult", "mnist_full", "cifar_alexnet", "glue_qnli_bert"]
df = pd.DataFrame([s_adult, s_mnist, s_cifar, s_stanfordqa], index=dataset_labels)

# Columns summed into the two totals. The identity_* columns are deliberately
# left out of both.
train_cost_columns = [
    "commitment_train",
    "signature_train",
    "signature_computer_train",
    "commitment_model",
    "commitment_randomness",
]
inference_extra_columns = [
    "commitment_prediction_x",
    "commitment_prediction_y",
    "signature_computer_inference",
    "signature_modelowner",
]

# Reshape to one row per (approach, dataset) pair, expand each dict into its
# own columns, then add the totals. storage_inference is storage_train plus the
# inference-only columns.
df_unstacked = (
    df.unstack()
    .apply(pd.Series)
    .reset_index()
    .rename(columns={"level_0": "approach", "level_1": "dataset"})
    .assign(
        storage_train=lambda frame: frame[train_cost_columns].sum(axis=1),
        storage_inference=lambda frame: (
            frame["storage_train"] + frame[inference_extra_columns].sum(axis=1)
        ),
    )
)

# Shown before the ETL adjustments below: original approach names, pc_ipa rows
# still included.
display(df_unstacked)

# Reshape into what the ETL expects: relabel values, drop the pc_ipa rows and
# switch to the ETL's column names.
# Note: with the dataset labels used above, the "mnist" and "cifar"
# replacements currently match nothing; they are kept as-is.
df_unstacked = (
    df_unstacked
    .assign(
        approach=lambda frame: frame["approach"].replace("sha3", "sha3s"),
        dataset=lambda frame: (
            frame["dataset"]
            .replace("mnist", "mnist_full")
            .replace("cifar", "cifar_alexnet")
        ),
    )
    .loc[lambda frame: frame["approach"] != "pc_ipa"]
    .rename(columns={
        "approach": "consistency_args.type",
        "dataset": "mpc.script_args.dataset",
    })
)

# Written with pandas' defaults, so the row index becomes the first CSV column.
df_unstacked.to_csv("storage.csv")

Unnamed: 0,approach,dataset,commitment_train,commitment_model,commitment_prediction_x,commitment_prediction_y,commitment_randomness,trusted_setup,signature_train,signature_modelowner,signature_computer_train,signature_computer_inference,identity_train,identity_modelowner,identity_computer_train,identity_computer_inference,storage_train,storage_inference
0,pc,adult,144,48,48,48,48,4368,192,64,64,64,96,32,96,96,496,720
1,pc,mnist_full,144,48,48,48,48,37632,192,64,64,64,96,32,96,96,496,720
2,pc,cifar_alexnet,144,48,48,48,48,147456,192,64,64,64,96,32,96,96,496,720
3,pc,glue_qnli_bert,144,48,48,48,48,4718592,192,64,64,64,96,32,96,96,496,720
4,pc_ipa,adult,96,32,32,32,32,0,192,64,64,64,96,32,96,96,416,608
5,pc_ipa,mnist_full,96,32,32,32,32,0,192,64,64,64,96,32,96,96,416,608
6,pc_ipa,cifar_alexnet,96,32,32,32,32,0,192,64,64,64,96,32,96,96,416,608
7,pc_ipa,glue_qnli_bert,96,32,32,32,32,0,192,64,64,64,96,32,96,96,416,608
8,cerebro,adult,76685312,95264,2912,32,32,0,192,64,64,64,96,32,96,96,76780864,76783936
9,cerebro,mnist_full,1507200000,13794560,25088,32,32,0,192,64,64,64,96,32,96,96,1520994848,1521020096
