# Analysis for PoST Parameters
The proof consists of a single nonce and several "good" labels.

In [1]:
from IPython.display import display,Math,Markdown
from scipy.stats import binom
import math
from math import log, floor, ceil, sqrt

def dm(str):
    """Render a Markdown-formatted string as rich notebook output.

    Args:
        str: Markdown source text. (The parameter name shadows the builtin
            ``str`` inside this function only.)
    """
    rendered = Markdown(str)
    display(rendered)


## Honest Parameters

In [2]:
## Global Parameters

# Sizes
label_size = 16  # size of one label, in bytes
unit_size = 256  # size of one space unit, in GiB

# PoST cells: each cell costs one scrypt evaluation to generate/verify.
scrypt_iterations_per_PoST_cell = 8192
PoST_cells_per_label = 16

# Total number of labels in one space unit (a float, because of true division).
n = (unit_size * 2**30) / label_size

# Honest users should fail with at most this probability with {honest_nonces} nonces.
honest_opt_error = 0.01
# We also compute the number of nonces needed to achieve this (much smaller) error.
honest_min_error = 2.0**-20

### Hashing-related
nonce_batch_size = 16  # number of nonces in a "batch"

## Node-specific parameters
class PoSTer:
    """Hardware profile of a proving node, plus throughput figures derived from it.

    The constructor stores its arguments verbatim and derives:
      * ``nonce_labels_sec`` -- from the AES throughput (MiB/s), the nonce batch
        size and the label size.
      * ``read_labels_sec``  -- from the read throughput and the label size.
        Note the conversion uses 2**20 bytes although the argument is named "mb".
      * ``cpu_init_time``    -- seconds to initialize one space unit on this CPU
        (``n * PoST_cells_per_label`` scrypt calls at ``scrypt_per_sec``).

    Relies on the notebook globals ``n``, ``label_size``, ``nonce_batch_size``
    and ``PoST_cells_per_label``.
    """

    def __init__(self, name, cpu_aes_MiB_sec_for_nonce_batch, read_mb_per_sec, scrypt_per_sec):
        # Raw measurements
        self.name = name
        self.cpu_aes_MiB_sec_for_nonce_batch = cpu_aes_MiB_sec_for_nonce_batch
        self.read_mb_per_sec = read_mb_per_sec
        self.scrypt_per_sec = scrypt_per_sec

        # Derived throughput, expressed in labels per second
        bytes_per_mib = 2**20
        self.nonce_labels_sec = (
            self.cpu_aes_MiB_sec_for_nonce_batch * bytes_per_mib * nonce_batch_size
        ) / label_size
        self.read_labels_sec = self.read_mb_per_sec * bytes_per_mib / label_size

        # Time (seconds) to initialize a space unit *on a CPU*.
        # This is the reference against which PoW times are computed.
        self.cpu_init_time = n * PoST_cells_per_label / scrypt_per_sec
    

# Reference honest machines, keyed by a short speed class.
posters = dict(
    fast=PoSTer(
        "i7-12700k/slow SSD",
        cpu_aes_MiB_sec_for_nonce_batch=2028,
        read_mb_per_sec=200,
        scrypt_per_sec=2**13,
    ),
    slow=PoSTer(
        "Intel(R) Pentium(R) Silver J5040 CPU @ 2.00GHz/fast HDD",
        cpu_aes_MiB_sec_for_nonce_batch=663,
        read_mb_per_sec=150,
        scrypt_per_sec=2**11.843,
    ),
)

## Adversarial Parameters

We use these parameters to compute the minimal number of good labels needed.

In [3]:
# Adversarial initialization time (for a single space unit) in seconds
# (assuming a super-fast GPU): three hours.
adv_init_time = 3 * 60 * 60

# We use time as a proxy for cost...
def adv_pow_advantage(honest_cpu_init_time):
    """Multiplicative advantage of the adversary in solving the rate-limiting PoW.

    Compares a CPU-solving honest party against the adversary: if the fastest
    adversary solves a PoW in 1 second, a (slow) honest user on CPU takes
    `adv_pow_advantage` seconds.

    Args:
        honest_cpu_init_time: honest CPU initialization time for one space unit, in seconds.

    Returns:
        ``honest_cpu_init_time / adv_init_time``.
    """
    ratio = honest_cpu_init_time / adv_init_time
    return ratio

def adv_pow_advantage_for_PoSTer(poster):
    """Adversarial PoW advantage relative to the given honest machine profile.

    Args:
        poster: object exposing ``cpu_init_time`` (seconds).
    """
    honest_cpu_init_time = poster.cpu_init_time
    return adv_pow_advantage(honest_cpu_init_time)

# Report, for every reference machine in `posters`, the adversary's PoW advantage
# rounded to the nearest integer. Each `dm` call emits one Markdown output.
dm(rf"""#### The adversarial PoW advantage is""")
for poster in posters.values():
    dm(rf"* **{poster.name}**: Advantage is ~{round(adv_pow_advantage_for_PoSTer(poster))}")


#### The adversarial PoW advantage is

* **i7-12700k/slow SSD**: Advantage is ~3107

* **Intel(R) Pentium(R) Silver J5040 CPU @ 2.00GHz/fast HDD**: Advantage is ~6928

### Finding $q$

We're trying to maximize the gap between the honest success probability and the adversarial success probability. This gap grows with the inverse of $q$, so the optimal $q$ is bounded only by the honest success probability (or, equivalently, by the expected number of nonces the honest parties must try).

In [4]:
# Find q that guarantees honest users will succeed with honest_nonces except with probability honest_opt_error
def find_q(honest_opt_error, honest_nonces):
    """Per-nonce success probability q solving ``(1 - q)**honest_nonces == honest_opt_error``.

    Args:
        honest_opt_error: tolerated probability that all nonces fail.
        honest_nonces: number of nonces the honest party tries.
    """
    per_nonce_failure = honest_opt_error ** (1 / honest_nonces)
    return 1 - per_nonce_failure

def find_q_from_expected_nonces(expected_honest_nonces):
    """Per-nonce success probability for which the expected number of tries is `expected_honest_nonces`."""
    success_probability = 1 / expected_honest_nonces
    return success_probability

## Inverse calculations and optimizations

### Finding $p$ given $k$ and $q$

In [5]:
from scipy import optimize

# Find the p that minimizes, for a binomially distributed variable Binom with parameters n1,p, the error
# |Pr[Binom(n1,p) > k2] - q1|.
def find_p(k2: int, n1: int, q1: float):
    r"""Find p such that Pr[X > k2] = q1 when X ~ Binom(n1, p).

    The search variable is k1 = p * n1 (the expected number of successes),
    optimized over the open interval (0, 3*k2) with a bounded scalar minimizer.

    Note: the tail probability is computed with ``binom.sf(k2, ...)``, which is
    the *strict* tail Pr[X > k2] (not Pr[X \ge k2]); this matches how the rest
    of the notebook computes q.

    Args:
        k2: tail threshold.
        n1: number of Bernoulli trials.
        q1: target tail probability.

    Returns:
        The probability p = k1 / n1 found by the optimizer.
    """
    def p_error(k1):
        if k1 <= 0:
            return math.inf
        actualq = binom.sf(k2, n1, k1 / n1)
        # Where the tail probability under/overflows, the squared error is flat.
        # Return a sloped penalty instead so the optimizer is pushed back toward
        # the informative region (larger k1 when the tail is ~0, smaller when ~1).
        if actualq == 0 or (q1 > 2**(-20) and actualq < 2**(-40)):
            return 3 - k1 / n1
        elif actualq == 1 or (q1 < (1 - 2**(-20)) and actualq > (1 - 2**(-40))):
            return 2 + k1 / n1
        return (q1 - actualq)**2

    bounds = (0, 3 * k2)

    res = optimize.minimize_scalar(
        p_error,
        bounds=bounds,
        method='bounded',
        options={'xatol': 2**(-64), 'maxiter': 2**20},
    )

    return res.x / n1

# Finding q given p
def find_q_given_p(k2, n, p):
    """Tail probability ``binom.sf(k2, n, p)``, i.e. Pr[X > k2] for X ~ Binom(n, p)."""
    tail_probability = binom.sf(k2, n, p)
    return tail_probability

def find_q_given_k1(k1, k2, n):
    """Same as `find_q_given_p`, with the per-label probability given as ``p = k1 / n``."""
    per_label_probability = k1 / n
    return find_q_given_p(k2, n, per_label_probability)

### Optimizing Honest Nonces
Here, we're trying to find the maximum number of nonces that allow honest users on "fast" computers to read at I/O speed.

In [6]:
def expected_honest_nonces(q):
    """Expected number of nonces tried until the first success, when each succeeds with probability q."""
    mean_tries = 1 / q
    return mean_tries

# Number of nonces that succeed with probability `percentile`
def percentile_honest_nonces(q, percentile):
    """Smallest integer number of nonces for which at least one succeeds with probability `percentile`.

    Args:
        q: success probability of a single nonce.
        percentile: required overall success probability.
    """
    if q != 1:
        return ceil(log(1 - percentile) / log(1 - q))
    # A nonce that always succeeds needs exactly one try (and log(1-q) is undefined).
    return 1

# Number of parallel nonces to equalize read time and hashing time (not necessarily an integer)
def break_even_nonces(nonce_labels_per_sec, read_labels_per_sec):
    """Ratio of nonce-hashing throughput to read throughput (both in labels per second)."""
    ratio = nonce_labels_per_sec / read_labels_per_sec
    return ratio


### Optimizing Parallel Nonces

In [7]:
# Actual labels-per-second for a given number of parallel nonces: whichever of
# hash speed or read speed is the bottleneck.
# This isn't quite accurate, because nonces are computed in batches, and the nonce
# speed is maximal only for integer multiples of the batch size.
# However, we take care of this in the optimization step.
def labels_per_sec(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces):
    """Effective label throughput: the smaller of the per-nonce hash rate and the read rate."""
    hash_bound = nonce_labels_per_sec / parallel_nonces
    # Mirrors min(hash_bound, read_labels_per_sec), including returning hash_bound on ties.
    return read_labels_per_sec if read_labels_per_sec < hash_bound else hash_bound

def expected_total_passes(parallel_nonces, q):
    """Expected number of passes until some nonce succeeds, trying `parallel_nonces` nonces per pass."""
    all_fail = (1 - q) ** parallel_nonces  # probability that every nonce in a pass fails
    return 1 / (1 - all_fail)

# Number of passes that succeed with probability `percentile`
# We're currently ignoring the possibility of a partial pass (i.e., finding k2 good labels with less than one pass)
def percentile_total_passes(parallel_nonces, q, percentile):
    """Smallest integer number of full passes after which success probability is at least `percentile`."""
    pass_success = 1 - (1 - q) ** parallel_nonces  # probability of success in a single pass
    if pass_success != 1:
        return ceil(log(1 - percentile) / log(1 - pass_success))
    # A pass that always succeeds needs exactly one pass (and the log is undefined).
    return 1

def expected_total_time_per_label(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces, q):
    """Expected seconds spent per label: expected number of passes divided by the effective label rate."""
    passes = expected_total_passes(parallel_nonces, q)
    rate = labels_per_sec(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces)
    return passes / rate


def percentile_total_time_per_label(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces, q, percentile):
    """Seconds spent per label at the given percentile: percentile pass count divided by the effective label rate."""
    passes = percentile_total_passes(parallel_nonces, q, percentile)
    rate = labels_per_sec(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces)
    return passes / rate


def expected_time_per_pass(nonce_labels_per_sec, read_labels_per_sec, q, parallel_nonces):
    """Expected time, in minutes, to process all `n` labels (notebook global).

    The per-label figure comes from `expected_total_time_per_label`, which
    already includes the expected number of passes.

    Note the argument order: ``q`` comes *before* ``parallel_nonces`` here,
    unlike in `expected_total_time_per_label`.

    Args:
        nonce_labels_per_sec: nonce-hashing throughput, labels per second.
        read_labels_per_sec: read throughput, labels per second.
        q: success probability of a single nonce.
        parallel_nonces: nonces tried in parallel during a pass.
    """
    # Fix: previously referenced the undefined name `hash_labels_per_sec`.
    expected_time_per_label = expected_total_time_per_label(
        nonce_labels_per_sec, read_labels_per_sec, parallel_nonces, q
    )
    time_per_pass = expected_time_per_label * n / 60  # seconds -> minutes
    return time_per_pass

def percentile_time_per_pass(nonce_labels_per_sec, read_labels_per_sec, q, parallel_nonces, percentile):
    """Time, in minutes, to process all `n` labels (notebook global) at the given percentile.

    The per-label figure comes from `percentile_total_time_per_label`, which
    already includes the percentile number of passes.

    Note the argument order: ``q`` comes *before* ``parallel_nonces`` here,
    unlike in `percentile_total_time_per_label`.

    Args:
        nonce_labels_per_sec: nonce-hashing throughput, labels per second.
        read_labels_per_sec: read throughput, labels per second.
        q: success probability of a single nonce.
        parallel_nonces: nonces tried in parallel during a pass.
        percentile: required overall success probability.
    """
    # Fix: previously referenced the undefined name `hash_labels_per_sec`.
    percentile_time_per_label = percentile_total_time_per_label(
        nonce_labels_per_sec, read_labels_per_sec, parallel_nonces, q, percentile
    )
    time_per_pass = percentile_time_per_label * n / 60  # seconds -> minutes
    return time_per_pass


def display_optimized_nonces(parallel_nonces, nonce_labels_per_sec, read_labels_per_sec, q, n, percentile):
    """Render a Markdown summary of proving time for a given number of parallel nonces.

    This function does not optimize anything itself; it reports figures for the
    `parallel_nonces` value it is handed.

    Args:
        parallel_nonces: number of nonces tried in parallel during a pass.
        nonce_labels_per_sec: nonce-hashing throughput, labels per second.
        read_labels_per_sec: read throughput, labels per second.
        q: success probability of a single nonce.
        n: total number of labels (shadows the notebook global of the same name).
        percentile: success probability used for the percentile figures.
    """
    # Convert the read rate back from labels/sec to units of 2**20 bytes/sec for display.
    mb_per_sec = read_labels_per_sec * label_size / 2**20
    e_time_per_label = expected_total_time_per_label(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces,q)
    p_time_per_label = percentile_total_time_per_label(nonce_labels_per_sec, read_labels_per_sec, parallel_nonces, q, percentile)
    
    # Totals are per-label seconds multiplied by n and divided by 60 (minutes).
    dm(rf"""##### Optimized execution for $2^{{ {log(nonce_labels_per_sec,2):.2f} }}$ nonces per label/sec, ${mb_per_sec:.2f}$ MB/sec read rate
* Optimal number of parallel nonces: {parallel_nonces}
  - will take {expected_total_passes(parallel_nonces,q):.2f} passes in expectation
      - ${{ {round(percentile*100)} }}^{{th}}$-percentile is {percentile_total_passes(parallel_nonces, q, percentile):.2f} passes
  - Expected total time per label is {e_time_per_label:.2e} seconds.
      - ${{ {round(percentile*100)} }}^{{th}}$-percentile time per label is {p_time_per_label:.2e} seconds
  - Expected total time for $2^{{ {log(n,2):.2f} }}$ labels is ${e_time_per_label * n / 60:.2f}$ minutes.
      - ${{ {round(percentile*100)} }}^{{th}}$-percentile time is ${p_time_per_label * n / 60:.2f}$ minutes.
""")


## Computing Rate-limiting PoW difficulty and time

### Adversarial work

We denote $W(c)$ the *expected* amount of work the adversary must do to generate a valid proof for one unit of space while storing only a $c$ fraction of the unit.
We use two units to measure cost: the storage cost, $S$ (for a single unit of spacetime) and the init cost $I$ for a single unit of space. Denote $\alpha=I/S$ (by our parameter selection, we expect $\alpha > 1$).

When storing a $c$ fraction of the space, the adversary pays at least $c\cdot S = (c/\alpha)\cdot I$ in storage cost, and then an additional $W(c)\cdot I$ init-units in PoW costs. 
If the PoW cost for trying a single nonce is $w$, then $W(c)=w\cdot E(\text{number of nonces tried until solution when storing a $c$ fraction})$.

Note that the adversary has an advantage in cost of running the PoW compared to the honest user (e.g., the honest users would probably use CPU vs. the adversary who is using a GPU or an ASIC). We denote $adv$ the advantage factor.

Therefore the honest party pays $S = I/\alpha$ in storage cost, and expected $W(1)\cdot adv \cdot I$ init-units in PoW costs.

The adversary's expected advantage is thus:
$$
    \frac{1/\alpha + W(1)\cdot adv}{c/\alpha + W(c)}
$$

For every value of $c\le 1$ and $adv \ge 1$, the advantage is monotone decreasing in $\alpha$ (i.e., the higher the value of $\alpha$, the lower the advantage). Thus, we can bound the adversarial advantage by taking the minimal value of $\alpha=1$, giving the bound:

$$
    \frac{1 + W(1)\cdot adv}{c + W(c)}
$$

In [8]:
# The equivalent of w init units in PoST cells
def PoST_cell_equivalent_for_w(w, n):
    """Convert `w` init units into a number of PoST cells, given `n` labels per unit."""
    cells_per_unit = n * PoST_cells_per_label
    return cells_per_unit * w

# Log2 of the expected number of nonces the adversary must try to find a valid proof.
def log_adversarial_nonces(c, k1, k2, n):
    """Log2 of the expected number of nonces tried by an adversary storing a `c` fraction.

    Args:
        c: fraction of the `n` labels the adversary stores.
        k1: expected number of good labels among all `n` (per-label probability is ``k1 / n``).
        k2: tail threshold passed to ``binom.logsf``.
        n: total number of labels.

    Returns:
        ``-log2`` of the per-nonce success probability over ``ceil(c * n)`` labels.
    """
    stored_labels = ceil(c * n)
    label_good_prob = k1 / n
    # Natural log of the probability that a single nonce yields a good proof.
    ln_success = binom.logsf(k2, stored_labels, label_good_prob)
    return -(ln_success / log(2))
    
# A lower bound on the expected adversarial cost in init units.
def adversarial_cost(c, k1, k2, n, w):
    """Storage term `c` plus PoW term: `w` per nonce times the expected number of nonces tried."""
    expected_nonces = 2**log_adversarial_nonces(c, k1, k2, n)
    return c + w * expected_nonces
      
def adversarial_advantage(c, k1, k2, n, w, adv_pow_advantage):
    """Ratio of the honest cost to the adversarial cost, both in init units.

    Args:
        c: fraction of the space unit the adversary stores.
        k1, k2, n: proof parameters, forwarded to `adversarial_cost`.
        w: PoW cost of trying a single nonce, in init units.
        adv_pow_advantage: multiplicative PoW advantage of the adversary (a number;
            inside this function it shadows the notebook function of the same name).

    Returns:
        ``(1 + w * adv_pow_advantage) / adversarial_cost(c, k1, k2, n, w)``.
    """
    # NOTE(review): the accompanying markdown writes the honest PoW term as
    # W(1) * adv with W(1) = w * E[honest nonces]; this code uses w directly.
    # The previously computed (and unused) honest nonce expectation was removed
    # as dead code -- confirm which form is intended.
    honest_cost = 1 + w * adv_pow_advantage
    return honest_cost / adversarial_cost(c, k1, k2, n, w)

### Adversary's optimal $c$ for given parameters

In [9]:
def find_optimal_c(k1, k2, n, w, disp=0):
    """Find the stored fraction c in (0, 1) that minimizes `adversarial_cost`.

    Args:
        k1, k2, n, w: forwarded to `adversarial_cost`.
        disp: verbosity level passed to the optimizer.

    Returns:
        Tuple ``(c, cost)``: the minimizing fraction and the cost at that point.
    """
    solver_options = {'disp': disp, 'xatol': 2**(-64), 'maxiter': 2**20}
    res = optimize.minimize_scalar(
        lambda c: adversarial_cost(c, k1, k2, n, w),
        bounds=(0, 1),
        method='bounded',
        options=solver_options,
    )
    return res.x, res.fun

### Finding maximal $k_1$ for a given adversarial advantage
When all else is fixed, the lower the value of $k_1$, the lower the adversarial advantage, but the higher the cost to the honest parties (number of nonces required).

The adversarial advantage (as opposed to the adversarial cost) depends on the specific honest party to which we are comparing.

In [10]:
def find_maximal_k1(max_adv_advantage, k2, n, w, adv_pow_advantage, disp=0):
    """Search for the k1 at which the adversary's optimal cost meets the target.

    The target adversarial cost is ``(1 + w * adv_pow_advantage) / max_adv_advantage``.
    k1 is searched over the interval (k2/2, 2*k2) with a bounded scalar minimizer.

    Args:
        max_adv_advantage: largest tolerated adversarial advantage.
        k2, n, w: forwarded to `find_optimal_c`.
        adv_pow_advantage: multiplicative PoW advantage of the adversary (a number).
        disp: verbosity level passed to both optimizers.

    Returns:
        The k1 found by the optimizer (a float).
    """
    adv_cost_target = (1 + w * adv_pow_advantage) / max_adv_advantage

    def get_adv(k1):
        # Objective shaped so that its minimum sits where cost reaches the target from above.
        _, cost = find_optimal_c(k1, k2, n, w, disp)
        if cost == adv_cost_target:
            return -math.inf
        if cost < adv_cost_target:
            # Cost is below the target: positive value that shrinks as cost grows.
            return 1 / cost
        # Cost is above the target: negative value that decreases as cost approaches the target.
        return -1 / (cost - adv_cost_target)

    solver_options = {'disp': disp, 'xatol': 2**(-64), 'maxiter': 2**20}
    res = optimize.minimize_scalar(
        get_adv,
        bounds=(k2/2, k2*2),
        method='bounded',
        options=solver_options,
    )
    return res.x

In [11]:
# Find the number of parallel nonces that minimize the total time to succeed with probability at least `percentile`
# We assume passes are always fully completed.

def find_opt_parallel_nonces(poster, k1, k2, n, w, percentile):
    """Pick the number of parallel nonces (a multiple of `nonce_batch_size`) minimizing total time.

    Total time is the honest PoW time (``cpu_init_time * w`` per nonce) plus the
    percentile read/hash time over `n` labels. The number of batches is first
    optimized as a continuous variable, then the better of its floor and ceiling
    is kept (the ceiling wins ties).

    Args:
        poster: machine profile exposing ``cpu_init_time``, ``nonce_labels_sec``, ``read_labels_sec``.
        k1, k2, n: proof parameters used to derive the per-nonce success probability.
        w: PoW cost of trying a single nonce, in init units.
        percentile: required overall success probability.

    Returns:
        Number of parallel nonces (batches times `nonce_batch_size`).
    """
    q = find_q_given_k1(k1, k2, n)

    def percentile_time(nonce_batches):
        parallel = nonce_batches * nonce_batch_size
        honest_pow_time = poster.cpu_init_time * w * nonce_batches * nonce_batch_size
        scan_time = n*percentile_total_time_per_label(
            poster.nonce_labels_sec, poster.read_labels_sec, parallel, q, percentile
        )
        return honest_pow_time+scan_time

    break_even = break_even_nonces(poster.nonce_labels_sec, poster.read_labels_sec)
    max_batches = 5*ceil(break_even / nonce_batch_size)
    res = optimize.minimize_scalar(
        percentile_time,
        bounds=(1, max_batches),
        method='bounded',
        options={'xatol': 0.1, 'maxiter': 2000},
    )

    lower, upper = floor(res.x), ceil(res.x)
    best_batches = lower if percentile_time(upper) > percentile_time(lower) else upper
    return best_batches * nonce_batch_size

In [12]:

# Display the calculations for a proof that requires $k_2$ good labels out of $n$, where each label is good with probability $p$.
def display_results(k1, k2, n, percentile):
    """Render a Markdown summary of the proof parameters.

    Shows k1 and p = k1/n, the proof size, the verification cost and the
    expected number of nonces an honest prover needs.

    Args:
        k1: expected number of good labels among all `n`.
        k2: number of good labels the proof consists of.
        n: total number of labels.
        percentile: kept for interface compatibility; not used by this function.
    """
    # Actual probability that a single nonce yields a valid proof.
    actualq = find_q_given_k1(k1, k2, n)

    # Warn on degenerate probabilities (0, 1, or so small that 1 - actualq rounds to 1).
    if actualq == 0 or actualq == 1 or (1 - actualq) == 1:
        # log(0) is undefined; previously the warning itself raised ValueError.
        log2_text = f"{log(actualq, 2):.2f}" if actualq > 0 else r"-\infty"
        dm(rf"**Warning**: actualq=${actualq:.2f}=2^{{ {log2_text} }}$")

    # With actualq == 0 no finite number of nonces suffices (and round(inf) would raise).
    expected_nonces_text = round(1/actualq) if actualq > 0 else r"$\infty$"

    dm(rf"""### Proof consists of a single nonce with $k_2={k2}$ good labels

* $k_1 = {k1:.2f}$ ($p=\frac{{k_1}}{{n}}={k1/n:.2e}$)
* Proof size is ${k2 *ceil(log(n,256))}$ bytes. 
* Verification cost is ${k2}$ scrypt evaluations.
* {expected_nonces_text} nonces required in expectation.
""")
    # A trailing loop that recomputed the optimal parallel nonces for every poster
    # was removed: its result was discarded and it silently depended on the
    # notebook-global `w`, which is not a parameter of this function.
    

In [13]:

def display_poster_results(c, k1, k2, n, w, poster, adv_init_time, percentile, parallel_nonces=None):
    """Render the per-machine results: proving time, honest PoW time and adversarial advantage.

    Args:
        c: fraction of the space unit the adversary stores.
        k1, k2, n: proof parameters.
        w: PoW cost of trying a single nonce, in init units.
        poster: machine profile exposing ``name``, ``cpu_init_time``,
            ``nonce_labels_sec`` and ``read_labels_sec``.
        adv_init_time: adversarial initialization time in seconds (shadows the
            notebook global of the same name).
        percentile: success probability used for the percentile figures.
        parallel_nonces: number of parallel nonces to report on; when ``None`` it
            is computed with `find_opt_parallel_nonces`.
    """
    q = find_q_given_k1(k1, k2, n)

    # Identity check: `== None` can misbehave for objects overriding __eq__.
    if parallel_nonces is None:
        parallel_nonces = find_opt_parallel_nonces(poster, k1, k2, n, w, percentile)

    # Honest PoW time in seconds: one PoW of cost w (in init units) per parallel nonce.
    honest_pow_time = poster.cpu_init_time * w * parallel_nonces
    adv_log_nonces = log_adversarial_nonces(c, k1, k2, n)
    adv_work_cost = w * 2**adv_log_nonces

    dm(rf"""#### {poster.name}""")

    display_optimized_nonces(parallel_nonces, poster.nonce_labels_sec, poster.read_labels_sec, q, n, percentile)

    dm(rf""" 
* Total PoW time for honest party: {round(honest_pow_time)} seconds ({honest_pow_time/60:.2f} minutes), with {percentile*100}% success probability, 
* Adversarial advantantage (with c={c:.3f}) is {adversarial_advantage(c, k1, k2, n, w, poster.cpu_init_time / adv_init_time):.3f}
    - Adversary will try $2^{{ {adv_log_nonces:.2f} }}$ nonces in expectation, costing {adv_work_cost:.2f} init-units of work.
    """)
    


# Putting things together
### Setting $k_2$ and computing $k_1$ to bound adversarial advantage

In [17]:
# Scenario: fix k2 and the tolerated adversarial advantage, then derive k1.
k2 = 100  # number of good labels the proof consists of
percentile = 0.99  # success probability used for the percentile timing figures
max_adv_advantage = 1.25  # largest tolerated adversarial advantage

# PoW seconds per nonce on the 'slow' reference CPU, converted to init units
# (a fraction of that machine's CPU initialization time).
slow_cpu_pow_seconds_per_nonce = 20
w = slow_cpu_pow_seconds_per_nonce / posters['slow'].cpu_init_time

# k1 is chosen against the 'slow' machine's PoW advantage.
k1 = find_maximal_k1(max_adv_advantage, k2, n, w, adv_pow_advantage_for_PoSTer(posters['slow']))

# The adversary's best storage fraction for these parameters, and its cost.
c, advcost = find_optimal_c(k1, k2, n, w)

display_results(k1, k2, n, percentile)
for poster in posters.values():
    display_poster_results(c, k1, k2, n, w, poster, adv_init_time, percentile)


### Proof consists of a single nonce with $k_2=100$ good labels

* $k_1 = 82.15$ ($p=\frac{k_1}{n}=4.78e-09$)
* Proof size is $500$ bytes. 
* Verification cost is $100$ scrypt evaluations.
* 41 nonces required in expectation.


#### i7-12700k/slow SSD

##### Optimized execution for $2^{ 30.99 }$ nonces per label/sec, $200.00$ MB/sec read rate
* Optimal number of parallel nonces: 192
  - will take 1.01 passes in expectation
      - ${ 99 }^{th}$-percentile is 1.00 passes
  - Expected total time per label is 9.11e-08 seconds.
      - ${ 99 }^{th}$-percentile time per label is 9.03e-08 seconds
  - Expected total time for $2^{ 34.00 }$ labels is $26.09$ minutes.
      - ${ 99 }^{th}$-percentile time is $25.85$ minutes.


 
* Total PoW time for honest party: 1722 seconds (28.70 minutes), with 99.0% success probability, 
* Adversarial advantantage (with c=0.781) is 1.249
    - Adversary will try $2^{ 16.22 }$ nonces in expectation, costing 0.02 init-units of work.
    

#### Intel(R) Pentium(R) Silver J5040 CPU @ 2.00GHz/fast HDD

##### Optimized execution for $2^{ 29.37 }$ nonces per label/sec, $150.00$ MB/sec read rate
* Optimal number of parallel nonces: 64
  - will take 1.26 passes in expectation
      - ${ 99 }^{th}$-percentile is 3.00 passes
  - Expected total time per label is 1.28e-07 seconds.
      - ${ 99 }^{th}$-percentile time per label is 3.05e-07 seconds
  - Expected total time for $2^{ 34.00 }$ labels is $36.76$ minutes.
      - ${ 99 }^{th}$-percentile time is $87.38$ minutes.


 
* Total PoW time for honest party: 1280 seconds (21.33 minutes), with 99.0% success probability, 
* Adversarial advantantage (with c=0.781) is 1.250
    - Adversary will try $2^{ 16.22 }$ nonces in expectation, costing 0.02 init-units of work.
    

## Setting both $k_2$ and $k_1$ and computing adversarial advantage

In [18]:
# Scenario: fix both k2 and k1 directly and report the resulting adversarial advantage.
# Note: this cell overwrites the notebook globals k2, percentile, k1, w, c and advcost.
k2 = 60  # number of good labels the proof consists of
percentile = 0.90  # success probability used for the percentile timing figures
k1 = 45  # expected number of good labels among all n

# PoW seconds per nonce on the 'slow' reference CPU, converted to init units
# (a fraction of that machine's CPU initialization time).
slow_cpu_pow_seconds_per_nonce = 5
w = slow_cpu_pow_seconds_per_nonce / posters['slow'].cpu_init_time

# The adversary's best storage fraction for these parameters, and its cost.
c, advcost = find_optimal_c(k1, k2, n, w)

display_results(k1, k2, n, percentile)
for poster in posters.values():
    display_poster_results(c, k1, k2, n, w, poster, adv_init_time, percentile)
  



### Proof consists of a single nonce with $k_2=60$ good labels

* $k_1 = 45.00$ ($p=\frac{k_1}{n}=2.62e-09$)
* Proof size is $300$ bytes. 
* Verification cost is $60$ scrypt evaluations.
* 75 nonces required in expectation.


#### i7-12700k/slow SSD

##### Optimized execution for $2^{ 30.99 }$ nonces per label/sec, $200.00$ MB/sec read rate
* Optimal number of parallel nonces: 176
  - will take 1.10 passes in expectation
      - ${ 90 }^{th}$-percentile is 1.00 passes
  - Expected total time per label is 9.14e-08 seconds.
      - ${ 90 }^{th}$-percentile time per label is 8.28e-08 seconds
  - Expected total time for $2^{ 34.00 }$ labels is $26.18$ minutes.
      - ${ 90 }^{th}$-percentile time is $23.70$ minutes.


 
* Total PoW time for honest party: 395 seconds (6.58 minutes), with 90.0% success probability, 
* Adversarial advantantage (with c=0.708) is 1.368
    - Adversary will try $2^{ 18.42 }$ nonces in expectation, costing 0.02 init-units of work.
    

#### Intel(R) Pentium(R) Silver J5040 CPU @ 2.00GHz/fast HDD

##### Optimized execution for $2^{ 29.37 }$ nonces per label/sec, $150.00$ MB/sec read rate
* Optimal number of parallel nonces: 176
  - will take 1.10 passes in expectation
      - ${ 90 }^{th}$-percentile is 1.00 passes
  - Expected total time per label is 2.80e-07 seconds.
      - ${ 90 }^{th}$-percentile time per label is 2.53e-07 seconds
  - Expected total time for $2^{ 34.00 }$ labels is $80.09$ minutes.
      - ${ 90 }^{th}$-percentile time is $72.49$ minutes.


 
* Total PoW time for honest party: 880 seconds (14.67 minutes), with 90.0% success probability, 
* Adversarial advantantage (with c=0.708) is 1.369
    - Adversary will try $2^{ 18.42 }$ nonces in expectation, costing 0.02 init-units of work.
    