# Table of sampling rates

This notebook reproduces Table 1 in the paper: the workload (sampling rate) for a Bernoulli sampling risk-limiting audit (RLA), using a risk limit of 5%. Simulations assume that the reported margins are correct.


In [1]:
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
import math
import numpy as np
import scipy as sp
from scipy.stats import binom, hypergeom
import pandas as pd
from geometric_skipping import geometric_skipping
from sprt import ballot_polling_sprt

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
def compute_unconditional_power(Nw, Nl, popsize, pi, alpha, reps=10000, seed=12345678900987654321):
    '''
    Estimate unconditional power of the test: sample at rate pi, then run the SPRT on that sample.
    Calculations assume that the reported margin is correct and that the nuisance parameter 
    (total number of ballots for either w or l) is known.
        
    Nw = total number of votes reported for w,
    Nl = total number of votes reported for l,
    popsize = total population size,
    pi = the sampling probability,
    alpha = the type I error rate,
    reps = number of simulations,
    seed = seed for the pseudo-random number generator; calls with the same
           arguments and seed draw the same samples. Pass None to seed from
           fresh entropy instead.

    Returns the fraction of simulated audits whose p-value is <= alpha.
    '''
    # Use a private generator so the result is reproducible and independent of
    # the global NumPy state. RandomState only accepts seeds in [0, 2**32 - 1],
    # so the (much larger) default seed is folded into that range.
    rng = np.random.RandomState(None if seed is None else seed % 2**32)

    # Population of ballots (truth): 0 = vote for l, 1 = vote for w, NaN = invalid/other.
    # Built with array operations to avoid materialising a Python list of
    # popsize elements; the dtype stays integer when there are no invalid ballots.
    n_invalid = popsize - Nw - Nl
    population = np.repeat([0, 1], [Nl, Nw])
    if n_invalid > 0:
        population = np.concatenate([population, np.full(n_invalid, np.nan)])
    rng.shuffle(population)

    pvalues = np.zeros(reps)
    for i in range(reps):
        # Bernoulli sampling at rate pi: the sample size is Binomial(popsize, pi),
        # and given its size the sample is drawn without replacement.
        n = binom.rvs(popsize, pi, random_state=rng)
        sam = rng.choice(population, size=n, replace=False)
        res = ballot_polling_sprt(sample=sam, popsize=popsize, alpha=alpha, Vw=Nw, Vl=Nl, number_invalid=popsize-Nw-Nl)
        pvalues[i] = res['pvalue']
    power = np.mean(pvalues <= alpha)
    return power

## Power table for $\alpha = 0.05$

Find the sampling rate $p$ needed to achieve power $80\%$, $90\%$, and $99\%$ for varying population sizes and vote margins.

In [3]:
def estimate_powers(popsize, margin, pi_step=0.2, pi_tol=2, lo_pi=0, reps=10000):
    """
    Estimate the sampling rates p1, p2, p3 to attain power 80%, 90%, and 99%,
    assuming only ballots for w and l were cast.

    The search first steps the sampling rate up from lo_pi in increments of
    pi_step (never beyond 1) until the simulated power is at least 99%, then
    bisects within the brackets found along the way, first for 99%, then 90%,
    then 80% power.

    popsize = integer population size
    margin = margin between w and l as a proportion of popsize (e.g. 0.01 for 1%)
    pi_step = initial linear search step size
    pi_tol = precision for estimating sampling rates. 
             e.g. if pi_tol = 2, then only sampling rates 1%, 2%, ... are possible.
             if pi_tol = 3, then sampling rates 1.1%, 1.2%, ... are possible.
    lo_pi = lowest sampling rate to try
    reps = number of simulations used for each power estimate

    NOTE: the risk limit is read from the module-level variable `alpha`, which
    must be defined before this function is called.

    Returns the tuple (p1, p2, p3).
    """

    print('Running N='+str(popsize)+", margin="+str(margin))
    Nw = int(0.5*popsize*(margin+1))
    Nl = popsize - Nw

    def simulated_power(pi):
        # Simulated power of the audit at sampling rate pi.
        return compute_unconditional_power(Nw=Nw, Nl=Nl, popsize=popsize, pi=pi, alpha=alpha, reps=reps)

    x = simulated_power(lo_pi)

    lo80_pi = lo_pi
    lo90_pi = lo_pi
    hi_pi = lo_pi
    # Lower end of the 99% bracket. If no step is taken below, this is one step
    # under lo_pi, but never a negative (invalid) sampling rate.
    lo99_pi = max(lo_pi - pi_step, 0)

    # Step out by pi_step sampling rates until getting at least 99% power.
    # A NaN power estimate is treated as "not there yet" rather than ending
    # the search, and the rate is capped at 1 (sampling every ballot).
    while (np.isnan(x) or x < 0.99) and hi_pi < 1:
        if x < 0.9:
            lo90_pi = hi_pi
        if x < 0.8:
            lo80_pi = hi_pi
        lo99_pi = hi_pi
        hi_pi = min(hi_pi + pi_step, 1.0)
        x = simulated_power(hi_pi)
        print("trying pi=", hi_pi, ", power is ", x)

    def bisect(lo_pi, hi_pi, desired_power):
        # Narrow [lo_pi, hi_pi] on the grid of rates rounded to pi_tol decimals
        # and return the upper end of the final bracket.
        while round(hi_pi-lo_pi, pi_tol) > 0:
            mid_pi = round((lo_pi + hi_pi)/2, pi_tol)
            if mid_pi == lo_pi or mid_pi == hi_pi:
                # Rounding can no longer produce a new grid point.
                break
            x_mid = simulated_power(mid_pi)
            print("trying pi=", mid_pi, ", power is ", x_mid)
            if x_mid > desired_power:
                hi_pi = mid_pi
            else:
                lo_pi = mid_pi
        return hi_pi

    # Bisection method within the last stepping interval to find where power is 99%
    p3 = bisect(lo99_pi, hi_pi, 0.99)

    # Bisection method between p3 (99% power) and lo90_pi to find where power is 90%
    p2 = bisect(lo90_pi, p3, 0.9)

    # Bisection method between p2 (90% power) and lo80_pi to find where power is 80%
    p1 = bisect(lo80_pi, p2, 0.8)

    return (p1, p2, p3)

In [4]:
simTable = pd.DataFrame(
    columns=("Pop size", "Margin", "p for 80% power", "p for 90% power", "p for 99% power")
)

alpha = 0.05
N = [10**5, 10**6, 10**7]
power = [0.8, 0.9, 0.99]
margin = [0.01, 0.02, 0.05, 0.1, 0.2]
reps = 10000

# Search settings (pi_step, pi_tol) used for each population size:
# larger populations need a finer grid of sampling rates.
search_grid = {
    10**5: (0.2, 2),
    10**6: (0.05, 3),
    10**7: (0.01, 4),
}

for popsize in N:
    step, tol = search_grid[popsize]
    for m in margin:
        # The smallest population with the tightest margin starts its search
        # at a sampling rate of 0.4; every other case starts from 0.
        start = 0.4 if (popsize == 10**5 and m == margin[0]) else 0
        res = estimate_powers(popsize=popsize, margin=m, pi_step=step, pi_tol=tol, lo_pi=start, reps=reps)
        # Append one row at a time so finished rows survive an interrupted run.
        simTable.loc[len(simTable)] = popsize, m, res[0], res[1], res[2]

Running N=100000, margin=0.01
trying pi= 0.6000000000000001 , power is  0.8718
trying pi= 0.8 , power is  0.9967
trying pi= 0.7 , power is  0.967
trying pi= 0.75 , power is  0.9857
trying pi= 0.78 , power is  0.9931
trying pi= 0.77 , power is  0.9917
trying pi= 0.76 , power is  0.9858
trying pi= 0.69 , power is  0.9566
trying pi= 0.65 , power is  0.9311
trying pi= 0.62 , power is  0.9002
trying pi= 0.61 , power is  0.8885
trying pi= 0.51 , power is  0.7528
trying pi= 0.56 , power is  0.8305
trying pi= 0.54 , power is  0.7945
trying pi= 0.55 , power is  0.8169
Running N=100000, margin=0.02
trying pi= 0.2 , power is  0.7398
trying pi= 0.4 , power is  0.974
trying pi= 0.6000000000000001 , power is  0.9996
trying pi= 0.5 , power is  0.9953
trying pi= 0.45 , power is  0.9899
trying pi= 0.47 , power is  0.9933
trying pi= 0.46 , power is  0.9915
trying pi= 0.33 , power is  0.9397
trying pi= 0.27 , power is  0.8716
trying pi= 0.3 , power is  0.906
trying pi= 0.29 , power is  0.897
trying pi= 0

In [5]:
# Display the estimated sampling rates, one row per (population size, margin) pair.
simTable

Unnamed: 0,Pop size,Margin,p for 80% power,p for 90% power,p for 99% power
0,100000.0,0.01,0.55,0.62,0.77
1,100000.0,0.02,0.23,0.3,0.46
2,100000.0,0.05,0.05,0.07,0.12
3,100000.0,0.1,0.02,0.02,0.04
4,100000.0,0.2,0.01,0.01,0.01
5,1000000.0,0.01,0.104,0.142,0.242
6,1000000.0,0.02,0.029,0.04,0.075
7,1000000.0,0.05,0.005,0.007,0.013
8,1000000.0,0.1,0.002,0.002,0.004
9,1000000.0,0.2,0.001,0.001,0.001
