# Tools for SUITE Risk-Limiting Election Audits



In [22]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

import numpy as np
from ballot_comparison import ballot_comparison_pvalue
from fishers_combination import fisher_combined_pvalue, maximize_fisher_combined_pvalue, \
     bound_fisher_fun, calculate_lambda_range    
from sprt import ballot_polling_sprt

from cryptorandom.cryptorandom import SHA256
from cryptorandom.sample import sample_by_index

# Initial sample size

Reported Votes

In [2]:
# Reported tallies in each stratum; the suffix is the stratum number and, judging by
# how they are used below (Vw=Nw2, Vl=Nl2), w/l are the reported winner and loser.
Nw1, Nl1 = 2000, 1900
Nw2, Nl2 = 20000, 19000

# Ballots per stratum, votes per candidate across strata, and the overall totals.
N1, N2 = Nw1 + Nl1, Nw2 + Nl2
Nw, Nl = Nw1 + Nw2, Nl1 + Nl2
N = N1 + N2
reported_margin = Nw - Nl

print("Smallest margin (in votes):", reported_margin)
# The diluted margin is the vote margin divided by the total number of ballots.
print("Diluted margin:", reported_margin/N)

Smallest margin (in votes): 1100
Diluted margin: 0.02564102564102564


Audit parameters

In [3]:
# Risk limit for the audit.
alpha = 0.1

# Proportion of the initial sample allocated to stratum 1; here it is
# stratum 1's share of all ballots.
n_ratio = N1 / N


Expected sample sizes

In [4]:
def estimate_n(risk_limit, n_ratio):
    """
    Estimate initial sample sizes for the two strata.

    The total sample size is increased in steps of 1000 ballots. At each size
    the combined P-value is bounded for a hypothetical sample in which the
    stratum 1 comparison finds no discrepancies (o1 = u1 = o2 = u2 = 0) and
    the stratum 2 ballots split between loser, winner and other in the
    reported proportions. The search stops at the first size whose bound is
    at or below `risk_limit`.

    Relies on the notebook-level tallies (Nw1, Nl1, N1, Nw2, Nl2, N2, N,
    reported_margin).

    Parameters
    ----------
    risk_limit : float
        Largest acceptable value of the expected P-value.
    n_ratio : float
        Fraction of the total sample allocated to stratum 1.

    Returns
    -------
    tuple of int
        (n1, n2), the stratum 1 and stratum 2 sample sizes last tried.
        (0, 0) when the risk limit is already met without sampling.
    """
    step = 1000
    n = 0
    n1 = n2 = 0  # bound up front so the return is valid even if the loop never runs
    expected_pvalue = 1
    # np.isnan, not `is np.nan`: a NaN produced by a computation is generally a
    # different object from np.nan, so an identity test would miss it and the
    # search would stop on an undefined P-value.
    while (expected_pvalue > risk_limit) or np.isnan(expected_pvalue):
        if n + step > N:
            # Never look past the total number of ballots; without this bound the
            # loop does not terminate when the risk limit is unattainable (e.g. 0).
            print("Risk limit", risk_limit, "not attained with a sample of", n,
                  "ballots; stopping the search.")
            break
        n = n + step
        n1 = int(n_ratio * n)
        n2 = n - n1
        cvr_pvalue = lambda alloc: ballot_comparison_pvalue(n=n1, gamma=1.03905, o1=0, 
                                                    u1=0, o2=0, u2=0, 
                                                    reported_margin=reported_margin, N=N1, 
                                                    null_lambda=alloc)
        # Hypothetical stratum 2 sample: 0 = loser, 1 = winner, nan = anything else,
        # in the reported proportions.
        nocvr_pvalue = lambda alloc: ballot_polling_sprt(sample= np.array([0]*int(n2*Nl2/N2)+\
                                             [1]*int(n2*Nw2/N2)+\
                                             [np.nan]*int(n2*(N2-Nl2-Nw2)/N2)), \
                            popsize=N2, \
                            alpha=0.05,  # set this param but we don't need to use it
                            Vw=Nw2, Vl=Nl2, null_margin=(Nw2-Nl2) - alloc*reported_margin)['pvalue']
        # Crude maximizer for now
        res = bound_fisher_fun(Nw1, Nl1, N1, Nw2, Nl2, N2,
                       pvalue_funs=(cvr_pvalue, nocvr_pvalue), stepsize=0.5, plausible_lambda_range=(-3, 3))
        expected_pvalue = np.max(res['upper_bounds'])
        if (n % 10000)==0:
            # progress report every 10000 ballots
            print(n, expected_pvalue)
    
    return (n1, n2)

# Drive estimate_n from two sliders, initialised to the audit parameters set above.
interact(
    estimate_n,
    risk_limit=widgets.FloatSlider(value=alpha, min=0, max=0.5, step=0.005),
    n_ratio=widgets.FloatSlider(value=n_ratio, min=0, max=1, step=0.01),
)

10000 0.4850348339626901


(1727, 17273)

<function __main__.estimate_n>

# Random sampling

In [41]:
# Seed for the pseudo-random number generator.
seed = 12345
# Number of ballots to draw from stratum 1 and from stratum 2.
n1, n2 = 100, 15000

In [42]:
# A single SHA256 generator serves both draws. The generator expression is consumed
# left to right, so stratum 1 is drawn first and stratum 2 second.
prng = SHA256(seed)
sample1, sample2 = (
    sample_by_index(stratum_size, sample_size, prng)
    for stratum_size, sample_size in ((N1, n1), (N2, n2))
)

Stratum 1 sample

In [None]:
# Stratum 1 sample exactly as returned by sample_by_index.
print("Stratum 1 sample:\n", sample1)

In [None]:
# np.sort returns a sorted copy, so sample1 keeps its original order.
print("Stratum 1 sample, sorted:\n", np.sort(sample1))

In [None]:
# np.unique already returns its values in sorted order, so no separate sort is needed.
print("Stratum 1 sample, sorted, duplicates removed:\n", np.unique(sample1))

In [None]:
# return_index gives, for each distinct value, the position where it first appears.
# m is True at those positions; everything else is a ballot that was drawn again.
first_seen = np.unique(sample1, return_index=True)[1]
m = np.zeros(sample1.shape, dtype=bool)
m[first_seen] = True
repeated = sample1[np.logical_not(m)]
print("Stratum 1 repeated ballots:\n", repeated)

Stratum 2 sample

In [None]:
# Stratum 2 sample exactly as returned by sample_by_index.
print("Stratum 2 sample:\n", sample2)

In [None]:
# np.sort returns a sorted copy, so sample2 keeps its original order.
print("Stratum 2 sample, sorted:\n", np.sort(sample2))

In [None]:
# np.unique already returns its values in sorted order, so no separate sort is needed.
print("Stratum 2 sample, sorted, duplicates removed:\n", np.unique(sample2))

In [None]:
# return_index gives, for each distinct value, the position where it first appears.
# m2 is True at those positions; everything else is a ballot that was drawn again.
first_seen2 = np.unique(sample2, return_index=True)[1]
m2 = np.zeros(sample2.shape, dtype=bool)
m2[first_seen2] = True
repeated2 = sample2[np.logical_not(m2)]
print("Stratum 2 repeated ballots:\n", repeated2)

# Find ballots using ballot manifest

Ballot manifest: Each line must have a batch label, a comma, and one of the following:
  1. the number of ballots in the batch 
  1. a range specified with a colon (e.g., 131:302), or 
  1. a list of ballot identifiers within parentheses, separated by spaces (e.g., (996 998 1000)).
  
Each line should have exactly one comma.

In [37]:
# For now the manifest is assumed to be a list of strings, one per manifest line,
# each of the form "<batch label>, <count | first:last | (id id ...)>".
ballot_manifest = ['1, 100', '2, 101:200', '3, (205 210)']

In [38]:
# step 1: expand the ballot manifest into a dict. keys are batches, values are ballot numbers.
ballot_manifest_dict = {}

# loop over inputs
for i in ballot_manifest:
    # every entry must split into exactly two parts: the batch label and the ballot spec
    parts = i.split(",")
    if len(parts) != 2:
        raise ValueError("Ballot manifest entry %r must contain exactly one comma." % (i,))

    # pull out batch label
    batch, val = (part.strip() for part in parts)

    # ballots listed so far for this batch (a batch may appear on several lines)
    ballots = ballot_manifest_dict.setdefault(batch, [])

    # parse what comes after the batch label
    if '(' in val:
        # explicit identifiers, e.g. "(996 998 1000)": drop the parentheses, split on spaces
        ballots += [int(num) for num in val[1:-1].split()]

    elif ':' in val:
        # range with both end points included, e.g. "131:302"
        limits = val.split(':')
        ballots += list(range(int(limits[0]), int(limits[1])+1))

    else:
        # a plain count: number the ballots consecutively, continuing after the
        # ballots already recorded for this batch (so from 1 for a new batch)
        try:
            count = int(val)
        except ValueError:
            # only a malformed count is reported here; anything else (including
            # KeyboardInterrupt) is no longer swallowed
            print("The ballot manifest has been entered improperly.")
        else:
            minval = len(ballots) + 1
            ballots += list(range(minval, minval + count))


In [None]:
# step 2: look up sample values

def find_ballot(ballot_num, manifest=None):
    """
    Locate a ballot in the expanded ballot manifest.

    Parameters
    ----------
    ballot_num : int
        Ballot identifier to look for.
    manifest : dict, optional
        Maps batch label to the list of ballot identifiers in that batch.
        Defaults to the notebook-level `ballot_manifest_dict`.

    Returns
    -------
    tuple or None
        (batch label, 0-based position of the ballot within that batch's list)
        for the first batch containing the ballot. If no batch contains it, a
        "not found" message is printed and None is returned.
    """
    if manifest is None:
        manifest = ballot_manifest_dict
    for batch, ballots in manifest.items():
        # list.index raises ValueError when the ballot is absent, so one scan
        # answers both "is it here?" and "where?"
        try:
            return batch, ballots.index(ballot_num)
        except ValueError:
            continue
    print("Ballot %i not found" % ballot_num)
    return None

print("sorted number, ballot, batch_label, which_ballot_in_batch")
# i is the 1-based position of the ballot within sample1
for i, s in enumerate(sample1, start=1):
    location = find_ballot(s)
    if location is None:
        # find_ballot has already reported the missing ballot; unpacking None
        # here would raise a TypeError and abort the whole listing
        continue
    batch_label, which_ballot = location
    print(i, s, batch_label, which_ballot) # This uses 0-indexing still. Should we change it to be 1,...,n?

# Should more ballots be audited?

Sample statistics for the CVR stratum (stratum 1)

In [None]:
# Simple version, fill in the blanks
# The right-hand sides are left empty on purpose: type in the observed counts
# before running this cell (as written it is not valid Python).
# NOTE(review): these four values are passed to ballot_comparison_pvalue as
# o1, u1, o2, u2; presumably they count one- and two-vote overstatements (o)
# and understatements (u) found in the stratum 1 sample -- confirm.

o1 = 
u1 = 
o2 = 
u2 = 

In [23]:
# Tricky version, input using sliders

def stratum1_inputs(o1, u1, o2, u2):
    """Pass the four slider values through unchanged as the tuple (o1, u1, o2, u2)."""
    return o1, u1, o2, u2

# One integer slider per count, each running from 0 to the stratum 1 sample size n1.
stratum1_stats = interactive(
    stratum1_inputs,
    o1=widgets.IntSlider(value=0, min=0, max=n1),
    u1=widgets.IntSlider(value=0, min=0, max=n1),
    o2=widgets.IntSlider(value=0, min=0, max=n1),
    u2=widgets.IntSlider(value=0, min=0, max=n1),
)
display(stratum1_stats)

(0, 0, 0, 0)

In [31]:
# Read the current value of the first four child widgets (the sliders), in order.
o1, u1, o2, u2 = (slider.value for slider in stratum1_stats.children[:4])

0


Sample statistics for the no-CVR stratum (stratum 2)

In [None]:
# Simple version, fill in the blanks
# The right-hand sides are left empty on purpose: type in the observed counts
# before running this cell (as written it is not valid Python).
# n2l and n2w are the numbers of sampled stratum 2 ballots for the reported
# loser and winner; further down they become the 0s and 1s of the sample
# passed to ballot_polling_sprt.


n2l =
n2w = 

In [35]:
# Tricky version, input using sliders

def stratum2_inputs(ballots_for_loser, ballots_for_winner):
    """Pass the two slider values through unchanged as (ballots_for_loser, ballots_for_winner)."""
    return ballots_for_loser, ballots_for_winner

# One integer slider per tally, each running from 0 to the stratum 2 sample size n2.
stratum2_stats = interactive(
    stratum2_inputs,
    ballots_for_loser=widgets.IntSlider(value=0, min=0, max=n2),
    ballots_for_winner=widgets.IntSlider(value=0, min=0, max=n2),
)
display(stratum2_stats)

(0, 0)

In [36]:
# Read the current value of the first two child widgets (the sliders): loser, then winner.
n2l, n2w = (slider.value for slider in stratum2_stats.children[:2])

In [None]:
# Stratum 2 ballots that are for neither the reported loser nor the winner.
# The tallies must fit inside the sample: multiplying a list by a negative
# count silently yields an empty list, so inconsistent input would otherwise
# be evaluated as a sample larger than the n2 ballots actually drawn.
n2_other = n2 - n2w - n2l
if n2_other < 0:
    raise ValueError("Stratum 2 tallies exceed the stratum 2 sample size: %d + %d > %d"
                     % (n2l, n2w, n2))

# P-value of the stratum 1 (CVR) comparison audit as a function of the allocation `alloc`.
cvr_pvalue = lambda alloc: ballot_comparison_pvalue(n=n1, gamma=1.03905, o1=o1, 
                                                    u1=u1, o2=o2, u2=u2, 
                                                    reported_margin=reported_margin, N=N1, 
                                                    null_lambda=alloc)
# P-value of the stratum 2 (no-CVR) polling audit; the sample is coded
# 0 = loser, 1 = winner, nan = anything else.
nocvr_pvalue = lambda alloc: ballot_polling_sprt(sample= np.array([0]*n2l+[1]*n2w+[np.nan]*n2_other), \
                            popsize=N2, \
                            alpha=0.05,  # set this param but we don't need to use it
                            Vw=Nw2, Vl=Nl2, null_margin=(Nw2-Nl2) - alloc*reported_margin)['pvalue']
# Crude maximizer for now
res = bound_fisher_fun(Nw1, Nl1, N1, Nw2, Nl2, N2,
                       pvalue_funs=(cvr_pvalue, nocvr_pvalue), stepsize=0.5, plausible_lambda_range=(-3, 3))
expected_pvalue = np.max(res['upper_bounds'])
# A NaN P-value compares False here and therefore escalates, which is the safe outcome.
if expected_pvalue <= alpha:
    print("Stop the audit")
else:
    print("Escalate the audit")