# Tools for SUITE Risk-Limiting Election Audits



In [1]:
from __future__ import print_function

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

from collections import OrderedDict
from itertools import product
import math

import numpy as np
from ballot_comparison import ballot_comparison_pvalue
from fishers_combination import  maximize_fisher_combined_pvalue, create_modulus
from sprt import ballot_polling_sprt

from cryptorandom.cryptorandom import SHA256
from cryptorandom.sample import sample_by_index

from suite_tools import check_valid_audit_parameters, check_valid_vote_counts, \
        find_winners_losers, print_reported_votes, \
        estimate_n, estimate_escalation_n, \
        parse_manifest, unique_manifest, find_ballot, \
        audit_contest
        

  return f(*args, **kwds)


In [2]:
# global audit parameters

seed = 12345678901234567890  # use, e.g., 20 rolls of a 10-sided die
risk_limit = 0.05    # risk limit

gamma=1.03905         # gamma from Lindeman and Stark (2012)

lambda_step = 0.05    # stepsize for the discrete bounds on Fisher's combining function

# assumptions for finding initial sample sizes

o1_rate = 0.002       # expect 2 1-vote overstatements per 1000 ballots in the CVR stratum
o2_rate = 0           # expect 0 2-vote overstatements
u1_rate = 0           # expect 0 1-vote understatements
u2_rate = 0           # expect 0 2-vote understatements

stratum_sizes = [100000, 5000]  # total ballots in the two strata, CVR, no-CVR

n_ratio = stratum_sizes[0]/np.sum(stratum_sizes) 
                     # allocate sample in proportion to ballots cast in each stratum

# contest-specific parameters
num_winners = 2       # maximum number of winners, per social choice function

In [3]:
check_valid_audit_parameters(risk_limit, lambda_step, o1_rate, o2_rate, \
                                 u1_rate, u2_rate, stratum_sizes, n_ratio, num_winners)

# Enter the reported votes

In [4]:
# input number of winners
# input names as well as reported votes in each stratum

# candidates are a dict with name, [votes_in_stratum_1, votes_in_stratum_2]
candidates = { "candidate 3": [30000, 500],
               "candidate 2": [50000, 1000],
               "candidate 1": [10000, 500],
               "candidate 4": [500, 10]}

# Run check on the input vote totals
check_valid_vote_counts(candidates, stratum_sizes)

In [5]:
# compute and print, reported winners, losers, and pairwise margins
(candidates, margins, winners, losers) = find_winners_losers(candidates, num_winners)
    
print_reported_votes(candidates, winners, losers, margins, stratum_sizes)


Total reported votes:
			CVR	no-CVR	total
	 candidate 2 : 50000 	 1000 	 51000
	 candidate 3 : 30000 	 500 	 30500
	 candidate 1 : 10000 	 500 	 10500
	 candidate 4 : 500 	 10 	 510

	 total votes:	 90500 	 2010 	 92510

	 non-votes:	 9500 	 2990 	 12490

winners:
	 candidate 2
	 candidate 3

losers:
	 candidate 1
	 candidate 4


margins:
	 candidate 2 beat candidate 4 by 50490 votes
	 candidate 2 beat candidate 1 by 40500 votes
	 candidate 3 beat candidate 4 by 29990 votes
	 candidate 3 beat candidate 1 by 20000 votes

smallest margin: 20000 
diluted margin: 0.19047619047619047


# Initial sample size

In [6]:
# Calculate expected sample size across (winner, loser) pairs

sample_sizes = {}

for k in product(winners, losers):
    sample_sizes[k] = estimate_n(N_w1 = candidates[k[0]][0],\
                                 N_w2 = candidates[k[0]][1],\
                                 N_l1 = candidates[k[1]][0],\
                                 N_l2 = candidates[k[1]][1],\
                                 N1 = stratum_sizes[0],\
                                 N2 = stratum_sizes[1],\
                                 o1_rate = o1_rate,\
                                 o2_rate = o2_rate,\
                                 u1_rate = u1_rate,\
                                 u2_rate = u2_rate,\
                                 n_ratio = n_ratio,\
                                 risk_limit = risk_limit,\
                                 gamma = gamma,\
                                 stepsize = lambda_step)

In [7]:
sample_size = np.amax([v[0]+v[1] for v in sample_sizes.values()])
n1 = math.ceil(sample_size*n_ratio)    
n2 = sample_size-n1

print(sample_sizes, '\n\nexpected minimum sample size:', sample_size)

{('candidate 2', 'candidate 4'): (29, 1), ('candidate 2', 'candidate 1'): (34, 1), ('candidate 3', 'candidate 4'): (58, 2), ('candidate 3', 'candidate 1'): (58, 2)} 

expected minimum sample size: 60


# Random sampling

If this section is giving errors, you probably need to update your version of `cryptorandom`.

```
pip install [--update] cryptorandom
```

In [8]:
prng = SHA256(seed)   # initialize the PRNG

In [9]:
# CVR stratum initial sample size, sampled with replacement
sample1 = prng.randint(1, stratum_sizes[0]+1, size=n1)

# No-CVR ballots are sampled without replacement
sample2 = sample_by_index(stratum_sizes[1], n2, prng)

### Stratum 1 sample

In [10]:
print("CVR stratum sample:\n", sample1)

CVR stratum sample:
 [76116 45424 33501 45326  2081 56264 25122 16602 79743 61814 57922 41676
 95332 38891 17757 64352 84257 47365 10908 97791 77941 73573 51855 88527
 35549 20934 61419 70683 70220 45067 67903 94304 20823 50570 88735  9973
 44578 34320  8262 32532 85102 87511 63375 96612 52917 91127 84152 74227
 76674 76640 62444 83868  3974 81503 82205 41161 28136 12244]


In [11]:
print("CVR stratum sample, sorted:\n", np.sort(sample1))

CVR stratum sample, sorted:
 [ 2081  3974  8262  9973 10908 12244 16602 17757 20823 20934 25122 28136
 32532 33501 34320 35549 38891 41161 41676 44578 45067 45326 45424 47365
 50570 51855 52917 56264 57922 61419 61814 62444 63375 64352 67903 70220
 70683 73573 74227 76116 76640 76674 77941 79743 81503 82205 83868 84152
 84257 85102 87511 88527 88735 91127 94304 95332 96612 97791]


In [12]:
print("CVR stratum sample, sorted, duplicates removed:\n", np.unique(np.sort(sample1)))

CVR stratum sample, sorted, duplicates removed:
 [ 2081  3974  8262  9973 10908 12244 16602 17757 20823 20934 25122 28136
 32532 33501 34320 35549 38891 41161 41676 44578 45067 45326 45424 47365
 50570 51855 52917 56264 57922 61419 61814 62444 63375 64352 67903 70220
 70683 73573 74227 76116 76640 76674 77941 79743 81503 82205 83868 84152
 84257 85102 87511 88527 88735 91127 94304 95332 96612 97791]


In [13]:
m = np.zeros_like(sample1, dtype=bool)
m[np.unique(sample1, return_index=True)[1]] = True
print("Stratum 1 repeated ballots:\n", sample1[~m])

Stratum 1 repeated ballots:
 []


### Stratum 2 sample

In [14]:
print("No-CVR stratum sample:\n", sample2)

No-CVR stratum sample:
 [1133 4784]


In [15]:
print("No-CVR stratum sample, sorted:\n", np.sort(sample2))

No-CVR stratum sample, sorted:
 [1133 4784]


# Find ballots using ballot manifest

Ballot manifest: Each line must have a batch label, a comma, and one of the following:
  1. the number of ballots in the batch 
  1. a range specified with a colon (e.g., 131:302), or 
  1. a list of ballot identifiers within parentheses, separated by spaces (e.g., (996 998 1000)).
  
Each line should have exactly one comma.

In [16]:
# I'm imagining this is is a list for now
ballot_manifest_cvr = ['1, 10000', '2, 10001:99998', '3, (205 210)']
ballot_manifest_poll = ['1, 1000', '2, 1001:4998', '3, (205 210)']

In [17]:
# step 1: expand the ballot manifest into a dict. keys are batches, values are ballot numbers.
cvr_manifest_parsed = parse_manifest(ballot_manifest_cvr)
poll_manifest_parsed = parse_manifest(ballot_manifest_poll)

In [18]:
# count ballots listed in the manifests
listed_cvr = np.sum([len(v) for v in cvr_manifest_parsed.values()])
listed_poll = np.sum([len(v) for v in poll_manifest_parsed.values()])

# test that manifest matches reported ballot totals

assert listed_cvr == stratum_sizes[0]
assert listed_poll == stratum_sizes[1]

In [19]:
# step 2: give ballots unique IDs

unique_cvr_manifest = unique_manifest(cvr_manifest_parsed)
unique_poll_manifest = unique_manifest(poll_manifest_parsed)

In [20]:
# step 3: look up sample values

print("CVR Stratum")
print("sampled ballot, original ballot label, batch label, which ballot in batch")
i = 0
for s in sample1:
    i += 1
    original_ballot_label, batch_label, which_ballot = find_ballot(s, \
                                                                   unique_cvr_manifest, \
                                                                   cvr_manifest_parsed)
    print(s, original_ballot_label, batch_label, which_ballot)

CVR Stratum
sampled ballot, original ballot label, batch label, which ballot in batch
76116 76117 2 66116
45424 45425 2 35424
33501 33502 2 23501
45326 45327 2 35326
2081 2082 1 2081
56264 56265 2 46264
25122 25123 2 15122
16602 16603 2 6602
79743 79744 2 69743
61814 61815 2 51814
57922 57923 2 47922
41676 41677 2 31676
95332 95333 2 85332
38891 38892 2 28891
17757 17758 2 7757
64352 64353 2 54352
84257 84258 2 74257
47365 47366 2 37365
10908 10909 2 908
97791 97792 2 87791
77941 77942 2 67941
73573 73574 2 63573
51855 51856 2 41855
88527 88528 2 78527
35549 35550 2 25549
20934 20935 2 10934
61419 61420 2 51419
70683 70684 2 60683
70220 70221 2 60220
45067 45068 2 35067
67903 67904 2 57903
94304 94305 2 84304
20823 20824 2 10823
50570 50571 2 40570
88735 88736 2 78735
9973 9974 1 9973
44578 44579 2 34578
34320 34321 2 24320
8262 8263 1 8262
32532 32533 2 22532
85102 85103 2 75102
87511 87512 2 77511
63375 63376 2 53375
96612 96613 2 86612
52917 52918 2 42917
91127 91128 2 81127
84152 8

In [21]:
print("Polling Stratum")
print("sampled ballot, original ballot label, batch label, which ballot in batch")
i = 0
for s in sample2:
    i += 1
    original_ballot_label, batch_label, which_ballot = find_ballot(s, \
                                                                   unique_poll_manifest, \
                                                                   poll_manifest_parsed)
    print(i, s, batch_label, which_ballot)

Polling Stratum
sampled ballot, original ballot label, batch label, which ballot in batch
1 1133 2 133
2 4784 2 3784


# Enter the sample data

Sample statistics for the CVR stratum (stratum 1)

In [22]:
# Number of observed...
n1 = 60
o1 = 5 # 1-vote overstatements
o2 = 0 # 2-vote overstatements
u1 = 0 # 1-vote understatements
u2 = 0 # 2-vote understatements

Sample statistics for the no-CVR stratum (stratum 2)

In [23]:
# Number of votes for each candidate
# recall that in the provided example, n2=3 so the totals here must add up to <= 3.
n2=3

# no-CVR sample is stored in a dict with name, votes in the sample
observed_poll = { "candidate 3": 1,
               "candidate 2": 2,
               "candidate 1": 0,
               "candidate 4": 0}

# What's the risk for this sample?

In [24]:
# Find audit p-values across (winner, loser) pairs

audit_pvalues = audit_contest(candidates, winners, losers, stratum_sizes, \
                  n1, n2, o1, o2, u1, u2, observed_poll, \
                  risk_limit=risk_limit, gamma=gamma, stepsize=lambda_step)
audit_pvalues

{('candidate 2', 'candidate 1'): 0.0008703324591761152,
 ('candidate 2', 'candidate 4'): 3.311380205428538e-05,
 ('candidate 3', 'candidate 1'): 0.23272911575919975,
 ('candidate 3', 'candidate 4'): 0.01806179900603555}

# Escalation guidance: how many more ballots should be drawn?

In [25]:
sample_sizes_new = {}

for k in product(winners, losers):
    sample_sizes_new[k] = estimate_escalation_n(\
                                 N_w1 = candidates[k[0]][0],\
                                 N_w2 = candidates[k[0]][1],\
                                 N_l1 = candidates[k[1]][0],\
                                 N_l2 = candidates[k[1]][1],\
                                 N1 = stratum_sizes[0],\
                                 N2 = stratum_sizes[1],\
                                 n1 = n1,\
                                 n2 = n2,\
                                 o1_obs = o1,\
                                 o2_obs = o2,\
                                 u1_obs = u1,\
                                 u2_obs = u2,\
                                 n2l_obs = observed_poll[k[1]],\
                                 n2w_obs = observed_poll[k[0]],\
                                 o1_rate = o1_rate,\
                                 o2_rate = o2_rate,\
                                 u1_rate = u1_rate,\
                                 u2_rate = u2_rate,\
                                 n_ratio = n_ratio,\
                                 risk_limit = risk_limit,\
                                 gamma = gamma,\
                                 stepsize = lambda_step)

In [26]:
## TODO: Check that we like this sort of output

sample_size_new = np.amax([v[0]+v[1] for v in sample_sizes_new.values()])
n1_new = np.amax([v[0] for v in sample_sizes_new.values()])
n2_new = np.amax([v[1] for v in sample_sizes_new.values()])

print(sample_sizes_new, '\n\nExpected minimum sample size:', sample_size_new)
print("\nBallots to draw in the CVR stratum:", n1_new - n1)
print("Ballots to draw in the no-CVR stratum:", n2_new - n2)

{('candidate 2', 'candidate 4'): (63, 3), ('candidate 2', 'candidate 1'): (63, 3), ('candidate 3', 'candidate 4'): (63, 3), ('candidate 3', 'candidate 1'): (87, 4)} 

Expected minimum sample size: 91

Ballots to draw in the CVR stratum: 27
Ballots to draw in the no-CVR stratum: 1
