In [1]:
from __future__ import division, print_function
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import math
import numpy as np
import numpy.random
import scipy as sp
import scipy.stats

from ballot_comparison import findNmin_ballot_comparison_rates, ballot_comparison_pvalue
from hypergeometric import trihypergeometric_optim, simulate_ballot_polling_power
from fishers_combination import fisher_combined_pvalue, maximize_fisher_combined_pvalue, \
     create_modulus
from sprt import ballot_polling_sprt
from suite_tools import estimate_n

# Example of a hybrid audit in a large election with an exact tie

The numbers in this example may not be realistic, but serve as an example of what would happen in a tie election.

There are two strata. One contains every CVR county and the other contains every no-CVR county.
There were 2 million ballots cast in the election, 1.9 million in the CVR stratum and 100,000 in the no-CVR stratum.

The reported vote margin between A and B is 22,000 votes, a "diluted margin" of just over $1\%$.


Candidate | Reported Stratum 1 |  Reported Stratum 2 | Reported total 
---|---|---|---
A | 960,000 | 51,000 | 1,011,000
B | 940,000 | 49,000 | 989,000
Ballots | 1,900,000 | 100,000 | 2,000,000
Diluted margin | 1.05% | 2% | 1.1%

Assume there are only ballots for A and B.
In each stratum, candidate A reportedly received more votes than candidate B, but they *actually* received exactly the same number.
In the CVR stratum, there were 960,000 votes reported for A and 940,000 votes reported for candidate B, a margin of 20,000 votes.
In the no-CVR stratum, there were 51,000 votes reported for A and 49,000 votes for B, a margin of 2,000 votes.

If, for every $\lambda$, we can reject the conjunction hypothesis that the overstatement of the reported 
margin in the CVR stratum is $22000\lambda$ votes or more *and* the overstatement of the reported margin in the no-CVR stratum is $22000(1-\lambda)$ votes or more, the audit can stop. 

In [2]:
# Risk limit for the audit.
alpha = 0.05

# Reported tallies for the winner (w) and loser (l) in each stratum
# (stratum 1 = CVR, stratum 2 = no-CVR).
N_w1, N_l1 = 960000, 940000
N_w2, N_l2 = 51000, 49000

# Ballots cast in each stratum.
N1, N2 = 1900000, 100000

# Overall reported margin in votes; equals (N_w1 - N_l1) + (N_w2 - N_l2).
reported_margin = 22000
# The true outcome in this example is an exact tie.
actual_margin = 0

# Using Fisher's method to combine audits

First, we use `estimate_n` to determine the initial sample size to draw.

In [3]:
# Estimate the initial sample sizes at the configured risk limit.
# verbose=True prints each candidate size as it is tried.
estimate_n(
    N_w1=N_w1, N_l1=N_l1, N1=N1,
    N_w2=N_w2, N_l2=N_l2, N2=N2,
    risk_limit=alpha,
    verbose=True
)

...trying... 10 1.0
...trying... 20 1.0
...trying... 40 0.9991395133874198
...trying... 80 0.9966526267389898
...trying... 160 0.9873324507594201
...trying... 320 0.9545922806292406
...trying... 640 0.8534201017179187
...trying... 1280 0.6102596535828426
...trying... 2560 0.17980765899844742
...trying... 5120 0.016190046740217845
...trying... 3840.0 0.06180031473137937
...trying... 4480.0 0.028170827460459513
...trying... 4160.0 0.03701710328144692
...trying... 4000.0 0.04238731352954417


(3800, 200)

Even if we use large sample sizes in each stratum, the conclusion of Fisher's method will be to escalate the audit. In this case, we use sample sizes of $3800$ in the CVR stratum and $200$ in the no-CVR stratum. The maximum of Fisher's combined $p$-value over $\lambda$ is $100\%$, so we cannot confirm the reported results at risk limit $5\%$.

In [4]:
n1 = 3800   # sample size in the CVR stratum
n2 = 200    # sample size in the no-CVR stratum


def cvr_pvalue(alloc):
    """Ballot-comparison p-value for the CVR stratum at allocation `alloc`.

    The hypothetical sample has `o2 = int(0.005*n1)` discrepancies of type
    `o2` (presumably two-vote overstatements -- confirm against
    `ballot_comparison_pvalue`) and zero of every other type.
    """
    return ballot_comparison_pvalue(
        n=n1, gamma=1.03905,
        o1=0, u1=0, o2=int(0.005*n1), u2=0,
        reported_margin=reported_margin, N=N1,
        null_lambda=alloc)


def nocvr_pvalue(alloc):
    """SPRT ballot-polling p-value for the no-CVR stratum at allocation `alloc`."""
    half = int(n2*0.5)
    # Evenly split sample: half 0s and half 1s (presumably votes for the
    # reported loser and winner respectively -- confirm against the sprt module).
    tied_sample = np.array([0]*half + [1]*half)
    outcome = ballot_polling_sprt(
        sample=tied_sample,
        popsize=N2,
        alpha=0.05,  # set this param but we don't need to use it
        Vw=N_w2,
        Vl=N_l2,
        null_margin=(N_w2-N_l2) - alloc*reported_margin)
    return outcome['pvalue']


# Modulus built for these sample sizes. NOTE(review): the maximization call
# in the following cell passes modulus=None, so `mod` is not consumed there.
mod = create_modulus(
    n1=n1, n2=n2,
    n_w2=int(n2*0.5), n_l2=int(n2*0.5),
    N1=N1, V_wl=reported_margin, gamma=1.03905)



In [5]:
# Find the allocation lambda that maximizes Fisher's combined p-value
# for the two stratum-level p-value functions defined above.
maximize_fisher_combined_pvalue(
    N_w1, N_l1, N1,
    N_w2, N_l2, N2,
    pvalue_funs=(cvr_pvalue, nocvr_pvalue),
    stepsize=0.05,
    modulus=None,
    alpha=0.05,
    feasible_lambda_range=(-10, 10)
)

{'max_pvalue': 1.0,
 'min_chisq': 0.0,
 'allocation lambda': 0.8500000000001542,
 'tol': None,
 'stepsize': 0.05,
 'refined': False}

Doubling the sample sizes in each stratum doesn't help:

In [6]:
n1 = 7600   # doubled sample size in the CVR stratum
n2 = 400    # doubled sample size in the no-CVR stratum


def cvr_pvalue(alloc):
    """Ballot-comparison p-value for the CVR stratum at allocation `alloc`.

    The hypothetical sample has `o2 = int(0.005*n1)` discrepancies of type
    `o2` (presumably two-vote overstatements -- confirm against
    `ballot_comparison_pvalue`) and zero of every other type.
    """
    return ballot_comparison_pvalue(
        n=n1, gamma=1.03905,
        o1=0, u1=0, o2=int(0.005*n1), u2=0,
        reported_margin=reported_margin, N=N1,
        null_lambda=alloc)


def nocvr_pvalue(alloc):
    """SPRT ballot-polling p-value for the no-CVR stratum at allocation `alloc`."""
    half = int(n2*0.5)
    # Evenly split sample: half 0s and half 1s (presumably votes for the
    # reported loser and winner respectively -- confirm against the sprt module).
    tied_sample = np.array([0]*half + [1]*half)
    outcome = ballot_polling_sprt(
        sample=tied_sample,
        popsize=N2,
        alpha=0.05,  # set this param but we don't need to use it
        Vw=N_w2,
        Vl=N_l2,
        null_margin=(N_w2-N_l2) - alloc*reported_margin)
    return outcome['pvalue']


# Modulus built for these sample sizes. NOTE(review): the maximization call
# in the following cell passes modulus=None, so `mod` is not consumed there.
mod = create_modulus(
    n1=n1, n2=n2,
    n_w2=int(n2*0.5), n_l2=int(n2*0.5),
    N1=N1, V_wl=reported_margin, gamma=1.03905)



In [7]:
# Repeat the maximization of Fisher's combined p-value over the allocation
# lambda, now using the p-value functions for the doubled sample sizes.
maximize_fisher_combined_pvalue(
    N_w1, N_l1, N1,
    N_w2, N_l2, N2,
    pvalue_funs=(cvr_pvalue, nocvr_pvalue),
    stepsize=0.05,
    modulus=None,
    alpha=0.05,
    feasible_lambda_range=(-10, 10)
)

{'max_pvalue': 1.0,
 'min_chisq': 0.0,
 'allocation lambda': 0.8500000000001542,
 'tol': None,
 'stepsize': 0.05,
 'refined': False}