In [1]:
from __future__ import division
import math
import numpy as np
import numpy.random
import scipy as sp
import scipy.stats


In [3]:
def ballot_comparison(n, gamma, o1, u1, o2, u2, reported_margin, N, null_lambda=1):
    """
    Compute the Kaplan-Markov p-value for a ballot comparison audit

    The p-value is

        (1 - 1/U)^n * (1 - 1/(2*gamma))^(-o1) * (1 - 1/gamma)^(-o2)
                    * (1 + 1/(2*gamma))^(-u1) * (1 + 1/gamma)^(-u2)

    with U = 2*gamma*N / (reported_margin/null_lambda), i.e. 2*gamma divided
    by the diluted margin. It is evaluated on the log scale and then
    exponentiated. The result is not capped, so it can exceed 1 when many
    overstatements are observed.

    Parameters
    ----------
    n : int
        sample size
    gamma : float
        value > 1 to inflate the allowable error
    o1 : int
        number of ballots that overstate any 
        margin by one vote but no margin by two votes
    u1 : int
        number of ballots that understate any margin by 
        exactly one vote, and every margin by at least one vote
    o2 : int
        number of ballots that overstate any margin by two votes
    u2 : int
        number of ballots that understate every margin by two votes
    reported_margin : float
        the smallest reported margin *in votes* between a winning
        and losing candidate
    N : int
        number of votes cast
    null_lambda : float
        value that describes the null difference between reported and actual votes;
        the reported margin is divided by it before use

    Returns
    -------
    pvalue : float
    """
    # The error bound must include the inflation factor gamma (U = 2*gamma/mu).
    # Leaving gamma out makes (1 - 1/U) too small and so understates the
    # p-value, which would let an audit stop before the risk limit is met.
    U = 2*gamma*N/(reported_margin/null_lambda)
    log_pvalue = n*np.log(1-1/U) - o1*np.log(1 - 1/(2*gamma)) - \
                    o2*np.log(1 - 1/gamma) - \
                    u1*np.log(1 + 1/(2*gamma)) - \
                    u2*np.log(1 + 1/gamma)
    pvalue = np.exp(log_pvalue)
    return pvalue


def findNmin_ballot_comparison(alpha, gamma, o1, u1, o2, u2,
                                reported_margin, N, null_lambda=1):
    """
    Find the smallest sample size at which a ballot comparison audit
    showing the given discrepancy counts could stop

    The result is the larger of two quantities:

    * one more than the integer part of
      -2*gamma/m * (log(alpha) + o1*log(1 - 1/(2*gamma)) + o2*log(1 - 1/gamma)
                    + u1*log(1 + 1/(2*gamma)) + u2*log(1 + 1/gamma)),
      where m = (reported_margin/null_lambda)/N;
    * the total number of discrepancies o1 + o2 + u1 + u2, since the sample
      must contain at least every ballot on which a discrepancy was seen.

    Parameters
    ----------
    alpha : float
        risk limit
    gamma : float
        value > 1 to inflate the allowable error
    o1 : int
        number of ballots that overstate any
        margin by one vote but no margin by two votes
    u1 : int
        number of ballots that understate any margin by
        exactly one vote, and every margin by at least one vote
    o2 : int
        number of ballots that overstate any margin by two votes
    u2 : int
        number of ballots that understate every margin by two votes
    reported_margin : float
        the smallest reported margin *in votes* between a winning
        and losing candidate
    N : int
        number of votes cast
    null_lambda : float
        value that describes the null difference between reported and actual votes

    Returns
    -------
    n : int
        the required sample size
    """
    diluted_margin = (reported_margin/null_lambda)/N

    # Log-scale contribution of one discrepancy of each kind.
    log_one_over = np.log(1 - 1/(2*gamma))
    log_two_over = np.log(1 - 1/gamma)
    log_one_under = np.log(1 + 1/(2*gamma))
    log_two_under = np.log(1 + 1/gamma)

    # Summed left to right, starting from the log of the risk limit.
    log_total = np.log(alpha) + o1*log_one_over + o2*log_two_over \
        + u1*log_one_under + u2*log_two_under

    size_from_bound = -2*gamma/diluted_margin * log_total
    discrepancy_count = o1 + o2 + u1 + u2

    return np.max([int(size_from_bound) + 1, discrepancy_count])


def findNmin_ballot_comparison_rates(alpha, gamma, r1, s1, r2, s2,
                                reported_margin, N, null_lambda=1):

    """
    Compute the smallest sample size for which a ballot comparison 
    audit with the given discrepancy *rates* could stop

    With m = (reported_margin/null_lambda)/N, each sampled ballot changes the
    log p-value by

        denom = log(1 - m/(2*gamma)) - r1*log(1 - 1/(2*gamma)) - r2*log(1 - 1/gamma)
                                     - s1*log(1 + 1/(2*gamma)) - s2*log(1 + 1/gamma)

    on average. If denom < 0 the audit can stop once n*denom <= log(alpha),
    i.e. at n = ceil(log(alpha)/denom). If denom >= 0 the p-value never
    shrinks at these rates and N is returned instead.
    
    Parameters
    ----------
    alpha : float
        risk limit
    gamma : float
        value > 1 to inflate the allowable error
    r1 : float
        rate of ballots that overstate any 
        margin by one vote but no margin by two votes
    s1 : float
        rate of ballots that understate any margin by 
        exactly one vote, and every margin by at least one vote
    r2 : float
        rate of ballots that overstate any margin by two votes
    s2 : float
        rate of ballots that understate every margin by two votes
    reported_margin : float
        the smallest reported margin *in votes* between a winning
        and losing candidate
    N : int
        number of votes cast
    null_lambda : float
        value that describes the null difference between reported and actual votes
        
    Returns
    -------
    n : int
        the required sample size, or N when the audit cannot stop at these rates
    """
    m = (reported_margin/null_lambda)/N

    denom = (np.log(1 - m/(2*gamma)) -
                r1*np.log(1 - 1/(2*gamma))- \
                r2*np.log(1 - 1/gamma) - \
                s1*np.log(1 + 1/(2*gamma)) - \
                s2*np.log(1 + 1/gamma) )
    if denom < 0:
        # Round UP: the stopping condition is n >= log(alpha)/denom, so
        # truncating toward zero would return a sample one ballot too small
        # to meet the risk limit.
        n0 = np.ceil(np.log(alpha)/denom)
    else:
        n0 = N
    return int(n0)


In [4]:
# Unit tests based on the worked examples in "A Gentle Introduction...":
# a 5-vote margin out of 100 ballots, gamma = 1.03905, and a 10% risk limit.

# The p-value falls below the risk limit after 80 ballots with a single
# one-vote understatement, and after 96 ballots with no discrepancies.
assert ballot_comparison(n=80, gamma=1.03905, o1=0, u1=1, o2=0, u2=0,
                         reported_margin=5, N=100) < 0.1
assert ballot_comparison(n=96, gamma=1.03905, o1=0, u1=0, o2=0, u2=0,
                         reported_margin=5, N=100) < 0.1

# The minimum-sample-size helper reproduces the same two sample sizes.
assert findNmin_ballot_comparison(alpha=0.1, gamma=1.03905, o1=0, u1=1, o2=0, u2=0,
                                  reported_margin=5, N=100) == 80
assert findNmin_ballot_comparison(alpha=0.1, gamma=1.03905, o1=0, u1=0, o2=0, u2=0,
                                  reported_margin=5, N=100) == 96