# Athena audits: demo and state results
Work out test cases for multi-round audits.

Show usage of the Audit class.

Demo 2016 presidential contest in selected states

## Setup and define some utilities

In [1]:
from athena.audit import Audit
import math
import json
import sys

In [2]:
# Run this any time for fast turnaround of changes to the library 
from importlib import reload
import athena.audit
reload(athena.audit)
from athena.audit import Audit

In [3]:
def make_audit(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule):
    """Build an Audit loaded with one contest and a round schedule.

    The contest parameters are packed into a dict and handed to
    ``Audit.add_election``; ``round_schedule`` is then passed to
    ``Audit.add_round_schedule``.  ``pstop_goal`` is only stored in the
    contest dict here -- no round size is computed by this helper.

    Returns the configured Audit instance.
    """

    contest = dict(
        alpha=alpha,
        delta=delta,
        candidates=candidates,
        results=results,
        ballots_cast=ballots_cast,
        winners=winners,
        name=name,
        model=model,
        pstop_goal=pstop_goal,
    )
    audit = Audit(audit_type, alpha, delta)
    audit.add_election(contest)
    audit.add_round_schedule(round_schedule)
    return audit

In [4]:
def find_next_round_size(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule):
    """Create a fresh Audit for the given contest and ask it for the next round size.

    A new Audit is constructed on every call, given the contest description
    and ``round_schedule``, and then queried with ``pstop_goal``.

    Returns whatever ``Audit.find_next_round_size(pstop_goal)`` returns.
    """

    audit = Audit(audit_type, alpha, delta)
    audit.add_election({
        "alpha": alpha,
        "delta": delta,
        "candidates": candidates,
        "results": results,
        "ballots_cast": ballots_cast,
        "winners": winners,
        "name": name,
        "model": model,
        "pstop_goal": pstop_goal,
    })
    audit.add_round_schedule(round_schedule)
    return audit.find_next_round_size(pstop_goal)

In [5]:
def sample90(margin, audit_type="ATHENA"):
    """Return the first-round sample size for a synthetic contest with the given margin.

    A two-candidate contest ("A" beats "B") with 10000 ballots cast is built
    so that A leads B by ``round(margin * 10000)`` votes.  The round size is
    then requested from ``find_next_round_size`` with risk limit 0.1,
    delta 1.0, the "bin" model, a stopping-probability goal of 0.9 and an
    empty round schedule.

    Args:
        margin: winner's lead as a fraction of ballots cast, strictly
            between 0 and 1.
        audit_type: audit type name passed through to find_next_round_size.

    Returns:
        Tuple ``(size, x)`` where ``x`` is the full find_next_round_size
        result and ``size`` is ``x['detailed']['A-B']['next_round_sizes'][0]``.

    Raises:
        AssertionError: if ``margin`` is not strictly between 0 and 1.
        ValueError: if ``margin`` is so small that it rounds to a lead of
            zero votes, which would describe a tie rather than a win.
    """

    assert 0.0 < margin < 1.0
    ballots_cast = 10000
    margin_votes = round(margin * ballots_cast)
    if margin_votes == 0:
        raise ValueError(
            f"margin {margin!r} rounds to zero votes out of {ballots_cast} ballots")
    # Split what is left after removing the lead evenly between the two
    # candidates.  Flooring the remainder (rather than flooring half of the
    # lead) keeps a + b <= ballots_cast when margin_votes is odd.
    b = (ballots_cast - margin_votes) // 2
    a = b + margin_votes
    x = find_next_round_size(audit_type, 0.1, 1.0, ["A", "B"], [a, b], ballots_cast, 1, "state", "bin", [0.9], [])
    return (x['detailed']['A-B']['next_round_sizes'][0], x)

In [6]:
def sample90v(a, b, ballots_cast, audit_type="ATHENA"):
    """Return the first-round sample size given the top two vote counts of a 1-winner contest.

    The larger of ``a`` and ``b`` is always reported as candidate "A" (the
    winner) and the smaller as "B", so argument order does not matter.
    The round size is requested from ``find_next_round_size`` with risk
    limit 0.1, delta 1.0, the "bin" model and a stopping-probability goal
    of 0.9.

    Returns a tuple ``(size, result)`` where ``result`` is the full
    find_next_round_size output and ``size`` is its first 'A-B' entry of
    'next_round_sizes'.
    """

    loser, winner = sorted((a, b))
    assert 0 < loser < winner < ballots_cast
    outcome = find_next_round_size(audit_type, 0.1, 1.0, ["A", "B"], [winner, loser], ballots_cast, 1, "state", "bin", [0.9], [])
    first_size = outcome['detailed']['A-B']['next_round_sizes'][0]
    return (first_size, outcome)

In [7]:
# Define a class to avoid cluttering notebook with stdout
class redirect_output(object):
    """Context manager for redirecting stdout/stderr to files.

    Args:
        stdout: path of the file that receives ``sys.stdout`` inside the
            ``with`` block; an empty string leaves stdout untouched.
        stderr: path of the file that receives ``sys.stderr``; an empty
            string leaves stderr untouched.  When it equals ``stdout``
            both streams share a single file object.

    Files are opened in 'w' mode on entry.  On exit the original streams
    are restored and every file opened by this manager is closed, so the
    captured output is flushed to disk and no handles are leaked.
    """

    def __init__(self, stdout='', stderr=''):
        self.stdout = stdout
        self.stderr = stderr
        self._opened = []

    def __enter__(self):
        self.sys_stdout = sys.stdout
        self.sys_stderr = sys.stderr
        self._opened = []

        try:
            if self.stdout:
                sys.stdout = self._open(self.stdout)
            if self.stderr:
                if self.stderr == self.stdout:
                    sys.stderr = sys.stdout
                else:
                    sys.stderr = self._open(self.stderr)
        except BaseException:
            # __exit__ is not called when __enter__ fails, so undo any
            # partial redirection here before propagating the error.
            self._restore()
            raise
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised in the with-body propagate.
        self._restore()

    def _open(self, path):
        """Open *path* for writing and remember the handle for cleanup."""
        handle = open(path, 'w')
        self._opened.append(handle)
        return handle

    def _restore(self):
        """Put the original streams back, then close the files we opened."""
        sys.stdout = self.sys_stdout
        sys.stderr = self.sys_stderr
        while self._opened:
            self._opened.pop().close()

In [8]:
# Print all the current properties and values of an object
# From https://stackoverflow.com/a/59128615/507544
from pprint import pprint
from inspect import getmembers
from types import FunctionType

def attributes(obj):
    """Return a dict of the public, non-function attributes of ``obj``.

    Names starting with an underscore are skipped, as is any name whose
    value on ``type(obj)`` is a plain Python function.  Keys follow the
    order produced by ``dir(obj)``.
    """
    # Names that resolve to plain functions on the class, i.e. methods.
    function_names = set()
    for member_name, member in getmembers(type(obj)):
        if isinstance(member, FunctionType):
            function_names.add(member_name)

    visible = {}
    for attr_name in dir(obj):
        if attr_name[0] == '_' or attr_name in function_names:
            continue
        # dir() may list names that cannot actually be read; skip those.
        if hasattr(obj, attr_name):
            visible[attr_name] = getattr(obj, attr_name)
    return visible

def print_attributes(obj):
    """Pretty-print the mapping that ``attributes(obj)`` returns."""

    snapshot = attributes(obj)
    pprint(snapshot)

# Basic demo of multi-round Audit
Simple recipe for a test with multiple rounds: exactly half of the selected ballots are for the declared winner, naively leading to an ever-escalating audit.

In [9]:
audit_type = "ATHENA"
alpha = 0.1
delta = 1.0
candidates = ["A", "B"]
results = [60000, 40000]
ballots_cast = 100000
winners = 1
name = "test_election"
model = "bin"
pstop_goal = [.7, .9]
round_schedule = []

In [10]:
a = make_audit(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule)

## Round 1: select 112 ballots (70% stopping probability), 56 of which are for winner

In [11]:
x = a.find_next_round_size(pstop_goal)

In [12]:
x

{'detailed': {'A-B': {'pstop_goal': [0.7, 0.9],
   'next_round_sizes': [112, 184],
   'prob_stop': [0.7002755397469657, 0.9092067701297123]}},
 'future_round_sizes': [112, 184]}

Take the first offered sample size, for 70% stopping probability

In [13]:
sample_size = x['future_round_sizes'][0]

In [14]:
sample_size

112

In [15]:
a.add_round_schedule([sample_size])

In [16]:
winner_shares = [sample_size // 2]

In [17]:
r = a.find_risk(winner_shares)

In [18]:
r

{'risk': 0.5444829381038225,
 'delta': 9.835828548017783,
 'passed': 0,
 'observed': [56],
 'required': [65]}

In [19]:
def next_round(a, winner_shares, r):
    """Schedule one more round in which the winner gets half the total sample, and re-evaluate risk.

    Steps:
      * measure how far the largest observed winner count in ``r`` is below
        the largest required count;
      * ask audit ``a`` for future round sizes (using the notebook-level
        ``pstop_goal``, which is read as a global, not passed in);
      * turn each offered total into an increment over the largest round
        scheduled so far, padded by twice the shortfall, and take the first;
      * extend the audit's round schedule with the new cumulative size;
      * append half of the new cumulative size to ``winner_shares``
        (in place when it is a list, so the caller's list grows) and
        report the choice on stdout.

    Returns the result of ``a.find_risk(winner_shares)``.
    """
    shortfall = max(r['required']) - max(r['observed'])
    proposal = a.find_next_round_size(pstop_goal)
    incremental_round_sizes = [
        total - max(a.round_schedule) + 2 * shortfall
        for total in proposal['future_round_sizes']
    ]
    incremental_sample_size = incremental_round_sizes[0]
    extended_schedule = a.round_schedule + [max(a.round_schedule) + incremental_sample_size]
    a.add_round_schedule(extended_schedule)
    next_total_winner_share = a.round_schedule[-1] // 2
    winner_shares += [next_total_winner_share]
    print(f'Next round: select {incremental_sample_size} more ballots, next total winner share is {next_total_winner_share}')
    return a.find_risk(winner_shares)

## Round 2: select 132 more ballots, half of which are for winner

In [20]:
r = next_round(a, winner_shares, r)

Next round: select 132 more ballots, next total winner share is 122


In [21]:
a.round_schedule

[112, 244]

In [22]:
r

{'risk': 0.5444829381038225,
 'delta': 145.5156049638303,
 'passed': 0,
 'observed': [56, 122],
 'required': [65, 137]}

## Round 3: select 174 more ballots, half for winner

In [23]:
r = next_round(a, winner_shares, r)

Next round: select 174 more ballots, next total winner share is 209


In [24]:
a.round_schedule

[112, 244, 418]

In [25]:
r

{'risk': 0.5444829381038225,
 'delta': 5073.554088943761,
 'passed': 0,
 'observed': [56, 122, 209],
 'required': [65, 137, 232]}

## Re-imagine the final-round winner share
`passed` should be true with the required winner share, false with one less

In [26]:
a.find_risk(winner_shares[:-1] + [r['required'][-1]])

{'risk': 0.08229447087655635,
 'delta': 0.45207796198550587,
 'passed': 1,
 'observed': [56, 122, 232],
 'required': [65, 137, 232]}

In [27]:
a.find_risk(winner_shares[:-1] + [r['required'][-1] - 1])

{'risk': 0.10984559911191313,
 'delta': 0.6781169429782573,
 'passed': 0,
 'observed': [56, 122, 231],
 'required': [65, 137, 232]}

# Demo with 100 irrelevant ballots
Show a bit of performance / timing info also

FIXME: Needs more checking and updating, I think....

In [28]:
audit_type = "ATHENA"
alpha = 0.1
delta = 1.0
candidates = ["A", "B"]
results = [600, 300]
ballots_cast = 1000
winners = 1
name = "test_election"
model = "bin"
pstop_goal = [.5, .7, .9]
round_schedule = []

In [29]:
a = make_audit(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule)

In [30]:
%time x = a.find_next_round_size(pstop_goal)

CPU times: user 37.6 ms, sys: 0 ns, total: 37.6 ms
Wall time: 37.6 ms


In [31]:
x

{'detailed': {'A-B': {'pstop_goal': [0.5, 0.7, 0.9],
   'next_round_sizes': [32, 48, 69],
   'prob_stop': [0.535513313605613, 0.7610118463711638, 0.9021135361161704]}},
 'future_round_sizes': [32, 48, 69]}

In [32]:
%%timeit
x = a.find_next_round_size(pstop_goal)

24.1 ms ± 3.57 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
sample_size = x['future_round_sizes'][0]

In [34]:
a.add_round_schedule([sample_size])

In [35]:
a.round_schedule

[32]

In [36]:
r = a.find_risk([sample_size // 2])

In [37]:
r

{'risk': 0.5794753786910353,
 'delta': 6.583250172027414,
 'passed': 0,
 'observed': [16],
 'required': [19]}

In [38]:
sample_size

32

In [39]:
[(w, a.find_risk([w])['risk']) for w in range(0, sample_size, 10)]

[(0, 0.9999999999999988),
 (10, 0.9899785723715814),
 (20, 0.1421578033679998),
 (30, 0.0003768747035901778)]

In [40]:
winner_share_big_win = 2 * sample_size // 3

In [41]:
r = a.find_risk([winner_share_big_win])

In [42]:
r

{'risk': 0.08750577539655499,
 'delta': 0.20572656787585716,
 'passed': 1,
 'observed': [21],
 'required': [19]}

In [43]:
x = find_next_round_size(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule)

In [44]:
x

{'detailed': {'A-B': {'pstop_goal': [0.5, 0.7, 0.9],
   'next_round_sizes': [32, 48, 69],
   'prob_stop': [0.535513313605613, 0.7610118463711638, 0.9021135361161704]}},
 'future_round_sizes': [32, 48, 69]}

In [45]:
round_schedule = a.round_schedule

In [46]:
round_schedule

[32]

In [47]:
a.add_round_schedule(round_schedule)

In [48]:
a.round_schedule

[32]

In [49]:
x = a.find_next_round_size(pstop_goal)

In [50]:
x

{'detailed': {'A-B': {'pstop_goal': [0.5, 0.7, 0.9],
   'next_round_sizes': [60, 69, 109],
   'prob_stop': [0.7978204745282979, 0.8663421621443725, 0.9574296496375156]}},
 'future_round_sizes': [60, 69, 109]}

In [51]:
below_kmin = max(r['required']) - max(r['observed'])

In [52]:
(max(r['required']), max(r['observed']))

(19, 21)

In [53]:
future_round_sizes = x['future_round_sizes']

In [54]:
below_kmin

-2

In [55]:
future_round_sizes

[60, 69, 109]

In [56]:
list(map(lambda x: x - max(round_schedule) + 2 * below_kmin, future_round_sizes))

[24, 33, 73]

# Try to reproduce R2B2/Athena vs BRAVO
Sample Sizes for 90% probability of ending a Ballot Polling Audit of 2016 statewide Presidential contest

with risk limit 0.1, larger margins


In [57]:
# Read in data from 2016.  The with-block closes the file as soon as it has
# been parsed instead of leaving the handle open until garbage collection.
with open('data/2016_election.json', encoding='utf-8') as election_file:
    election_2016 = json.load(election_file)

In [58]:
election_2016['Alabama']

{'contests': {'presidential': {'winners': 1,
   'candidates': ['Clinton', 'Trump'],
   'results': [729547, 1318255],
   'ballots_cast': 2123372,
   'state_id': 1,
   'margin': -0.2874828718792149}}}

In [59]:
def sample_state(state):
    """Return sample information for the given state's 2016 presidential contest.

    Looks up ``state`` in the notebook-level ``election_2016`` data and
    passes the first two entries of the contest's 'results' list, together
    with its 'ballots_cast', to ``sample90v``.  sample90v decides which of
    the two counts is the winner, so their order in the data does not matter.

    Returns the ``(size, details)`` tuple produced by sample90v.
    Raises KeyError if the state or the expected contest keys are missing.
    """

    contest = election_2016[state]['contests']['presidential']
    results = contest['results']
    return sample90v(results[0], results[1], contest['ballots_cast'])

In [60]:
states = ['Alabama', 'Maryland', 'New York', 'Rhode Island', 'New Jersey', 'Ohio', 'Virginia',
          'Georgia', 'North Carolina', 'Arizona', 'Nevada' ]
# skip 'Minnesota', 'Florida', 'Wisconsin', 'Pennsylvania', 'Michigan'

In [61]:
athena_results = {}
with redirect_output("debug_output.txt"):
  for state in states:
    athena_results[state] = sample_state(state)

In [62]:
{s: r[0]  for s, r in athena_results.items()}

{'Alabama': 90,
 'Maryland': 94,
 'New York': 132,
 'Rhode Island': 288,
 'New Jersey': 346,
 'Ohio': 1024,
 'Virginia': 2365,
 'Georgia': 2606,
 'North Carolina': 5117,
 'Arizona': 5313,
 'Nevada': 11389}

# Misc snippets of code

In [63]:
with redirect_output("my_output.txt"):
    x = sample_state('Alabama')

In [64]:
x

(90,
 {'detailed': {'A-B': {'pstop_goal': [0.9],
    'next_round_sizes': [90],
    'prob_stop': [0.9054503985476622]}},
  'future_round_sizes': [90]})

In [65]:
e3 = find_next_round_size(audit_type, alpha, delta, ["A", "B", "C"], [600, 300, 100], ballots_cast, winners, name, model, pstop_goal, round_schedule)



In [66]:
e3

{'detailed': {'A-B': {'pstop_goal': [0.5, 0.7, 0.9],
   'next_round_sizes': [60, 69, 109],
   'prob_stop': [0.7978204745282979, 0.8663421621443725, 0.9574296496375156]},
  'A-C': {'pstop_goal': [0.5, 0.7, 0.9],
   'next_round_sizes': [48, 48, 48],
   'prob_stop': [0.9910306966080233, 0.6908930982960162, 0.6908930982960162]}},
 'future_round_sizes': [60, 69, 109]}

In [67]:
election = {
    "alpha": alpha,
    "delta": delta,
    "candidates": candidates,
    "results": results,
    "ballots_cast": ballots_cast,
    "winners": winners,
    "name": name,
    "model": model,
    "pstop": pstop_goal,
}

# Ignore the rest - earlier work by hand

In [68]:
below_kmin = max(r['required']) - max(r['observed'])

In [69]:
below_kmin

-2

In [70]:
(max(r['required']), max(r['observed']))

(19, 21)

In [71]:
x = a.find_next_round_size(pstop_goal)

In [72]:
future_round_sizes = x['future_round_sizes']

In [73]:
future_round_sizes

[60, 69, 109]

In [74]:
incremental_round_sizes = list(map(lambda x: x - max(a.round_schedule) + 2 * below_kmin, future_round_sizes))

In [75]:
incremental_round_sizes

[24, 33, 73]

In [76]:
incremental_sample_size = incremental_round_sizes[0]

In [77]:
incremental_sample_size

24

In [78]:
a.add_round_schedule(a.round_schedule + [max(a.round_schedule) + incremental_sample_size])

In [79]:
print_attributes(a)

{'alpha': 0.1,
 'audit_kmins': [19],
 'audit_observations': [],
 'audit_type': 'ATHENA',
 'delta': 1.0,
 'election': <athena.election.Election object at 0x7fa0b27fb2d0>,
 'elections': [],
 'round_schedule': [32, 56]}


In [80]:
winner_shares += [a.round_schedule[-1] // 2]

In [81]:
winner_shares

[56, 122, 209, 28]

In [82]:
r = a.find_risk(winner_shares)

IndexError: index 56 is out of bounds for axis 0 with size 33

In [None]:
r

## Round 3

In [None]:
below_kmin = max(r['required']) - max(r['observed'])

In [None]:
below_kmin

In [None]:
(max(r['required']), max(r['observed']))

In [None]:
x = a.find_next_round_size(pstop_goal)

In [None]:
future_round_sizes = x['future_round_sizes']

In [None]:
future_round_sizes

In [None]:
incremental_round_sizes = list(map(lambda x: x - max(a.round_schedule) + 2 * below_kmin, future_round_sizes))

In [None]:
incremental_round_sizes

Tracks interactive output so far: https://gist.github.com/nealmcb/0a165b790b6732096c89535d66aab976

[Possibly not true?] For some reason, it seems necessary to create a new Audit object with the round schedule; simply updating the round schedule of an existing Audit object does not appear to work (?)

In [None]:
a = make_audit(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule)

In [None]:
x = a.find_next_round_size(pstop_goal)

In [None]:
x

In [None]:
future_round_sizes = x['future_round_sizes']

In [None]:
future_round_sizes

Doesn't match: 112, 184. Should be 226, 260, 328?

In [None]:
future_round_sizes

In [None]:
list(map(lambda x: x - max(round_schedule) + 2 * below_kmin, future_round_sizes))

In [None]:
max(round_schedule)

dups...

In [None]:
round_schedule = a.round_schedule

In [None]:
a = make_audit(audit_type, alpha, delta, candidates, results, ballots_cast, winners, name, model, pstop_goal, round_schedule)

In [None]:
print_attributes(a)

In [None]:
print_attributes(a.election)