# Ballot Polling Assertion RLA

In [1]:
from __future__ import division, print_function

import math
import json
import warnings
import numpy as np
import pandas as pd
import csv
import copy

from collections import OrderedDict
from IPython.display import display, HTML

from cryptorandom.cryptorandom import SHA256
from cryptorandom.sample import sample_by_index

from assertion_audit_utils import \
    Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters,\
    find_p_values, find_sample_size, new_sample_size, prep_sample, summarize_status,\
    write_audit_parameters
from dominion_tools import \
    prep_dominion_manifest, sample_from_manifest, write_cards_sampled

In [2]:
# ---- Audit-wide settings ----
# Seed for the sampling PRNG: use, e.g., 20 rolls of a 10-sided die.
# The seed doesn't have to be numeric.
seed = 20546205145833673221
# Recorded in the audit log; in this notebook it is only passed to
# write_audit_parameters, not to the sampling call itself.
replacement = False

# Card counts used by the risk function and the sampler.
N_cards = 146662
N_cards_max = 147000  # Upper bound on number of ballots cast

# Forwarded to the risk function as `g` (presumably a padding term that keeps
# the test statistic away from zero -- TODO confirm in assertion_audit_utils).
g = 0.1

# Alternative risk function, currently disabled:
#risk_function = "kaplan_martingale"
#risk_fn = lambda x: TestNonnegMean.kaplan_martingale(x, N=N_cards)[0]

risk_function = "kaplan_kolmogorov"


def risk_fn(x):
    """Return the Kaplan-Kolmogorov result for the assorter values `x`.

    `N_cards` and `g` are looked up when the function is called, so later
    changes to those globals take effect.
    """
    # NOTE(review): the sample is drawn from N_cards_max indices, but N=N_cards
    # is used here -- confirm which population size is intended.
    return TestNonnegMean.kaplan_kolmogorov(x, N=N_cards, t=1/3, g=g)


In [3]:
# Input/output locations. The same files are used as for the CVR stratum,
# but this notebook treats the ballots as a no-CVR (ballot-polling) stratum.

# Excel ballot manifest, loaded with pd.read_excel.
manifest_file = './Data/N19 ballot manifest with WH location for RLA Upload VBM 11-14.xlsx'
# Not referenced by any other cell visible in this notebook.
manifest_type = 'STYLE'
# Destination passed to write_cards_sampled for the list of sampled cards.
sample_file = './Data/pollingsample.csv'
# JSON file of manually read ballots; its 'ballots' entry is passed to CVR.from_dict.
mvr_file = './Data/mvr_prepilot_test.json'

# Audit log passed to write_audit_parameters.
log_file = './Data/pollinglog.json'

In [4]:
# Passed to check_audit_parameters and write_audit_parameters; no other visible
# cell uses it. Presumably the anticipated error rate for sample-size planning -- TODO confirm.
error_rate = 0.002

In [5]:
# Contests under audit, keyed by contest id. Edit the entries to describe your
# own contest (e.g., contest 339 is the DA race). For an IRV contest,
# 'assertion_file' names the JSON file holding its assertions.
contests = {
    '339': dict(
        risk_limit=0.05,
        choice_function='IRV',
        n_winners=1,
        candidates=['15', '16', '17', '18'],
        reported_winners=['15'],
        assertion_file='./Data/SF2019Nov8Assertions.json',
    ),
}

In [6]:
# For every IRV contest, load its assertion file and store the assertion list
# (first entry under 'audits') on the contest as 'assertion_json'.
for c, contest in contests.items():
    if contest['choice_function'] != 'IRV':
        continue
    with open(contest['assertion_file'], 'r') as f:
        audit_spec = json.load(f)
    contest['assertion_json'] = audit_spec['audits'][0]['assertions']

In [7]:
# Build the Assertion objects for every contest. Judging by the displayed
# result, this is a dict keyed by contest id, then by assertion label.
all_assertions = Assertion.make_all_assertions(contests)

In [8]:
# Display the assertions that will be audited (contest id -> assertion label -> Assertion).
all_assertions

{'339': {'18 v 17 elim 15 16 45': <assertion_audit_utils.Assertion at 0x2c57ec93148>,
  '17 v 16 elim 15 18 45': <assertion_audit_utils.Assertion at 0x2c57ec910c8>,
  '15 v 18 elim 16 17 45': <assertion_audit_utils.Assertion at 0x2c57ec91088>,
  '18 v 16 elim 15 17 45': <assertion_audit_utils.Assertion at 0x2c57ec91648>,
  '17 v 16 elim 15 45': <assertion_audit_utils.Assertion at 0x2c57ec93c48>,
  '15 v 17 elim 16 45': <assertion_audit_utils.Assertion at 0x2c57ec93e88>,
  '15 v 17 elim 16 18 45': <assertion_audit_utils.Assertion at 0x2c57ec93648>,
  '18 v 16 elim 15 45': <assertion_audit_utils.Assertion at 0x2c57ec89f08>,
  '15 v 16 elim 17 45': <assertion_audit_utils.Assertion at 0x2c57ec89608>,
  '15 v 16 elim 17 18 45': <assertion_audit_utils.Assertion at 0x2c57ec89fc8>,
  '15 v 16 elim 18 45': <assertion_audit_utils.Assertion at 0x2c57ec89088>,
  '15 v 16 elim 45': <assertion_audit_utils.Assertion at 0x2c57ec891c8>,
  '15 v 45': <assertion_audit_utils.Assertion at 0x2c57ec89ac8>}}

## Read the ballot manifest

In [9]:
# Load the ballot manifest and prepare it for sampling without any CVR
# processing: validate the columns, add a phantom batch if needed, compute
# cumulative card counts and convert the identifier columns to strings.
manifest = pd.read_excel(manifest_file)

# prep dominion_manifest w/o cvr processing
cols = ['Tray #', 'Tabulator Number', 'Batch Number', 'Total Ballots', 'VBMCart.Cart number']
assert set(cols).issubset(manifest.columns), "missing columns"

# Card total listed in the manifest, taken before any phantom batch is added.
manifest_cards = manifest['Total Ballots'].sum()
phantom_cards = N_cards_max-N_cards

if N_cards < N_cards_max:
    warnings.warn('The CVR list does not account for every card cast in the contest; adding a phantom batch to the manifest')
    # One extra "batch" holding the cards between N_cards and N_cards_max.
    r = {'Tray #': None, 'Tabulator Number': 'phantom', 'Batch Number': 1,
         'Total Ballots': phantom_cards, 'VBMCart.Cart number': None}
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    manifest = pd.concat([manifest, pd.DataFrame([r])], ignore_index=True)

# Running total of cards per batch, computed after the phantom row is in place.
# NOTE(review): the recorded output shows cum_cards reaching 293555, well above
# N_cards_max = 147000 -- confirm the manifest counts and the N_cards values
# refer to the same units.
manifest['cum_cards'] = manifest['Total Ballots'].cumsum()

# Identifier columns are stored as strings.
for col in ['Tray #', 'Tabulator Number', 'Batch Number', 'VBMCart.Cart number']:
    manifest[col] = manifest[col].astype(str)
#manifest, manifest_cards, phantom_cards = prep_dominion_manifest(manifest, N_cards, N_cards_max)

manifest


Unnamed: 0,Tray #,Tabulator Number,Batch Number,Total Ballots,VBMCart.Cart number,cum_cards
0,1,99808,78,116,3,116
1,1,99808,77,115,3,231
2,1,99808,79,120,3,351
3,1,99808,81,76,3,427
4,1,99808,80,116,3,543
...,...,...,...,...,...,...
5477,3506,99815,84,222,19,292779
5478,3506,99815,83,346,19,293125
5479,3506,99815,82,332,19,293457
5480,3507,99802,822,98,14,293555


In [10]:
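# Reading CVRs: presumably unnecessary here, since this notebook treats the ballots as a no-CVR (ballot-polling) stratum -- TODO confirm.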

In [11]:
# Check the audit configuration before sampling. The helper comes from
# assertion_audit_utils; the exact checks it performs are not visible here.
check_audit_parameters(risk_function, g, error_rate, contests)

In [13]:
# Record the audit parameters in the log file before sampling.
# No CVRs are read in this notebook, so the logged CVR count is zero.
n_cvrs = 0
write_audit_parameters(log_file, seed, replacement, risk_function, g, N_cards, n_cvrs,
                       manifest_cards, phantom_cards, error_rate, contests)

## Set up for sampling

## Find initial sample size

In [14]:
# TODO: the initial sample size is hard-coded; presumably it should be computed
# for ballot polling (e.g. with find_sample_size) -- confirm the intended approach.
sample_size = 200

## Draw the first sample

In [15]:
# Seed the SHA256-based PRNG and draw `sample_size` card indices out of
# N_cards_max (the upper bound, which includes the phantom cards).
prng = SHA256(seed)
sample = sample_by_index(N_cards_max, sample_size, prng=prng)

# Any index above N_cards is counted as a phantom card.
is_phantom = sample > N_cards
n_phantom_sample = np.sum(is_phantom) #TODO
print("The sample includes {} phantom cards.".format(n_phantom_sample))
print(sample)

The sample includes 2 phantom cards.


In [27]:
# Debug aid: show the sample and where in it each phantom card was drawn.
# Sampled indices above N_cards are the phantom cards.
phantom_ballots = sample[sample>N_cards]
print(sample)
# Print one np.where result per phantom card. This works for any number of
# phantoms, instead of assuming exactly two were drawn.
print(*(np.where(sample==card) for card in phantom_ballots))

[ 69999  23906  39510 114714  58087  96085  22366 130567 102040  58431
  15787  62055  68838  95872  12069  24119  70415   2781 107481  67972
  93133  84033  25189 132056  87262  45463 125559 126663 104427  65134
 135198  61702 132943  92507  73582  78464  59882  56033 144047  58343
  91146 126592    823   2124  20599 143661  98414  91437 101811  96674
 135236  84060  83677  47978 104803   9154  73646  45063 105083  42539
  67636 113876  38652  46176  60817 101723  47808  96391 112982  78952
 108493  82089  52371  41765  75914 130679  32624 131989 124900  55086
  51578     92  19704  65356  31529  94183 116793 144189 102782 146954
  67976 135246  23749  96259 118185  32670 113007 134142  58099 104324
  57062  23442 107997 106637 113673  66043  41445  25799 146935 102929
  75997  40482   4020  76436  13827  74228 126546  50367  17924   9147
  80551  50522  40404  16502 121526 107030 117061  67977 118242  92587
  77111 100239 139599  68527 146192  26899  81450  29325  69084  30734
  1085

In [16]:
# Look up the sampled card indices in the prepared manifest (which carries the
# cum_cards column). The shape of the returned lookup is defined in dominion_tools.
manifest_sample_lookup = sample_from_manifest(manifest, sample)
# Uncomment to inspect the lookup:
#print(manifest_sample_lookup)

In [17]:
# Write the sampled-card lookup to sample_file. print_phantoms=True presumably
# includes the phantom cards in the output -- TODO confirm in dominion_tools.
write_cards_sampled(sample_file, manifest_sample_lookup, print_phantoms=True)

## Read the audited sample data

In [18]:
# Load the manually read ballots (MVRs) from JSON and convert the entries under
# 'ballots' into CVR objects.
with open(mvr_file) as f:
    mvr_json = json.load(f)

mvr_sample = CVR.from_dict(mvr_json['ballots'])

# Preview at most the first 10 records. Bounding by the length avoids an
# IndexError when the file holds fewer than 10 ballots.
for i in range(min(10, len(mvr_sample))):
    print(mvr_sample[i])

id: 99807-3-2 votes: {'339': {'16': 3, '17': 2, '18': 1}} phantom: False
id: 99809-27-41 votes: {'339': {'15': 3, '16': 1, '17': 4, '18': 2}} phantom: False
id: 99807-4-20 votes: {'339': {'15': 1, '17': 2}} phantom: False
id: 99805-68-45 votes: {'339': {'15': 4, '16': 1, '17': 2, '18': 3}} phantom: False
id: 99805-30-44 votes: {'339': {'15': 3, '16': 2, '17': 1, '18': 4}} phantom: False
id: 99805-30-89 votes: {'339': {'15': 2, '17': 1}} phantom: False
id: 99808-28-57 votes: {'339': {'17': 1}} phantom: False
id: 99811-26-37 votes: {'339': {'18': 1}} phantom: False
id: 99804-19-38 votes: {'339': {'15': 2, '18': 1}} phantom: False
id: 99802-15-23 votes: {'339': {'15': 3, '16': 4, '17': 1, '18': 2}} phantom: False


## Find measured risks for all assertions

In [28]:
# Compute a p-value for every assertion of every contest from the audited
# sample. Per-assertion results are stored on the Assertion objects and in
# contests[c]['p_values'] / ['proved']; the largest p-value is stored per
# contest as 'max_p' and overall as p_max.

# Debug aid: positions in `sample` at which phantom cards (index > N_cards)
# were drawn. These were previously hard-coded as 89 and 108, which only holds
# for one particular seed.
# NOTE(review): this assumes mvr_sample is in the same order as `sample` -- confirm.
phantom_positions = np.flatnonzero(sample > N_cards)

p_max = 0
for c in contests:
    contests[c]['p_values'] = {}
    contests[c]['proved'] = {}
    contest_max_p = 0

    for asrtn, a in all_assertions[c].items():
        # Assorter value of each audited ballot for this assertion.
        d = [a.assorter.assort(i) for i in mvr_sample]
        # Show the assorter values at the phantom positions, skipping any
        # position beyond the end of the audited data.
        print(*(d[pos] for pos in phantom_positions if pos < len(d)), '\n')
        a.p_value = risk_fn(d)
        # An assertion is proved once its p-value is at or below the contest's risk limit.
        a.proved = (a.p_value <= contests[c]['risk_limit'])
        contests[c]['p_values'].update({asrtn: a.p_value})
        contests[c]['proved'].update({asrtn: int(a.proved)})
        contest_max_p = np.max([contest_max_p, a.p_value])

    contests[c].update({'max_p': contest_max_p})
    p_max = np.max([p_max, contests[c]['max_p']])

1.0 1.0 

1.0 0.5 

0.0 0.0 

1.0 1.0 

0.5 0.5 

0.5 0.5 

0.0 0.5 

1.0 1.0 

0.5 0.5 

0.5 0.5 

0.5 0.5 

0.5 0.5 

0.5 0.5 



In [20]:
# Report the largest p-value over all contests, then print the per-contest summary.
# `done` holds summarize_status's return value -- presumably whether every
# contest's audit is complete; TODO confirm in assertion_audit_utils.
print("maximum assertion p-value {}".format(p_max))
done = summarize_status(contests, all_assertions)

maximum assertion p-value 0.09648735152865984
p-values for assertions in contest 339
18 v 17 elim 15 16 45 0.017131462872821586
17 v 16 elim 15 18 45 0.0008608892161843159
15 v 18 elim 16 17 45 0.09648735152865984
18 v 16 elim 15 17 45 1.5352691149579148e-10
17 v 16 elim 15 45 3.31448375276036e-14
15 v 17 elim 16 45 6.522662675221176e-10
15 v 17 elim 16 18 45 6.973094458142873e-07
18 v 16 elim 15 45 5.308512042863627e-20
15 v 16 elim 17 45 3.913408320026705e-18
15 v 16 elim 17 18 45 1.5954041110126188e-10
15 v 16 elim 18 45 1.4115146365349999e-22
15 v 16 elim 45 3.812110397723145e-26
15 v 45 7.891115024017424e-45

contest 339 audit INCOMPLETE at risk limit 0.05. Attained risk 0.09648735152865984
assertions remaining to be proved:
15 v 18 elim 16 17 45: current risk 0.09648735152865984


In [21]:
# Log the audit parameters again. By this point `contests` also carries the
# 'p_values', 'proved' and 'max_p' entries added by the p-value loop.
# Whether the log file is overwritten or appended to depends on the helper.
write_audit_parameters(log_file, seed, replacement, risk_function, g, N_cards, n_cvrs, \
                       manifest_cards, phantom_cards, error_rate, contests)