In [3]:
import json
import zipfile
import os
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

In [4]:
from pathlib import Path
data_dir = Path('.')
data_dir.mkdir(exist_ok = True)
file_path = data_dir / Path('CVR2.zip')
dest_path = file_path

In [5]:
zip_1 = zipfile.ZipFile(dest_path, 'r')
with zip_1.open('CvrExport.json') as cvr_2:
    cvr_exp = pd.read_json(cvr_2)

In [6]:
cvr_sessions = cvr_exp['Sessions']
cvr_sessions.head()

0    {'TabulatorId': 530, 'BatchId': 1, 'RecordId':...
1    {'TabulatorId': 530, 'BatchId': 1, 'RecordId':...
2    {'TabulatorId': 530, 'BatchId': 1, 'RecordId':...
3    {'TabulatorId': 530, 'BatchId': 1, 'RecordId':...
4    {'TabulatorId': 530, 'BatchId': 1, 'RecordId':...
Name: Sessions, dtype: object

# Questions

## How many people voted for both GOP US senators but made no vote at all for President? (was it concentrated in any particular precinct?)

In [7]:
## Go to levels for the senate and presidential contests and count the numbers of votes for senators but no president
## Outstack Condition 6 is code for no vote
sen_no_pres_count = 0
index_count = 0
for i in cvr_sessions:
    if not 6 in i['Original']['Cards'][0]['Contests'][1].get('OutstackConditionIds'):
        if 6 in i['Original']['Cards'][0]['Contests'][0].get('OutstackConditionIds'):
            sen_no_pres_count += 1
    index_count += 1
sen_no_pres_count

59

## How many people voted for both GOP US senators but voted for Biden? (Any precinct concentration?)

In [8]:
gop_biden_pres_count = 0
purdue_count = 0
both_gop_count = 0
for i in cvr_sessions:
    if not 6 in i['Original']['Cards'][0]['Contests'][1].get('OutstackConditionIds'):
        if i['Original']['Cards'][0]['Contests'][1].get('Marks')[0].get('CandidateId') == 4:
            purdue_count += 1
            if not 6 in i['Original']['Cards'][0]['Contests'][2].get('OutstackConditionIds'):
                if i['Original']['Cards'][0]['Contests'][2].get('Marks')[0].get('CandidateId') in [9, 11, 24, 16, 19]:
                    both_gop_count += 1
                    if not 6 in i['Original']['Cards'][0]['Contests'][0].get('OutstackConditionIds'):
                        if i['Original']['Cards'][0]['Contests'][0].get('Marks')[0].get('CandidateId') == 2:
                            gop_biden_pres_count += 1
print(purdue_count, both_gop_count, gop_biden_pres_count)

36939 34748 240


## How many people split their vote between One GOP and one Dem US Senator?

In [9]:
purdue_count = 0
dem = 0
split_count = 0
for i in cvr_sessions:
    if not 6 in i['Original']['Cards'][0]['Contests'][1].get('OutstackConditionIds'):
        if i['Original']['Cards'][0]['Contests'][1].get('Marks')[0].get('CandidateId') == 4:
            purdue_count += 1
        elif not 6 in i['Original']['Cards'][0]['Contests'][2].get('OutstackConditionIds'):
            if i['Original']['Cards'][0]['Contests'][2].get('Marks')[0].get('CandidateId') in [26, 17, 18, 20, 25, 23]:
                split_count += 1
print(purdue_count, split_count)

36939 8985


## For ballots on which the mark density on more 3 or more votes is less than 15%, how many undervotes are there? (That is related to the faulty scanning issue. Those might be votes that are not counted. )

In [10]:
undervotes = []
for i in cvr_sessions:
    under_15_density = 0
    under_count = 0
    no_mark_category = 0
    registered_blank = 0
    zero_density = 0
    for j in i['Original']['Cards'][0]['Contests']:
        density = 0
        if j.get('Undervotes') == 1:
            under_count += 1
        if j.get('Marks'):
            density = j.get('Marks')[0].get('MarkDensity')
        if density > 0 and density < 15:
            under_15_density += 1
        if density == 0:
            zero_density += 1
        if not j.get('Marks'):
            no_mark_category += 1
        if 6 in j.get('OutstackConditionIds'):
            registered_blank += 1
    if under_15_density >= 3:
        undervotes.append([under_15_density, zero_density, registered_blank, no_mark_category, under_count])
undervotes_df = pd.DataFrame(undervotes, columns=['Under 15', 'Zero density', 'Labelled blank', 'No density listed', 'Undercount'])
undervotes_df

Unnamed: 0,Under 15,Zero density,Labelled blank,No density listed,Undercount
0,3,3,9,3,9
1,5,10,17,10,17
2,4,1,11,1,11
3,5,2,13,2,13
4,7,3,10,3,10
5,3,8,18,8,18
6,5,0,11,0,11
7,4,8,9,8,9


## How many “Ambiguous votes” were not counted?

In [11]:
ambiguous_not_counted = 0
for i in cvr_sessions:
    for j in i['Original']['Cards'][0]['Contests']:
        if j.get('Marks'):
            if j.get('Marks')[0].get('IsAmbiguous'):
                if not j.get('Marks')[0].get('IsVote'):
                    ambiguous_not_counted += 1
ambiguous_not_counted

210

## How many overvotes were recorded?

In [12]:
overvotes = 0
for i in cvr_sessions:
    for j in i['Original']['Cards'][0]['Contests']:
        if j.get('Overvotes') != 0:
            overvotes += j.get('Overvotes')
overvotes

191

## Can the votes for candidate be easily tabulated?
yes

## Is there a scanner batch number on the CVR?
yes