In [1]:
import numpy as np
import pandas as pd
import math
import os.path
import json
import time

In [2]:
# Download absentee data source https://s3.amazonaws.com/dl.ncsbe.gov/ENRS/2016_11_08/absentee_20161108.zip
# Directory that holds the unzipped CSV. Set the NC_VOTING_DATA_ROOT environment
# variable to read the file from another location; the original path is the default.
DATA_ROOT_PATH = os.environ.get('NC_VOTING_DATA_ROOT', '/src/data/nc-voting')
FILENAME = 'absentee_20161108.csv'

In [3]:
# Load the whole absentee file into memory.
# low_memory=False makes pandas infer each column's dtype from the full column
# rather than chunk by chunk, which avoids columns ending up with mixed types.
# NOTE(review): the saved output of this cell looks like the tail of a pandas
# mixed-dtype warning; confirm it no longer appears after this change.
df = pd.read_csv(
    os.path.join(DATA_ROOT_PATH, FILENAME),
    encoding="ISO-8859-1",
    low_memory=False,
)

  interactivity=interactivity, compiler=compiler, result=result)


# Data Exploration

In [4]:
# Keep the raw return-status column and store a categorical copy next to it.
return_status_categorical = df['ballot_rtn_status'].astype('category')
df['ballot_rtn_status_cat'] = return_status_categorical

In [5]:
# Number of ballots per return status across the whole file
# (value_counts leaves out rows whose status is missing).
all_return_status = df['ballot_rtn_status_cat']
all_return_status.value_counts()

ACCEPTED                   3147210
SPOILED                      15529
CANCELLED                     3521
WITNESS INFO INCOMPLETE       2700
RETURNED AFTER DEADLINE       1215
VOTER SIGNATURE MISSING       1101
RETURNED UNDELIVERABLE         778
NOT VOTED                      625
WRONG VOTER                    549
DUPLICATE                       39
SIGNATURE DIFFERENT             32
NOT PROPERLY NOTARIZED          32
E-TRANSMISSION FAILURE          15
Name: ballot_rtn_status_cat, dtype: int64

In [6]:
# Categorical copy of the request delivery type, followed by its tally.
delivery_type_categorical = df['ballot_req_delivery_type'].astype('category')
df['ballot_req_delivery_type_cat'] = delivery_type_categorical
# Count from the stored column so the displayed series keeps the "_cat" name.
df['ballot_req_delivery_type_cat'].value_counts()

IN PERSON    2960469
MAIL          222498
E-MAIL         17972
FAX               20
Name: ballot_req_delivery_type_cat, dtype: int64

In [7]:
# Categorical copy of the request type, followed by its tally.
request_type_categorical = df['ballot_req_type'].astype('category')
df['ballot_req_type_cat'] = request_type_categorical
# Count from the stored column so the displayed series keeps the "_cat" name.
df['ballot_req_type_cat'].value_counts()

ONE-STOP    2960318
MAIL         240641
Name: ballot_req_type_cat, dtype: int64

In [8]:
# Tally the distinct election dates present in the file.
election_dates = df["election_dt"].astype('category')
election_dates.value_counts()

11/08/2016    3200959
Name: election_dt, dtype: int64

# Rejected Rate in Absentee Ballot by Mail

In [9]:
# Select the rows whose request type is MAIL.
is_mail_request = df['ballot_req_type_cat'] == 'MAIL'
vote_by_mail_df = df[is_mail_request]

In [10]:
# Return-status tally for the mail subset. Every category of the column is
# listed, so statuses that do not occur in this subset show a count of 0.
mail_return_status = vote_by_mail_df['ballot_rtn_status_cat']
mail_return_status.value_counts()

ACCEPTED                   191601
SPOILED                     15529
WITNESS INFO INCOMPLETE      2700
RETURNED AFTER DEADLINE      1215
VOTER SIGNATURE MISSING      1101
RETURNED UNDELIVERABLE        778
SIGNATURE DIFFERENT            32
NOT PROPERLY NOTARIZED         32
DUPLICATE                      25
E-TRANSMISSION FAILURE         15
WRONG VOTER                     0
NOT VOTED                       0
CANCELLED                       0
Name: ballot_rtn_status_cat, dtype: int64

In [11]:
# Total number of rows in the mail subset, including rows with no return status.
vote_by_mail_df.shape[0]

240641

In [12]:
# Return statuses that this analysis does not treat as a rejection for mail ballots.
MAIL_NOT_REJECTED_STATUSES = ['ACCEPTED', 'SPOILED']

# Derive both counts from the data instead of pasting numbers from earlier
# cell outputs, so the rate stays correct if the input file changes.
mail_total = len(vote_by_mail_df)
mail_not_rejected = vote_by_mail_df['ballot_rtn_status_cat'].isin(MAIL_NOT_REJECTED_STATUSES).sum()

# Rows with a missing return status are part of the denominator but never match
# a "not rejected" status, so they count towards the rejected share.
rejected_rate_in_mail = 1 - (mail_not_rejected / mail_total)
print("Rejected Rate of absentee ballot in mail : {:.2%}".format(rejected_rate_in_mail))

Rejected Rate of absentee ballot in mail : 13.93%


# Rejected Rate in Absentee Ballot in Person

In [13]:
# Select the rows whose request type is ONE-STOP.
is_one_stop_request = df['ballot_req_type_cat'] == 'ONE-STOP'
one_stop_vote_in_person = df[is_one_stop_request]

In [14]:
# Return-status tally for the one-stop subset. Every category of the column is
# listed, so statuses that do not occur in this subset show a count of 0.
one_stop_return_status = one_stop_vote_in_person['ballot_rtn_status_cat']
one_stop_return_status.value_counts()

ACCEPTED                   2955609
CANCELLED                     3521
NOT VOTED                      625
WRONG VOTER                    549
DUPLICATE                       14
WITNESS INFO INCOMPLETE          0
VOTER SIGNATURE MISSING          0
SPOILED                          0
SIGNATURE DIFFERENT              0
RETURNED UNDELIVERABLE           0
RETURNED AFTER DEADLINE          0
NOT PROPERLY NOTARIZED           0
E-TRANSMISSION FAILURE           0
Name: ballot_rtn_status_cat, dtype: int64

In [15]:
# Total number of rows in the one-stop subset.
one_stop_vote_in_person.shape[0]

2960318

In [16]:
# Return statuses that this analysis does not treat as a rejection for one-stop ballots.
IN_PERSON_NOT_REJECTED_STATUSES = ['ACCEPTED', 'CANCELLED']

# Derive both counts from the data instead of pasting numbers from earlier
# cell outputs, so the rate stays correct if the input file changes.
in_person_total = len(one_stop_vote_in_person)
in_person_not_rejected = one_stop_vote_in_person['ballot_rtn_status_cat'].isin(IN_PERSON_NOT_REJECTED_STATUSES).sum()

# Share of one-stop rows whose status is neither ACCEPTED nor CANCELLED.
rejected_rate_in_person = 1 - (in_person_not_rejected / in_person_total)
print("Rejected Rate of Early Voting in Person: {:.2%}".format(rejected_rate_in_person))

Rejected Rate of Early Voting in Person: 0.04%


# Conclusion

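- The rejection rate for in-person (one-stop) ballots (0.04%) is far lower than the rejection rate for mail ballots (13.93%). Note that the mail figure also counts the 27,613 mail rows with no recorded return status (240,641 rows in total versus 213,028 with a status) as rejected, so the two rates are not measured on exactly the same basis. Go vote in person.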