In [1]:
import numpy as np
import pandas as pd
import math
import os.path
import json
import time

In [2]:
# Absentee data source (download before running this notebook):
#   https://s3.amazonaws.com/dl.ncsbe.gov/ENRS/2016_11_08/absentee_20161108.zip
# FILENAME is expected to be found inside DATA_ROOT_PATH (presumably the CSV extracted
# from the archive above).
# The directory can be overridden with the NC_VOTING_DATA_ROOT environment variable so the
# notebook is not tied to one machine's layout; the original location stays the default.
DATA_ROOT_PATH = os.environ.get('NC_VOTING_DATA_ROOT') or '/src/data/nc-voting'
FILENAME = 'absentee_20161108.csv'

In [3]:
# Load the absentee file into memory.
# low_memory=False makes pandas infer each column's dtype from the whole column rather than
# chunk by chunk; chunked inference is the usual cause of the mixed-type DtypeWarning that
# this load appears to have emitted.
absentee_csv_path = os.path.join(DATA_ROOT_PATH, FILENAME)
df = pd.read_csv(absentee_csv_path, encoding="ISO-8859-1", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


# Data Exploration

In [4]:
# Keep the raw return-status column untouched and add a categorical copy next to it.
rtn_status_raw = df['ballot_rtn_status']
df['ballot_rtn_status_cat'] = rtn_status_raw.astype('category')

In [5]:
# Number of ballots per return status (rows with a missing status are left out by default).
rtn_status_counts = df['ballot_rtn_status_cat'].value_counts()
rtn_status_counts

ACCEPTED                   3147210
SPOILED                      15529
CANCELLED                     3521
WITNESS INFO INCOMPLETE       2700
RETURNED AFTER DEADLINE       1215
VOTER SIGNATURE MISSING       1101
RETURNED UNDELIVERABLE         778
NOT VOTED                      625
WRONG VOTER                    549
DUPLICATE                       39
SIGNATURE DIFFERENT             32
NOT PROPERLY NOTARIZED          32
E-TRANSMISSION FAILURE          15
Name: ballot_rtn_status_cat, dtype: int64

In [6]:
# Categorical copy of the ballot delivery type, followed by its frequency table.
delivery_type_categories = df['ballot_req_delivery_type'].astype('category')
df['ballot_req_delivery_type_cat'] = delivery_type_categories
df['ballot_req_delivery_type_cat'].value_counts()

IN PERSON    2960469
MAIL          222498
E-MAIL         17972
FAX               20
Name: ballot_req_delivery_type_cat, dtype: int64

In [7]:
# Categorical copy of the ballot request type, followed by its frequency table.
req_type_categories = df['ballot_req_type'].astype('category')
df['ballot_req_type_cat'] = req_type_categories
df['ballot_req_type_cat'].value_counts()

ONE-STOP    2960318
MAIL         240641
Name: ballot_req_type_cat, dtype: int64

In [8]:
# Sanity check: list the election dates present in the file and how many rows each has.
election_dates = df["election_dt"].astype('category')
election_dates.value_counts()

11/08/2016    3200959
Name: election_dt, dtype: int64

# Rejection Rate for Absentee Ballots by Mail

In [9]:
# Restrict the data to ballots whose request type is MAIL.
is_mail_request = df['ballot_req_type_cat'] == 'MAIL'
vote_by_mail_df = df[is_mail_request]

In [17]:
# Return-status breakdown for mail ballots; dropna=False keeps the rows that have no
# return status so they show up as their own NaN bucket.
mail_status_counts = vote_by_mail_df['ballot_rtn_status_cat'].value_counts(dropna=False)
mail_status_counts

ACCEPTED                   191601
NaN                         27613
SPOILED                     15529
WITNESS INFO INCOMPLETE      2700
RETURNED AFTER DEADLINE      1215
VOTER SIGNATURE MISSING      1101
RETURNED UNDELIVERABLE        778
SIGNATURE DIFFERENT            32
NOT PROPERLY NOTARIZED         32
DUPLICATE                      25
E-TRANSMISSION FAILURE         15
WRONG VOTER                     0
NOT VOTED                       0
CANCELLED                       0
Name: ballot_rtn_status_cat, dtype: int64

In [11]:
# Total number of mail-ballot rows (the denominator for the rejection rate).
vote_by_mail_df.shape[0]

240641

In [38]:
# Share of mail ballots that were rejected, computed from the data instead of from counts
# copied out of an earlier cell's output (those go stale silently if the data or the
# filter changes).
# Definition (unchanged): ACCEPTED ballots, SPOILED ballots, and ballots with no return
# status recorded are NOT counted as rejected; every other status is. The denominator is
# all mail-ballot rows.
# NOTE(review): rows without a return status stay in the denominator, so this is a rate
# over requested ballots, not over returned ballots - confirm that is the intended metric.
mail_rtn_status = vote_by_mail_df['ballot_rtn_status_cat']
mail_not_rejected = mail_rtn_status.isin(['ACCEPTED', 'SPOILED']) | mail_rtn_status.isna()
rejected_rate_in_mail = 1 - mail_not_rejected.sum() / len(vote_by_mail_df)
print("Rejected Rate of absentee ballot in mail : {:.2%}".format(rejected_rate_in_mail))

Rejected Rate of absentee ballot in mail : 2.45%


In [34]:
# Show the dtype pandas assigned to each column of the mail-ballot subset.
vote_by_mail_df.dtypes

county_desc                       object
voter_reg_num                      int64
ncid                              object
voter_last_name                   object
voter_first_name                  object
voter_middle_name                 object
race                              object
gender                            object
age                                int64
voter_street_address              object
voter_city                        object
voter_state                       object
voter_zip                        float64
ballot_mail_street_address        object
ballot_mail_city                  object
ballot_mail_state                 object
ballot_mail_zip                   object
other_mail_addr1                  object
other_mail_addr2                  object
other_city_state_zip              object
election_dt                       object
voter_party_code                  object
precinct_desc                     object
cong_dist_desc                    object
nc_house_desc   

In [36]:
# Mail ballots that have no return status recorded.
has_no_rtn_status = vote_by_mail_df['ballot_rtn_status_cat'].isna()
vote_by_mail_na_rt_status_df = vote_by_mail_df[has_no_rtn_status]

In [37]:
# Peek at the first ten mail ballots without a return status.
# NOTE: the rendered rows include voter names and street addresses; clear this output
# before sharing the notebook if that is a concern.
na_status_preview = vote_by_mail_na_rt_status_df.head(10)
na_status_preview

Unnamed: 0,county_desc,voter_reg_num,ncid,voter_last_name,voter_first_name,voter_middle_name,race,gender,age,voter_street_address,...,ballot_req_dt,ballot_send_dt,ballot_rtn_dt,ballot_rtn_status,site_name,sdr,mail_veri_status,ballot_rtn_status_cat,ballot_req_delivery_type_cat,ballot_req_type_cat
41,ALAMANCE,9131045,AA171628,ABRAHAMS,DIANA,SHULAMITH,WHITE,F,25,3121 COMMERCE PL #A,...,09/27/2016,09/27/2016,,,,,,,E-MAIL,MAIL
250,ALAMANCE,9164726,AA193714,ALBRIGHT,HALEIGH,JANE,WHITE,F,23,215 ALTAMAHAW-UNION RIDGE RD #C,...,10/17/2016,10/20/2016,,,,,VERIFIED,,MAIL,MAIL
350,ALAMANCE,9131930,AA138788,ALEXANDER,LAWRENCE,BRENT,WHITE,M,31,1765 CARL NOAH RD,...,10/21/2016,,,,,,,,MAIL,MAIL
369,ALAMANCE,9143639,AA180908,ALEXANDRE,MICHAEL,THOMAS,WHITE,M,74,2381 LACY HOLT RD,...,11/01/2016,11/01/2016,,,MAILED,,,,MAIL,MAIL
388,ALAMANCE,9081194,AA130412,ALIFF,ROBERTA,LEE,WHITE,F,76,501 BLAKEY HALL LN #210,...,11/02/2016,11/02/2016,,,MAILED,,,,MAIL,MAIL
389,ALAMANCE,9160061,AA190814,ALIMUSLEH,WISAM,TAWFIK,OTHER,M,19,1102 BIBURY CT,...,08/19/2016,09/09/2016,,,,,,,MAIL,MAIL
437,ALAMANCE,9146950,EH4212,ALLEN,EDITH,WILKINS,WHITE,F,91,1670 WESTBROOK AVE,...,10/10/2016,10/24/2016,,,,,,,MAIL,MAIL
632,ALAMANCE,9080425,AA129643,ALLMOND,TIMOTHY,,WHITE,M,49,3140 WORMRANCH RD,...,10/05/2016,10/05/2016,,,MAILED,,,,MAIL,MAIL
827,ALAMANCE,9162565,AA192326,ALVAREZ,ADELFO,,UNDESIGNATED,M,19,1662 PAYNE RD,...,10/07/2016,10/07/2016,,,,,,,MAIL,MAIL
1082,ALAMANCE,9113743,AA158898,ANGELOS,TERILEE,GIANOPOULOS,WHITE,F,63,4053 HANSEL TRCE,...,10/06/2016,10/06/2016,,,,,,,MAIL,MAIL


# Rejection Rate for Absentee Ballots Cast in Person (One-Stop Early Voting)

In [13]:
# Restrict the data to ballots whose request type is ONE-STOP.
is_one_stop_request = df['ballot_req_type_cat'] == 'ONE-STOP'
one_stop_vote_in_person = df[is_one_stop_request]

In [18]:
# Return-status breakdown for one-stop ballots; dropna=False would surface rows with no
# return status as a separate NaN bucket.
one_stop_status_counts = one_stop_vote_in_person['ballot_rtn_status_cat'].value_counts(dropna=False)
one_stop_status_counts

ACCEPTED                   2955609
CANCELLED                     3521
NOT VOTED                      625
WRONG VOTER                    549
DUPLICATE                       14
WITNESS INFO INCOMPLETE          0
VOTER SIGNATURE MISSING          0
SPOILED                          0
SIGNATURE DIFFERENT              0
RETURNED UNDELIVERABLE           0
RETURNED AFTER DEADLINE          0
NOT PROPERLY NOTARIZED           0
E-TRANSMISSION FAILURE           0
Name: ballot_rtn_status_cat, dtype: int64

In [15]:
# Total number of one-stop rows (the denominator for the rejection rate).
one_stop_vote_in_person.shape[0]

2960318

In [16]:
# Share of one-stop (in-person) ballots that were rejected, computed from the data instead
# of from counts copied out of an earlier cell's output.
# Definition (unchanged): ACCEPTED and CANCELLED ballots are NOT counted as rejected;
# every other row is. The denominator is all one-stop rows.
one_stop_rtn_status = one_stop_vote_in_person['ballot_rtn_status_cat']
one_stop_not_rejected = one_stop_rtn_status.isin(['ACCEPTED', 'CANCELLED'])
rejected_rate_in_person = 1 - one_stop_not_rejected.sum() / len(one_stop_vote_in_person)
print("Rejected Rate of Early Voting in Person: {:.2%}".format(rejected_rate_in_person))

Rejected Rate of Early Voting in Person: 0.04%


# Conclusion

- The rejection rate for in-person early voting (0.04%) is far lower than the rejection rate for absentee ballots by mail (2.45%). Go vote in person.