In [1]:
import numpy as np
import pandas as pd
import math
import os.path
import json
import time

In [11]:
# Data source, downloaded on NOV/19/2020:
#   wget https://s3.amazonaws.com/dl.ncsbe.gov/ENRS/2020_11_03/absentee_20201103.zip

# Directory holding the extracted CSV. The historical absolute path stays as the
# default, but it can be overridden without editing the notebook by setting the
# NC_ABSENTEE_DATA_ROOT environment variable before starting the kernel.
# (`os` is in scope through the notebook's `import os.path`.)
DATA_ROOT_PATH = os.environ.get('NC_ABSENTEE_DATA_ROOT', '/src/data/nc-voting/absentee')

# Alternative input kept for reference (presumably a smaller sample of the same
# file -- confirm before relying on it):
#FILENAME = 'absentee_20201103.csv.sample'
FILENAME = 'absentee_20201103.csv'

In [12]:
# Read the whole absentee file into one DataFrame. The file is decoded as
# ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
df = pd.read_csv(
    os.path.join(DATA_ROOT_PATH, FILENAME),
    encoding="ISO-8859-1",
)

  interactivity=interactivity, compiler=compiler, result=result)


# Data Exploration

In [13]:
# Show the dtype pandas inferred for every column of the loaded frame.
df.dtypes

county_desc                    object
voter_reg_num                   int64
ncid                           object
voter_last_name                object
voter_first_name               object
voter_middle_name              object
race                           object
ethnicity                      object
gender                         object
age                             int64
voter_street_address           object
voter_city                     object
voter_state                    object
voter_zip                     float64
ballot_mail_street_address     object
ballot_mail_city               object
ballot_mail_state              object
ballot_mail_zip                object
other_mail_addr1               object
other_mail_addr2               object
other_city_state_zip           object
relative_request_name          object
relative_request_address       object
relative_request_city          object
relative_request_state         object
relative_request_zip           object
election_dt 

In [14]:
# Preview the first three records to see what a row looks like.
# NOTE(review): the rendered preview includes voter names and registration
# numbers -- confirm that is acceptable before sharing the notebook with outputs.
df.head(n=3)

Unnamed: 0,county_desc,voter_reg_num,ncid,voter_last_name,voter_first_name,voter_middle_name,race,ethnicity,gender,age,...,ballot_req_delivery_type,ballot_req_type,ballot_request_party,ballot_req_dt,ballot_send_dt,ballot_rtn_dt,ballot_rtn_status,site_name,sdr,mail_veri_status
0,ALAMANCE,9005990,AA56273,AABEL,RUTH,EVELYN,WHITE,NOT HISPANIC or NOT LATINO,F,85,...,MAIL,MAIL,UNA,09/07/2020,09/09/2020,10/02/2020,ACCEPTED,,,
1,ALAMANCE,9178574,AA201627,AARDEN,JONI,AUTUMN,WHITE,UNDESIGNATED,F,42,...,IN PERSON,ONE-STOP,UNA,10/29/2020,10/29/2020,10/29/2020,ACCEPTED,MEBANE ARTS AND COMMUN ITY CENTER,,
2,ALAMANCE,9205561,AA216996,AARMSTRONG,TIMOTHY,DUANE,WHITE,UNDESIGNATED,M,54,...,IN PERSON,ONE-STOP,REP,10/31/2020,10/31/2020,10/31/2020,ACCEPTED,MEBANE ARTS AND COMMUN ITY CENTER,Y,1ST VFY


In [15]:
# Add a categorical view of the ballot return status next to the raw column;
# the original `ballot_rtn_status` column is left as loaded.
df["ballot_rtn_status_cat"] = df["ballot_rtn_status"].astype(dtype="category")

In [16]:
# Tally ballots per return status over the entire file (all request types).
df["ballot_rtn_status_cat"].value_counts()

ACCEPTED                        4621254
SPOILED                          276804
                                 165591
ACCEPTED - CURED                   7946
WITNESS INFO INCOMPLETE            5891
PENDING CURE                       5184
CANCELLED                          4212
RETURNED UNDELIVERABLE             2580
WRONG VOTER                         845
RETURNED AFTER DEADLINE             813
NOT VOTED                           703
DUPLICATE                           612
CONFLICT                            517
PENDING                              66
NO TIME FOR CURE - CONTACTED         59
SIGNATURE DIFFERENT                  28
ASSISTANT INFO INCOMPLETE             8
E-TRANSMISSION FAILURE                4
NOT PROPERLY NOTARIZED                1
Name: ballot_rtn_status_cat, dtype: int64

In [17]:
# Categorical copy of the request delivery type, followed by its frequency table.
df["ballot_req_delivery_type_cat"] = df["ballot_req_delivery_type"].astype(dtype="category")
df["ballot_req_delivery_type_cat"].value_counts()

IN PERSON    3634355
MAIL         1431188
E-MAIL         27567
FAX                8
Name: ballot_req_delivery_type_cat, dtype: int64

In [18]:
# Categorical copy of the request type, followed by its frequency table.
# Later cells filter on this column to separate 'MAIL' from 'ONE-STOP' ballots.
df["ballot_req_type_cat"] = df["ballot_req_type"].astype(dtype="category")
df["ballot_req_type_cat"].value_counts()

ONE-STOP    3634108
MAIL        1459010
Name: ballot_req_type_cat, dtype: int64

In [19]:
# Sanity check: list the distinct election dates present in the file and how
# many rows carry each one.
(
    df["election_dt"]
    .astype("category")
    .value_counts()
)

11/03/2020    5093118
Name: election_dt, dtype: int64

# Rejection Rate for Absentee Ballots by Mail

In [20]:
# Keep only the rows whose request type is 'MAIL' (absentee-by-mail ballots).
vote_by_mail_df = df.loc[df["ballot_req_type_cat"] == "MAIL"]

In [21]:
# Return-status breakdown for mail ballots only. Statuses with a count of 0 are
# still listed because the categorical column keeps every category seen in the
# full frame, not just those present in this subset.
vote_by_mail_df["ballot_rtn_status_cat"].value_counts(dropna=False)

ACCEPTED                        993455
SPOILED                         276804
                                165591
ACCEPTED - CURED                  7946
WITNESS INFO INCOMPLETE           5891
PENDING CURE                      5184
RETURNED UNDELIVERABLE            2580
RETURNED AFTER DEADLINE            813
DUPLICATE                          558
PENDING                             66
NO TIME FOR CURE - CONTACTED        59
SIGNATURE DIFFERENT                 28
CONFLICT                            22
ASSISTANT INFO INCOMPLETE            8
E-TRANSMISSION FAILURE               4
NOT PROPERLY NOTARIZED               1
CANCELLED                            0
NOT VOTED                            0
WRONG VOTER                          0
Name: ballot_rtn_status_cat, dtype: int64

In [22]:
# Total number of mail-ballot rows (the denominator of the mail rejection rate).
vote_by_mail_df.shape[0]

1459010

In [24]:
# Rejection rate for mail ballots, computed from the data instead of from counts
# copied by hand out of earlier cell outputs (those go stale as soon as the input
# file or the filter changes).
#
# A ballot is treated as NOT rejected when its return status is one of the four
# values the original calculation summed: ACCEPTED, SPOILED, blank (no return
# status recorded) and ACCEPTED - CURED. Every other status counts as rejected.
# Statuses are stripped first so a blank made of whitespace (or a missing value)
# is matched by ''.
mail_status = vote_by_mail_df['ballot_rtn_status'].fillna('').astype(str).str.strip()
mail_not_rejected = mail_status.isin(['ACCEPTED', 'SPOILED', '', 'ACCEPTED - CURED'])

# The mean of a boolean Series is the share of True values, i.e. matching rows / all rows.
rejected_rate_in_mail = 1 - mail_not_rejected.mean()
print("Rejected Rate of absentee ballot in mail : {:.2%}".format(rejected_rate_in_mail))

Rejected Rate of absentee ballot in mail : 1.04%


# Rejection Rate for Absentee Ballots Cast in Person

In [32]:
# Keep only the rows whose request type is 'ONE-STOP' (in-person early voting).
one_stop_vote_in_person = df.loc[df["ballot_req_type_cat"] == "ONE-STOP"]

In [33]:
# Return-status breakdown for one-stop ballots only. Zero-count statuses appear
# because the categorical column retains all categories from the full frame.
one_stop_vote_in_person["ballot_rtn_status_cat"].value_counts(dropna=False)

ACCEPTED                        3627799
CANCELLED                          4212
WRONG VOTER                         845
NOT VOTED                           703
CONFLICT                            495
DUPLICATE                            54
NO TIME FOR CURE - CONTACTED          0
ACCEPTED - CURED                      0
ASSISTANT INFO INCOMPLETE             0
E-TRANSMISSION FAILURE                0
NOT PROPERLY NOTARIZED                0
WITNESS INFO INCOMPLETE               0
PENDING                               0
PENDING CURE                          0
RETURNED AFTER DEADLINE               0
RETURNED UNDELIVERABLE                0
SIGNATURE DIFFERENT                   0
SPOILED                               0
                                      0
Name: ballot_rtn_status_cat, dtype: int64

In [34]:
# Total number of one-stop rows (the denominator of the in-person rejection rate).
one_stop_vote_in_person.shape[0]

3634108

In [36]:
# Rejection rate for one-stop (in-person) ballots, computed from the data instead
# of from counts copied by hand out of earlier cell outputs.
#
# A ballot is treated as NOT rejected when its return status is ACCEPTED or
# CANCELLED -- the two values the original calculation summed. Every other
# status counts as rejected.
in_person_status = one_stop_vote_in_person['ballot_rtn_status'].fillna('').astype(str).str.strip()
in_person_not_rejected = in_person_status.isin(['ACCEPTED', 'CANCELLED'])

# The mean of a boolean Series is the share of True values, i.e. matching rows / all rows.
rejected_rate_in_person = 1 - in_person_not_rejected.mean()
print("Rejected Rate of Early Voting in Person: {:.2%}".format(rejected_rate_in_person))

Rejected Rate of Early Voting in Person: 0.06%


# Conclusion

- The rejection rate for in-person (one-stop) early voting (0.06%) is far lower than the rejection rate for absentee ballots by mail (1.04%). Go vote in person.