In [1]:
import pandas as pd
import datetime
import mariadb
import numpy as np
from pathlib import Path

In [2]:
# The 2018 voter-history export lives in a directory beside the working directory.
csv_path = Path.cwd().parent / 'voter_history_files/2018.csv'

# Every listed column is passed through `str`, so values stay as text exactly
# as written in the file (registration numbers keep their leading zeros).
import_csv = pd.read_csv(
    csv_path,
    converters=dict.fromkeys(
        [
            'County Name',
            'Voter Registration Number',
            'Election Date',
            'Election Type',
            'Party',
            'Ballot Style',
            'Absentee',
            'Provisional',
            'Supplemental',
        ],
        str,
    ),
)

In [3]:
# Preview the raw import; the rendered table is truncated to the first and last rows.
import_csv

Unnamed: 0,County Name,Voter Registration Number,Election Date,Election Type,Party,Ballot Style,Absentee,Provisional,Supplemental
0,MCDUFFIE,00200975,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
1,MCDUFFIE,00214376,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
2,MCDUFFIE,00214519,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
3,MCDUFFIE,00222361,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
4,MCDUFFIE,00222385,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
...,...,...,...,...,...,...,...,...,...
7462150,LANIER,00710452,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462151,LANIER,00710201,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,MAIL IN,Y,N,N
7462152,LANIER,00710092,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462153,LANIER,00710585,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N


In [4]:
# Work on a copy so the raw `import_csv` frame is not modified by the cells below.
df = import_csv.copy()

In [5]:
# Replace the title-case headers from the file with snake_case column names.
df = df.rename(
    columns={
        'County Name': 'county_name',
        'Voter Registration Number': 'voter_registration_number',
        'Election Date': 'election_date',
        'Election Type': 'election_type',
        'Party': 'party',
        'Ballot Style': 'ballot_style',
        'Absentee': 'absentee',
        'Provisional': 'provisional',
        'Supplemental': 'supplemental',
    }
)

In [6]:
def add_row_names(column):
    """Return 1-based row numbers, one for each entry of ``column``.

    Only the number of entries matters; the values themselves are ignored.

    Args:
        column: Any iterable, for example a DataFrame column.

    Returns:
        list: ``[1, 2, ..., n]`` where ``n`` is the number of entries
        (an empty list for an empty input).
    """
    return [position for position, _ in enumerate(column, start=1)]

In [7]:
# Put the 1-based row numbers in front of every other column; `county_name`
# is only used to determine how many rows there are.
# NOTE: `DataFrame.insert` modifies `df` in place and raises ValueError when
# `row_names` already exists, so this cell cannot be re-run on the same `df`.
df.insert(loc=0, column='row_names', value=add_row_names(df['county_name']))

In [8]:
def column_float_to_strings(column):
    """Convert every entry of ``column`` to its ``str()`` form.

    Missing values get no special treatment: an entry whose string form is
    ``"nan"`` (such as a float NaN) is emitted as the text ``"nan"``.

    Args:
        column: Iterable of values of any type.

    Returns:
        list: One string per entry, in the original order.
    """
    return [str(entry) for entry in column]

In [9]:
def remove_last_two_digits(column):
    """Drop the final two characters from every entry of ``column``.

    Despite the name, nothing checks that the removed characters are digits;
    each entry is simply sliced. Entries of two characters or fewer become
    empty.

    Args:
        column: Iterable of sliceable values (strings in practice).

    Returns:
        list: Each entry without its last two characters, in order.
    """
    return [entry[:-2] for entry in column]

In [10]:
# List the registration numbers that are null (an empty result means none).
# NOTE(review): this column was loaded through a `str` converter, so blank
# fields may be empty strings rather than nulls -- confirm whether `== ''`
# should be checked as well.
df.loc[df['voter_registration_number'].isnull(), 'voter_registration_number']

Series([], Name: voter_registration_number, dtype: object)

In [11]:
# Inspect the frame after the rename and the added `row_names` column.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,MCDUFFIE,00200975,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
1,2,MCDUFFIE,00214376,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
2,3,MCDUFFIE,00214519,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
3,4,MCDUFFIE,00222361,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
4,5,MCDUFFIE,00222385,01/30/2018,SPECIAL ELECTION,,REGULAR,N,N,N
...,...,...,...,...,...,...,...,...,...,...
7462150,7462151,LANIER,00710452,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462151,7462152,LANIER,00710201,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,MAIL IN,Y,N,N
7462152,7462153,LANIER,00710092,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462153,7462154,LANIER,00710585,08/21/2018,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N


In [12]:
def reformat_date(column):
    """Rearrange ``MM/DD/YYYY`` date strings into ``YYYY-MM-DD`` order.

    The pieces are picked by fixed character position (month at 0-1, day at
    3-4, year at 6-9). Separators are not inspected and nothing is validated,
    so an entry in any other layout yields a scrambled string, not an error.

    Args:
        column: Iterable of date strings.

    Returns:
        list: One ``YYYY-MM-DD`` string per entry, in the original order.
    """
    iso_dates = []
    for entry in column:
        month, day, year = entry[0:2], entry[3:5], entry[6:10]
        iso_dates.append(f"{year}-{month}-{day}")
    return iso_dates

In [13]:
# Overwrite the MM/DD/YYYY text with YYYY-MM-DD.
# NOTE: not safe to re-run on the same `df` -- reformat_date slices by fixed
# character position, so already-converted values would come out scrambled.
df['election_date'] = reformat_date(df['election_date'])

In [14]:
# Confirm that `election_date` now reads YYYY-MM-DD.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,MCDUFFIE,00200975,2018-01-30,SPECIAL ELECTION,,REGULAR,N,N,N
1,2,MCDUFFIE,00214376,2018-01-30,SPECIAL ELECTION,,REGULAR,N,N,N
2,3,MCDUFFIE,00214519,2018-01-30,SPECIAL ELECTION,,REGULAR,N,N,N
3,4,MCDUFFIE,00222361,2018-01-30,SPECIAL ELECTION,,REGULAR,N,N,N
4,5,MCDUFFIE,00222385,2018-01-30,SPECIAL ELECTION,,REGULAR,N,N,N
...,...,...,...,...,...,...,...,...,...,...
7462150,7462151,LANIER,00710452,2018-08-21,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462151,7462152,LANIER,00710201,2018-08-21,SPECIAL PRIMARY,REPUBLICAN,MAIL IN,Y,N,N
7462152,7462153,LANIER,00710092,2018-08-21,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N
7462153,7462154,LANIER,00710585,2018-08-21,SPECIAL PRIMARY,REPUBLICAN,IN PERSON,Y,N,N


In [17]:
# Keep only the rows whose election type is exactly "GENERAL".
df = df.loc[df['election_type'].eq("GENERAL")]

In [18]:
# Write the general-election rows to a CSV under the parent of the working directory.
# NOTE(review): `to_csv` writes the DataFrame index by default, in addition to
# the explicit `row_names` column -- confirm both are wanted in the output.
output_path = Path.cwd().parent / 'output_csv/general_2018.csv'
df.to_csv(output_path)

In [19]:
# Sanity check: count rows per election type in the filtered frame
# (only GENERAL is expected to remain).
df.value_counts('election_type')

election_type
GENERAL    3942933
Name: count, dtype: int64