In [1]:
import pandas as pd
import datetime
import mariadb
import numpy as np
from pathlib import Path

In [2]:
# The voter history export sits in a folder next to the notebook's working directory.
csv_path = Path.cwd().parent / 'voter_history_files/A-12147.csv'

# Route every named column through ``str`` so the text is kept exactly as it
# appears in the file (e.g. registration numbers keep their leading zeros).
import_csv = pd.read_csv(
    csv_path,
    converters=dict.fromkeys(
        [
            'County Name',
            'Voter Registration Number',
            'Election Date',
            'Election Type',
            'Party',
            'Ballot Style',
            'Absentee',
            'Provisional',
            'Supplemental',
        ],
        str,
    ),
)

In [3]:
# Preview the raw import exactly as read from the CSV (original column names).
import_csv

Unnamed: 0,County Name,Voter Registration Number,Election Date,Election Type,Party,Ballot Style,Absentee,Provisional,Supplemental
0,APPLING,00648862,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
1,APPLING,00651735,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
2,APPLING,03679018,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
3,DADE,05612337,12/06/2022,GENERAL ELECTION RUNOFF,,REGULAR,N,Y,N
4,GWINNETT,06954777,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY,N,N,N
...,...,...,...,...,...,...,...,...,...
3538425,FULTON,13530227,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE BY MAIL,Y,,N
3538426,FULTON,13167645,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538427,FULTON,13502513,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538428,DOUGLAS,04971785,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N


In [4]:
# Work on a copy so the frame read from disk (import_csv) stays untouched.
df = import_csv.copy()

In [5]:
# Switch the CSV's title-case headers to snake_case column names.
df = df.rename(
    columns={
        'County Name': 'county_name',
        'Voter Registration Number': 'voter_registration_number',
        'Election Date': 'election_date',
        'Election Type': 'election_type',
        'Party': 'party',
        'Ballot Style': 'ballot_style',
        'Absentee': 'absentee',
        'Provisional': 'provisional',
        'Supplemental': 'supplemental',
    }
)

In [6]:
def add_row_names(column):
    """Return 1-based row numbers, one for each entry in ``column``.

    The entries themselves are ignored; only how many of them there are
    matters. An empty ``column`` yields an empty list.
    """
    return [position for position, _ in enumerate(column, start=1)]

In [7]:
# Put 1-based row numbers in the first column. DataFrame.insert raises
# ValueError when the column already exists, so guard the call to keep this
# cell safe to re-run against the same kernel state.
if 'row_names' not in df.columns:
    df.insert(0, 'row_names', add_row_names(df['county_name']))

In [8]:
def column_float_to_strings(column):
    """Return a list holding the ``str()`` form of every entry in ``column``.

    A float NaN therefore comes back as the literal text "nan", the same as
    any other value is rendered by ``str``.
    """
    return [str(value) for value in column]

In [9]:
def remove_last_two_digits(column):
    """Return each entry of ``column`` with its final two characters removed.

    Entries are sliced, so they must be strings (or another sliceable type).
    An entry of two characters or fewer becomes empty.
    """
    return [text[:-2] for text in column]

In [10]:
# Look for missing voter registration numbers. The column is read through a
# ``str`` converter, which hands blank CSV fields back as empty strings
# rather than NaN, so test for blank text as well as nulls.
df.loc[
    df['voter_registration_number'].isnull()
    | df['voter_registration_number'].str.strip().eq(''),
    'voter_registration_number',
]

Series([], Name: voter_registration_number, dtype: object)

In [11]:
# Inspect the frame after the column rename and the row_names insert.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,APPLING,00648862,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
1,2,APPLING,00651735,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
2,3,APPLING,03679018,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
3,4,DADE,05612337,12/06/2022,GENERAL ELECTION RUNOFF,,REGULAR,N,Y,N
4,5,GWINNETT,06954777,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY,N,N,N
...,...,...,...,...,...,...,...,...,...,...
3538425,3538426,FULTON,13530227,12/06/2022,GENERAL ELECTION RUNOFF,,ABSENTEE BY MAIL,Y,,N
3538426,3538427,FULTON,13167645,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538427,3538428,FULTON,13502513,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538428,3538429,DOUGLAS,04971785,12/06/2022,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N


In [12]:
def reformat_date(column):
    """Rearrange ``MM/DD/YYYY`` strings into ``YYYY-MM-DD`` strings.

    The conversion is purely positional: characters 0-1 are used as the
    month, 3-4 as the day and 6-9 as the year. Nothing is validated, so an
    entry in any other layout is rearranged by the same positions.
    """
    return [f"{text[6:10]}-{text[0:2]}-{text[3:5]}" for text in column]

In [13]:
# Build the YYYY-MM-DD dates from the untouched import rather than from df's
# own column. reformat_date slices by character position, so feeding it values
# that were already converted (e.g. when this cell is re-run) would silently
# scramble them. df is a row-for-row copy of import_csv, so positions line up.
df['election_date'] = reformat_date(import_csv['Election Date'])

In [14]:
# Inspect the frame again to confirm election_date now reads YYYY-MM-DD.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,APPLING,00648862,2022-12-06,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
1,2,APPLING,00651735,2022-12-06,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
2,3,APPLING,03679018,2022-12-06,GENERAL ELECTION RUNOFF,,ABSENTEE,N,N,N
3,4,DADE,05612337,2022-12-06,GENERAL ELECTION RUNOFF,,REGULAR,N,Y,N
4,5,GWINNETT,06954777,2022-12-06,GENERAL ELECTION RUNOFF,,EARLY,N,N,N
...,...,...,...,...,...,...,...,...,...,...
3538425,3538426,FULTON,13530227,2022-12-06,GENERAL ELECTION RUNOFF,,ABSENTEE BY MAIL,Y,,N
3538426,3538427,FULTON,13167645,2022-12-06,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538427,3538428,FULTON,13502513,2022-12-06,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N
3538428,3538429,DOUGLAS,04971785,2022-12-06,GENERAL ELECTION RUNOFF,,EARLY IN-PERSON,Y,,N


In [17]:
# Show the rows whose election type is anything other than the runoff label.
df.loc[df['election_type'].ne("GENERAL ELECTION RUNOFF")]

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
1631361,1631362,GWINNETT,01973288,2022-12-06,GENERAL,,ABSENTEE BY MAIL,Y,,N
1631362,1631363,SPALDING,08804677,2022-12-06,GENERAL,,ABSENTEE BY MAIL,Y,,N
1631370,1631371,DOUGHERTY,05916828,2022-12-06,GENERAL,,ABSENTEE BY MAIL,Y,,N
1631372,1631373,RICHMOND,07731788,2022-12-06,GENERAL,,ABSENTEE BY MAIL,Y,,N
1631378,1631379,MORGAN,06210249,2022-12-06,GENERAL,,ABSENTEE BY MAIL,Y,,N
...,...,...,...,...,...,...,...,...,...,...
2589961,2589962,NEWTON,01980010,2022-12-06,GENERAL,,EARLY IN-PERSON,Y,,N
2589962,2589963,MURRAY,02236419,2022-12-06,GENERAL,,EARLY IN-PERSON,Y,,N
2589963,2589964,FULTON,02365647,2022-12-06,GENERAL,,EARLY IN-PERSON,Y,,N
2633754,2633755,FULTON,02636318,2022-12-06,GENERAL,,EARLY IN-PERSON,Y,,N


In [16]:
# Tally how many rows carry each election_type label.
df.value_counts(subset='election_type')

election_type
GENERAL ELECTION RUNOFF    2709507
GENERAL                     828923
Name: count, dtype: int64

In [15]:
# Write the cleaned frame to a folder next to the notebook's working directory.
output_path = Path().cwd().parent.joinpath('output_csv/general_runoff_2022.csv')

# to_csv does not create missing folders, so make sure the target directory
# exists instead of failing at the very last step on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): to_csv writes the DataFrame index by default, so the file gets
# an unnamed 0-based index column in addition to row_names - confirm whatever
# consumes this file expects both.
# NOTE(review): the election_type tally in this notebook shows rows labelled
# "GENERAL" as well as "GENERAL ELECTION RUNOFF"; all of them are written here.
df.to_csv(output_path)