In [1]:
import pandas as pd
import datetime
import mariadb
import numpy as np
from pathlib import Path

In [2]:
# Raw voter-history export, kept in a folder next to the notebook's working directory.
csv_path = Path.cwd().parent / 'voter_history_files' / 'A-12308.csv'
# Every column is routed through `str`, so values stay as text
# (registration numbers keep their leading zeros, blanks stay as '').
import_csv = pd.read_csv(
    csv_path,
    converters={
        header: str
        for header in (
            'County Name',
            'Voter Registration Number',
            'Election Date',
            'Election Type',
            'Party',
            'Ballot Style',
            'Absentee',
            'Provisional',
            'Supplemental',
        )
    },
)
# Name of the output file (matches the file name written in the final cell).
output_name = 'primary_2024.csv'

In [3]:
# Display the freshly loaded frame to eyeball the raw columns and values.
import_csv

Unnamed: 0,County Name,Voter Registration Number,Election Date,Election Type,Party,Ballot Style,Absentee,Provisional,Supplemental
0,JEFFERSON,00240170,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
1,EVANS,03342675,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
2,HARRIS,02834398,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ABSENTEE BY MAIL,Y,,N
3,MCDUFFIE,00214544,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
4,STEPHENS,01291547,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
...,...,...,...,...,...,...,...,...,...
886245,DOUGLAS,11560651,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ELECTION DAY (BMD),Y,N,N
886246,DOUGLAS,02967276,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N
886247,FULTON,02642641,03/12/2024,GENERAL PRIMARY,REPUBLICAN,EARLY IN-PERSON,Y,,N
886248,TROUP,04825054,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N


In [4]:
# Work on a copy so the raw import stays available, unchanged, in `import_csv`.
df = import_csv.copy()

In [5]:
# Map the export's human-readable headers onto snake_case column names.
snake_case_headers = {
    'County Name': 'county_name',
    'Voter Registration Number': 'voter_registration_number',
    'Election Date': 'election_date',
    'Election Type': 'election_type',
    'Party': 'party',
    'Ballot Style': 'ballot_style',
    'Absentee': 'absentee',
    'Provisional': 'provisional',
    'Supplemental': 'supplemental',
}
df = df.rename(columns=snake_case_headers)

In [6]:
def add_row_names(column):
    """Return 1-based sequential row numbers, one per element of ``column``.

    Only the number of elements matters; the values themselves are ignored.

    Parameters
    ----------
    column : iterable
        Any iterable, e.g. a pandas Series.

    Returns
    -------
    list of int
        ``[1, 2, ..., n]`` where ``n`` is the number of elements iterated.
    """
    return [position for position, _ in enumerate(column, start=1)]

In [7]:
# Add a 1-based `row_names` column as the first column. DataFrame.insert raises
# ValueError when the column already exists, so guard the call to keep this
# cell safe to re-run without restarting from the copy step.
if 'row_names' not in df.columns:
    df.insert(0, 'row_names', add_row_names(df['county_name']))

In [8]:
def column_float_to_strings(column):
    """Return the ``str()`` form of every entry in ``column`` as a list.

    A float NaN comes out as the text ``"nan"``, which is simply what
    ``str()`` produces for it; every other entry is likewise passed
    through ``str()``.

    Parameters
    ----------
    column : iterable
        Values to convert, e.g. a pandas Series.

    Returns
    -------
    list of str
        One string per input entry, in the original order.
    """
    return [str(value) for value in column]

In [9]:
def remove_last_two_digits(column):
    """Drop the final two characters from every entry of ``column``.

    Despite the name, the removed characters are not checked to be digits;
    each entry is simply sliced. Entries with two or fewer characters
    become empty.

    Parameters
    ----------
    column : iterable
        Sliceable entries (typically strings).

    Returns
    -------
    list
        The truncated entries, in the original order.
    """
    return [text[:-2] for text in column]

In [10]:
# The CSV columns are read through a `str` converter, so a missing registration
# number arrives as an empty string rather than NaN. Checking isnull() alone can
# therefore never flag anything; also treat blank / whitespace-only values as missing.
registration_numbers = df['voter_registration_number']
registration_numbers[registration_numbers.isnull() | registration_numbers.str.strip().eq('')]

Series([], Name: voter_registration_number, dtype: object)

In [11]:
# Inspect the frame after the column rename and the row_names insert.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,JEFFERSON,00240170,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
1,2,EVANS,03342675,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
2,3,HARRIS,02834398,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ABSENTEE BY MAIL,Y,,N
3,4,MCDUFFIE,00214544,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
4,5,STEPHENS,01291547,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
...,...,...,...,...,...,...,...,...,...,...
886245,886246,DOUGLAS,11560651,03/12/2024,GENERAL PRIMARY,DEMOCRAT,ELECTION DAY (BMD),Y,N,N
886246,886247,DOUGLAS,02967276,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N
886247,886248,FULTON,02642641,03/12/2024,GENERAL PRIMARY,REPUBLICAN,EARLY IN-PERSON,Y,,N
886248,886249,TROUP,04825054,03/12/2024,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N


In [12]:
def reformat_date(column):
    """Convert ``MM/DD/YYYY`` date strings to ISO ``YYYY-MM-DD`` strings.

    Only entries that have exactly the ``MM/DD/YYYY`` shape (two digits,
    slash, two digits, slash, four digits) are rewritten. Anything else,
    such as blanks, non-strings, or values already in ``YYYY-MM-DD`` form,
    is passed through unchanged instead of being sliced into garbage. This
    also makes it safe to apply the function to a column twice.

    The month and day ranges are not validated; only the layout is checked.

    Parameters
    ----------
    column : iterable
        Date values, e.g. a pandas Series of strings.

    Returns
    -------
    list
        One entry per input value, in the original order.
    """
    date_list = []
    for entry in column:
        has_us_layout = (
            isinstance(entry, str)
            and len(entry) == 10
            and entry[2] == '/'
            and entry[5] == '/'
            and (entry[0:2] + entry[3:5] + entry[6:10]).isdigit()
        )
        if has_us_layout:
            # Positions: MM = [0:2], DD = [3:5], YYYY = [6:10].
            date_list.append(f"{entry[6:10]}-{entry[0:2]}-{entry[3:5]}")
        else:
            date_list.append(entry)
    return date_list

In [13]:
# Rewrite election_date from MM/DD/YYYY text to YYYY-MM-DD text.
df['election_date'] = reformat_date(df['election_date'])

In [14]:
# Confirm that election_date now reads as YYYY-MM-DD.
df

Unnamed: 0,row_names,county_name,voter_registration_number,election_date,election_type,party,ballot_style,absentee,provisional,supplemental
0,1,JEFFERSON,00240170,2024-03-12,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
1,2,EVANS,03342675,2024-03-12,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
2,3,HARRIS,02834398,2024-03-12,GENERAL PRIMARY,REPUBLICAN,ABSENTEE BY MAIL,Y,,N
3,4,MCDUFFIE,00214544,2024-03-12,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
4,5,STEPHENS,01291547,2024-03-12,GENERAL PRIMARY,DEMOCRAT,ABSENTEE BY MAIL,Y,,N
...,...,...,...,...,...,...,...,...,...,...
886245,886246,DOUGLAS,11560651,2024-03-12,GENERAL PRIMARY,DEMOCRAT,ELECTION DAY (BMD),Y,N,N
886246,886247,DOUGLAS,02967276,2024-03-12,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N
886247,886248,FULTON,02642641,2024-03-12,GENERAL PRIMARY,REPUBLICAN,EARLY IN-PERSON,Y,,N
886248,886249,TROUP,04825054,2024-03-12,GENERAL PRIMARY,REPUBLICAN,ELECTION DAY (BMD),Y,N,N


In [17]:
# Tally rows per election type to spot unexpected or blank values.
df.value_counts('election_type')

election_type
GENERAL PRIMARY    886240
                        5
PPP                     3
STATEWIDE               2
Name: count, dtype: int64

In [15]:
# Reuse `output_name` (defined next to the input path) instead of repeating the
# file name literal, so the output name only has to be changed in one place.
output_path = Path.cwd().parent / 'output_csv' / output_name
# NOTE(review): with the default index=True the DataFrame index is also written
# as an unnamed leading column next to `row_names` — confirm that is intended.
df.to_csv(output_path)