In [4]:
# Import Dependencies
import os

import pandas as pd
from sqlalchemy import create_engine, inspect, text

### Store CSVs into DataFrames

In [5]:
# Load the 2016 and 2020 GA active-voter CSVs into DataFrames

file_2016 = "../Resources/GA_Active_Voters_2016.csv"
file_2020 = "../Resources/GA_Active_Voters_2020.csv"

# Both files are read with pandas' default CSV settings.
voter_data_2016, voter_data_2020 = (
    pd.read_csv(csv_path) for csv_path in (file_2016, file_2020)
)

### Clean up columns

In [6]:
# Drop County Code column- same as index.
# This cell rebinds voter_data_2016 to its own result, so a second run would hit a
# KeyError on the already-removed column; errors="ignore" makes the cell safe to re-run.
voter_data_2016 = voter_data_2016.drop(columns=["COUNTY CODE"], errors="ignore")
voter_data_2016.head()

Unnamed: 0,COUNTY NAME,BLACK MALE,BLACK FEMALE,BLACK UNKNOWN,WHITE MALE,WHITE FEMALE,WHITE UNKNOWN,ASIA-PI MALE,ASIA-PI FEMALE,ASIA-PI UNKNOWN,...,NATIVE-AM MALE,NATIVE-AM FEMALE,NATIVE-AM UNKNOWN,OTHER MALE,OTHER FEMALE,OTHER UNKNOWN,UNKNOWN MALE,UNKNOWN FEMALE,UNKNOWN,TOTAL VOTERS
0,APPLING,393,692,0,2693,3102,0,10,8,0,...,2,0,0,7,9,0,56,75,0,7109
1,ATKINSON,200,310,0,917,1078,0,1,0,0,...,1,3,0,3,1,0,12,15,0,2633
2,BACON,106,267,1,1592,1887,1,7,4,0,...,2,0,0,2,7,0,73,75,0,4051
3,BAKER,225,365,0,415,433,0,3,4,0,...,0,1,0,1,1,0,8,11,0,1471
4,BALDWIN,2225,4078,1,4359,5054,6,42,44,0,...,6,5,0,25,27,0,157,176,2,16311


In [7]:
# Drop County ID column- same as index.
# This cell rebinds voter_data_2020 to its own result, so a second run would hit a
# KeyError on the already-removed column; errors="ignore" makes the cell safe to re-run.
voter_data_2020 = voter_data_2020.drop(columns=["COUNTY ID"], errors="ignore")
voter_data_2020.head()

Unnamed: 0,COUNTY NAME,AI MALE VOTERS,AI FEMALE VOTERS,AI UNKNOWN VOTERS,AP MALE VOTERS,AP FEMALE VOTERS,AP UNKNOWN VOTERS,BH MALE VOTERS,BH FEMALE VOTERS,BH UNKNOWN VOTERS,...,WH MALE VOTERS,WH FEMALE VOTERS,WH UNKNOWN VOTERS,OT MALE VOTERS,OT FEMALE VOTERS,OT UNKNOWN VOTERS,UK MALE VOTERS,UK FEMALE VOTERS,UK UNKNOWN VOTERS,TOTAL VOTERS
0,APPLING,10,5,0,26,28,0,886,1115,0,...,3913,4411,4,28,49,0,389,343,3,11440
1,ATKINSON,2,7,0,2,3,0,395,509,1,...,1514,1717,1,16,17,0,119,131,3,4801
2,BACON,2,3,0,8,10,0,322,525,1,...,2482,2838,4,18,20,0,216,172,1,6726
3,BAKER,1,1,0,5,7,0,408,514,0,...,572,610,0,5,6,0,58,51,0,2258
4,BALDWIN,16,15,1,106,137,0,4553,6322,5,...,6447,7300,14,85,114,0,753,642,3,26742


In [8]:
# Rename 2016 columns to snake_case names.
# Maps each source CSV header to the column name used in the renamed frame.
column_names_2016 = {
    "COUNTY NAME": "ga_county",
    "BLACK MALE": "black_male",
    "BLACK FEMALE": "black_female",
    "BLACK UNKNOWN": "black_unknown",
    "WHITE MALE": "white_male",
    "WHITE FEMALE": "white_female",
    "WHITE UNKNOWN": "white_unknown",
    "ASIA-PI MALE": "asia_pi_male",
    "ASIA-PI FEMALE": "asia_pi_female",
    "ASIA-PI UNKNOWN": "asia_pi_unknown",
    "HISP-LT MALE": "hisp_lat_male",
    "HISP-LT FEMALE": "hisp_lat_female",
    "HISP-LT UNKNOWN": "hisp_lat_unknown",
    "NATIVE-AM MALE": "native_am_male",
    "NATIVE-AM FEMALE": "native_am_female",
    "NATIVE-AM UNKNOWN": "native_am_unknown",
    "OTHER MALE": "other_male",
    "OTHER FEMALE": "other_female",
    "OTHER UNKNOWN": "other_unknown",
    "UNKNOWN MALE": "unknown_male",
    "UNKNOWN FEMALE": "unknown_female",
    "UNKNOWN": "unknown",
    "TOTAL VOTERS": "total_voters",
}

# rename() returns a new frame; voter_data_2016 keeps its original headers.
race_data_2016 = voter_data_2016.rename(columns=column_names_2016)
race_data_2016.head()

Unnamed: 0,ga_county,black_male,black_female,black_unknown,white_male,white_female,white_unknown,asia_pi_male,asia_pi_female,asia_pi_unknown,...,native_am_male,native_am_female,native_am_unknown,other_male,other_female,other_unknown,unknown_male,unknown_female,unknown,total_voters
0,APPLING,393,692,0,2693,3102,0,10,8,0,...,2,0,0,7,9,0,56,75,0,7109
1,ATKINSON,200,310,0,917,1078,0,1,0,0,...,1,3,0,3,1,0,12,15,0,2633
2,BACON,106,267,1,1592,1887,1,7,4,0,...,2,0,0,2,7,0,73,75,0,4051
3,BAKER,225,365,0,415,433,0,3,4,0,...,0,1,0,1,1,0,8,11,0,1471
4,BALDWIN,2225,4078,1,4359,5054,6,42,44,0,...,6,5,0,25,27,0,157,176,2,16311


In [9]:
# Rename 2020 columns to snake_case names.
# Maps each source CSV header to the column name used in the renamed frame.
# NOTE(review): the last "unknown" column is named unknown_voters here, whereas the
# 2016 frame calls its counterpart "unknown"; the race_data_2020 table queried at the
# end of this notebook also shows unknown_voters, so the name is kept as is.
column_names_2020 = {
    "COUNTY NAME": "ga_county",
    "AI MALE VOTERS": "native_am_male",
    "AI FEMALE VOTERS": "native_am_female",
    "AI UNKNOWN VOTERS": "native_am_unknown",
    "AP MALE VOTERS": "asia_pi_male",
    "AP FEMALE VOTERS": "asia_pi_female",
    "AP UNKNOWN VOTERS": "asia_pi_unknown",
    "BH MALE VOTERS": "black_male",
    "BH FEMALE VOTERS": "black_female",
    "BH UNKNOWN VOTERS": "black_unknown",
    "HP MALE VOTERS": "hisp_lat_male",
    "HP FEMALE VOTERS": "hisp_lat_female",
    "HP UNKNOWN VOTERS": "hisp_lat_unknown",
    "WH MALE VOTERS": "white_male",
    "WH FEMALE VOTERS": "white_female",
    "WH UNKNOWN VOTERS": "white_unknown",
    "OT MALE VOTERS": "other_male",
    "OT FEMALE VOTERS": "other_female",
    "OT UNKNOWN VOTERS": "other_unknown",
    "UK MALE VOTERS": "unknown_male",
    "UK FEMALE VOTERS": "unknown_female",
    "UK UNKNOWN VOTERS": "unknown_voters",
    "TOTAL VOTERS": "total_voters",
}

# rename() returns a new frame; voter_data_2020 keeps its original headers.
race_data_2020 = voter_data_2020.rename(columns=column_names_2020)
race_data_2020.head()

Unnamed: 0,ga_county,native_am_male,native_am_female,native_am_unknown,asia_pi_male,asia_pi_female,asia_pi_unknown,black_male,black_female,black_unknown,...,white_male,white_female,white_unknown,other_male,other_female,other_unknown,unknown_male,unknown_female,unknown_voters,total_voters
0,APPLING,10,5,0,26,28,0,886,1115,0,...,3913,4411,4,28,49,0,389,343,3,11440
1,ATKINSON,2,7,0,2,3,0,395,509,1,...,1514,1717,1,16,17,0,119,131,3,4801
2,BACON,2,3,0,8,10,0,322,525,1,...,2482,2838,4,18,20,0,216,172,1,6726
3,BAKER,1,1,0,5,7,0,408,514,0,...,572,610,0,5,6,0,58,51,0,2258
4,BALDWIN,16,15,1,106,137,0,4553,6322,5,...,6447,7300,14,85,114,0,753,642,3,26742


In [10]:
# List the renamed 2016 column names to confirm the mapping was applied
race_data_2016.columns.tolist()

['ga_county',
 'black_male',
 'black_female',
 'black_unknown',
 'white_male',
 'white_female',
 'white_unknown',
 'asia_pi_male',
 'asia_pi_female',
 'asia_pi_unknown',
 'hisp_lat_male',
 'hisp_lat_female',
 'hisp_lat_unknown',
 'native_am_male',
 'native_am_female',
 'native_am_unknown',
 'other_male',
 'other_female',
 'other_unknown',
 'unknown_male',
 'unknown_female',
 'unknown',
 'total_voters']

In [11]:
# Confirm column types for the 2016 frame before loading it into the database
# (the output below shows ga_county as object and every count column as int64)
race_data_2016.dtypes

ga_county            object
black_male            int64
black_female          int64
black_unknown         int64
white_male            int64
white_female          int64
white_unknown         int64
asia_pi_male          int64
asia_pi_female        int64
asia_pi_unknown       int64
hisp_lat_male         int64
hisp_lat_female       int64
hisp_lat_unknown      int64
native_am_male        int64
native_am_female      int64
native_am_unknown     int64
other_male            int64
other_female          int64
other_unknown         int64
unknown_male          int64
unknown_female        int64
unknown               int64
total_voters          int64
dtype: object

In [12]:
# Confirm column types for the 2020 frame before loading it into the database
# (the output below shows ga_county as object and every count column as int64)
race_data_2020.dtypes

ga_county            object
native_am_male        int64
native_am_female      int64
native_am_unknown     int64
asia_pi_male          int64
asia_pi_female        int64
asia_pi_unknown       int64
black_male            int64
black_female          int64
black_unknown         int64
hisp_lat_male         int64
hisp_lat_female       int64
hisp_lat_unknown      int64
white_male            int64
white_female          int64
white_unknown         int64
other_male            int64
other_female          int64
other_unknown         int64
unknown_male          int64
unknown_female        int64
unknown_voters        int64
total_voters          int64
dtype: object

### Connect to local database

In [13]:
# Build the PostgreSQL connection for the ga_election_db database.
# The "user:password@host:port/dbname" portion is taken from the GA_ELECTION_DB_CONN
# environment variable when it is set, so real credentials do not have to be saved
# in the notebook; otherwise the local development value is used as before.
rds_connection_string = os.environ.get(
    "GA_ELECTION_DB_CONN",
    "postgres:postgres@localhost:5432/ga_election_db",
)
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [14]:
# List the tables currently present in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names.
inspect(engine).get_table_names()

['election_2020_ga',
 'primary_2016_ga',
 'race_data_2016',
 'fulton_trump_2020',
 'fulton_biden_2020',
 'race_data_2020']

### Use pandas to load the CSV-derived DataFrames into the database

In [11]:
# Send 2016 df to database table called race_data_2016.
# A plain append adds a second copy of every county each time this cell is re-run.
# Existing rows are therefore deleted first, inside the same transaction as the insert,
# so a failed load rolls back and leaves the previous contents untouched.
# if_exists="append" (rather than "replace") keeps the table definition already in the database.
with engine.begin() as connection:
    if "race_data_2016" in inspect(connection).get_table_names():
        connection.execute(text("DELETE FROM race_data_2016"))
    race_data_2016.to_sql(name="race_data_2016", con=connection, if_exists='append', index=False)

In [16]:
# Send 2020 df to database table called race_data_2020.
# A plain append adds a second copy of every county each time this cell is re-run.
# Existing rows are therefore deleted first, inside the same transaction as the insert,
# so a failed load rolls back and leaves the previous contents untouched.
# if_exists="append" (rather than "replace") keeps the table definition already in the database.
with engine.begin() as connection:
    if "race_data_2020" in inspect(connection).get_table_names():
        connection.execute(text("DELETE FROM race_data_2020"))
    race_data_2020.to_sql(name="race_data_2020", con=connection, if_exists='append', index=False)

### Confirm data has been added by querying tables

In [15]:
# 2016: read every row of race_data_2016 back from the database to verify the load
query_2016 = "select * from race_data_2016"
pd.read_sql_query(query_2016, con=engine)

Unnamed: 0,ga_county,black_male,black_female,black_unknown,white_male,white_female,white_unknown,asia_pi_male,asia_pi_female,asia_pi_unknown,...,native_am_male,native_am_female,native_am_unknown,other_male,other_female,other_unknown,unknown_male,unknown_female,unknown,total_voters
0,APPLING,393,692,0,2693,3102,0,10,8,0,...,2,0,0,7,9,0,56,75,0,7109
1,ATKINSON,200,310,0,917,1078,0,1,0,0,...,1,3,0,3,1,0,12,15,0,2633
2,BACON,106,267,1,1592,1887,1,7,4,0,...,2,0,0,2,7,0,73,75,0,4051
3,BAKER,225,365,0,415,433,0,3,4,0,...,0,1,0,1,1,0,8,11,0,1471
4,BALDWIN,2225,4078,1,4359,5054,6,42,44,0,...,6,5,0,25,27,0,157,176,2,16311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,WILCOX,260,496,0,1042,1156,1,4,3,0,...,2,0,0,2,4,0,13,9,0,3011
156,WILKES,546,1050,0,1336,1486,0,6,6,0,...,1,3,0,4,8,0,33,47,0,4531
157,WILKINSON,630,1103,1,1187,1311,0,1,3,0,...,0,0,0,4,4,0,39,33,1,4321
158,WORTH,593,1117,0,3061,3347,2,1,11,0,...,1,2,0,25,14,0,80,80,3,8365


In [17]:
# 2020: read every row of race_data_2020 back from the database to verify the load
query_2020 = "select * from race_data_2020"
pd.read_sql_query(query_2020, con=engine)

Unnamed: 0,ga_county,black_male,black_female,black_unknown,white_male,white_female,white_unknown,asia_pi_male,asia_pi_female,asia_pi_unknown,...,native_am_male,native_am_female,native_am_unknown,other_male,other_female,other_unknown,unknown_male,unknown_female,unknown_voters,total_voters
0,APPLING,886,1115,0,3913,4411,4,26,28,0,...,10,5,0,28,49,0,389,343,3,11440
1,ATKINSON,395,509,1,1514,1717,1,2,3,0,...,2,7,0,16,17,0,119,131,3,4801
2,BACON,322,525,1,2482,2838,4,8,10,0,...,2,3,0,18,20,0,216,172,1,6726
3,BAKER,408,514,0,572,610,0,5,7,0,...,1,1,0,5,6,0,58,51,0,2258
4,BALDWIN,4553,6322,5,6447,7300,14,106,137,0,...,16,15,1,85,114,0,753,642,3,26742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,WILCOX,533,705,0,1483,1631,1,5,10,0,...,4,1,0,15,11,0,64,52,0,4562
156,WILKES,1108,1470,6,1819,1936,1,17,14,0,...,6,6,0,20,33,0,202,152,4,6836
157,WILKINSON,1113,1405,4,1748,1847,3,2,10,0,...,4,6,0,16,21,0,143,140,0,6484
158,WORTH,1481,1917,4,4582,4903,7,9,23,0,...,6,9,0,42,38,0,421,320,4,13847
