In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

In [2]:
us_counties_file = "live/us-counties.csv"
counties_df = pd.read_csv(us_counties_file)
counties_df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2020-08-01,Autauga,Alabama,1001.0,1015,21.0,972.0,20.0,43.0,1.0
1,2020-08-01,Baldwin,Alabama,1003.0,3101,22.0,3056.0,21.0,45.0,1.0
2,2020-08-01,Barbour,Alabama,1005.0,598,5.0,550.0,5.0,48.0,0.0
3,2020-08-01,Bibb,Alabama,1007.0,363,2.0,355.0,2.0,8.0,0.0
4,2020-08-01,Blount,Alabama,1009.0,767,3.0,685.0,3.0,82.0,0.0


### Transform County DataFrame ###

In [3]:
# Create a filtered dataframe from specific columns
# counties_df.info()
counties_cols = ["county", "state", "fips","confirmed_cases","confirmed_deaths"]
counties_transformed= counties_df[counties_cols].copy()

# Rename the column headers
counties_transformed = counties_transformed.rename(columns={"fips": "COUNTYFP",
                                                              "county": "COUNTY",
                                                              "state": "STATE",
                                                                "confirmed_cases":"CONFIRMED_CASES",
                                                                "confirmed_deaths":"CONFIRMED_DEATHS"})

counties_transformed['COUNTYFP'] = counties_transformed['COUNTYFP'].astype('O')
counties_transformed.loc[counties_transformed['COUNTYFP'].notnull(), 'COUNTYFP'] = counties_transformed.loc[counties_transformed['COUNTYFP'].notnull(), 'COUNTYFP'].astype(int)

counties_transformed['CONFIRMED_CASES'] = counties_transformed['CONFIRMED_CASES'].astype('O')
counties_transformed.loc[counties_transformed['CONFIRMED_CASES'].notnull(), 'CONFIRMED_CASES'] = counties_transformed.loc[counties_transformed['CONFIRMED_CASES'].notnull(), 'CONFIRMED_CASES'].astype(int)

counties_transformed['CONFIRMED_DEATHS'] = counties_transformed['CONFIRMED_DEATHS'].astype('O')
counties_transformed.loc[counties_transformed['CONFIRMED_DEATHS'].notnull(), 'CONFIRMED_DEATHS'] = counties_transformed.loc[counties_transformed['CONFIRMED_DEATHS'].notnull(), 'CONFIRMED_DEATHS'].astype(int)

counties_transformed.head()

Unnamed: 0,COUNTY,STATE,COUNTYFP,CONFIRMED_CASES,CONFIRMED_DEATHS
0,Autauga,Alabama,1001,972,20
1,Baldwin,Alabama,1003,3056,21
2,Barbour,Alabama,1005,550,5
3,Bibb,Alabama,1007,355,2
4,Blount,Alabama,1009,685,3


### Transform Mask-Usage Dataframe###

In [4]:
mask_use_file = "mask-use/mask-use-by-county.csv"
mask_use_df = pd.read_csv(mask_use_file)
mask_use_df.head()

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.12,0.201,0.491
3,1007,0.02,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.18,0.194,0.459


In [5]:
mask_use_df.set_index("COUNTYFP", inplace=True)

mask_use_df['NEVER']= mask_use_df['NEVER'].astype(float).map("{:,.2f}%".format)

mask_use_df['RARELY']= mask_use_df['RARELY'].astype(float).map("{:,.2f}%".format)

mask_use_df['SOMETIMES']= mask_use_df['SOMETIMES'].astype(float).map("{:,.2f}%".format)

mask_use_df['FREQUENTLY']= mask_use_df['FREQUENTLY'].astype(float).map("{:,.2f}%".format)

mask_use_df['ALWAYS']= mask_use_df['ALWAYS'].astype(float).map("{:,.2f}%".format)


mask_use_df

Unnamed: 0_level_0,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
COUNTYFP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1001,0.05%,0.07%,0.13%,0.29%,0.44%
1003,0.08%,0.06%,0.10%,0.32%,0.44%
1005,0.07%,0.12%,0.12%,0.20%,0.49%
1007,0.02%,0.03%,0.10%,0.28%,0.57%
1009,0.05%,0.11%,0.18%,0.19%,0.46%
...,...,...,...,...,...
56037,0.06%,0.29%,0.23%,0.15%,0.27%
56039,0.10%,0.16%,0.16%,0.25%,0.34%
56041,0.10%,0.28%,0.15%,0.21%,0.26%
56043,0.20%,0.15%,0.07%,0.28%,0.29%


<!-- Create database connection -->

### Create Database Connection ###

In [None]:
connection_string = "postgres:postgres@localhost:5432/covidmask_db"
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
# Confirm tables
engine.table_names()

### Load DataFrames into Database ###

In [None]:
counties_transformed.to_sql(name='county', con=engine, if_exists='append', index=True)

In [None]:
mask_use_df.to_sql(name='mask', con=engine, if_exists='append', index=True)