In [1]:
# Data provided by HealthData.gov
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

In [2]:
# Get PostgreSql password from config file
from apikeys_config import pgAdmin_pw

In [3]:
# Location of the state policy CSV, relative to this notebook
raw_data = "../Resources/COVID_state_policy_updates_20201013_1152.csv"

# Load the CSV into a DataFrame with pandas and display it
raw_data_df = pd.read_csv(filepath_or_buffer=raw_data, encoding="utf-8")
raw_data_df

Unnamed: 0,state_id,county,fips_code,policy_level,date,policy_type,start_stop,comments,source,total_phases
0,GA,DeKalb County,13089.0,county,2020-03-28,Shelter in Place,start,DeKalb County Policy Start,wikidata,
1,MS,Wayne,28153.0,county,2020-07-13,Houses of Worship,start,Policy_Details: Executive Order by Governor Ta...,sip_submission_form: https://www.sos.ms.gov/co...,
2,SC,,,state,2020-03-17,Stop Initiation Of Evictions Overall Or Due To...,start,Other measures and details for this policy inc...,BU COVID-19 State Policy Database,
3,MO,Pike County,29163.0,county,2020-04-05,Shelter in Place,start,Pike County Policy Start,wikidata,
4,OH,Trumbull,39155.0,county,2020-07-08,Mask Requirement,start,Policy_Details: face coverings required in any...,sip_submission_form: https://coronavirus.ohio....,
...,...,...,...,...,...,...,...,...,...,...
2669,TX,Robertson County,48395.0,county,2020-04-30,Shelter in Place,stop,Robertson County Policy End,wikidata,
2670,NC,,,state,2020-05-08,Shelter in Place,stop,State Policy End,manual,
2671,PA,Berks County,42011.0,county,2020-04-30,Shelter in Place,stop,Berks County Policy End,wikidata,
2672,PA,Luzerne County,42079.0,county,2020-04-30,Shelter in Place,stop,Luzerne County Policy End,wikidata,


In [4]:
# Count the rows in the raw policy data
number_of_records = raw_data_df.shape[0]
number_of_records

2674

In [5]:
# Show the dataframe columns, i.e. the field names available for filtering
# and selection
raw_data_df.columns

Index(['state_id', 'county', 'fips_code', 'policy_level', 'date',
       'policy_type', 'start_stop', 'comments', 'source', 'total_phases'],
      dtype='object')

In [6]:
# List the distinct "policy_type" values to find which one identifies
# stay-at-home orders
raw_data_df["policy_type"].unique()

array(['Shelter in Place', 'Houses of Worship',
       'Stop Initiation Of Evictions Overall Or Due To Covid Related Issues',
       'Mask Requirement', 'Phase 2', 'New Phase', 'Food and Drink',
       'Modify Medicaid Requirements With 1135 Waivers Date Of CMS Approval',
       'Stop Enforcement Of Evictions Overall Or Due To Covid Related Issues',
       'Childcare (K-12)', 'Outdoor and Recreation',
       'Non-Essential Businesses', 'Entertainment', 'Gyms',
       'Allow Audio Only Telehealth',
       'Mandate Face Mask Use By All Individuals In Public Facing Businesses',
       'Public Health Advisory System',
       'Mandate Face Mask Use By All Individuals In Public Spaces',
       'Resumed Elective Medical Procedures', 'State of Emergency',
       'Reopened ACA Enrollment Using a Special Enrollment Period',
       'Suspended Elective Medical Dental Procedures',
       'Order Freezing Utility Shut Offs',
       'Allow Expand Medicaid Telehealth Coverage', 'Day Care', 'Phase 3',
 

In [7]:
# Stay-at-Home orders mark the dates on which states "closed down", which
# impacted real estate sales. In this dataset they carry the policy_type
# "Shelter in Place", so keep only those rows.
is_shelter_in_place = raw_data_df["policy_type"] == 'Shelter in Place'
reduced_df = raw_data_df[is_shelter_in_place]
reduced_df

Unnamed: 0,state_id,county,fips_code,policy_level,date,policy_type,start_stop,comments,source,total_phases
0,GA,DeKalb County,13089.0,county,2020-03-28,Shelter in Place,start,DeKalb County Policy Start,wikidata,
3,MO,Pike County,29163.0,county,2020-04-05,Shelter in Place,start,Pike County Policy Start,wikidata,
9,GA,Lowndes County,13185.0,county,2020-03-24,Shelter in Place,start,Lowndes County Policy Start,wikidata,
11,TX,San Jacinto County,48407.0,county,2020-03-25,Shelter in Place,start,San Jacinto County Policy Start,wikidata,
13,PA,Luzerne County,42079.0,county,2020-03-27,Shelter in Place,start,Luzerne County Policy Start,wikidata,
...,...,...,...,...,...,...,...,...,...,...
2668,TN,Knox County,47093.0,county,2020-04-07,Shelter in Place,stop,Knox County Policy End,wikidata,
2669,TX,Robertson County,48395.0,county,2020-04-30,Shelter in Place,stop,Robertson County Policy End,wikidata,
2670,NC,,,state,2020-05-08,Shelter in Place,stop,State Policy End,manual,
2671,PA,Berks County,42011.0,county,2020-04-30,Shelter in Place,stop,Berks County Policy End,wikidata,


In [8]:
# Keep only the fields needed for the start/stop date analysis
columns_to_keep = ["state_id", "policy_type", "start_stop", "date"]

shelter_in_place_df = reduced_df.loc[:, columns_to_keep]
shelter_in_place_df

Unnamed: 0,state_id,policy_type,start_stop,date
0,GA,Shelter in Place,start,2020-03-28
3,MO,Shelter in Place,start,2020-04-05
9,GA,Shelter in Place,start,2020-03-24
11,TX,Shelter in Place,start,2020-03-25
13,PA,Shelter in Place,start,2020-03-27
...,...,...,...,...
2668,TN,Shelter in Place,stop,2020-04-07
2669,TX,Shelter in Place,stop,2020-04-30
2670,NC,Shelter in Place,stop,2020-05-08
2671,PA,Shelter in Place,stop,2020-04-30


In [9]:
# Earliest "Shelter in Place" start date per state.
# Only the "start" rows are grouped; previously the boolean mask was computed
# and then overwritten, so the minimum was taken over start AND stop records.
start_records_df = shelter_in_place_df[shelter_in_place_df["start_stop"] == "start"]

# Aggregating with a list keeps the two-level ('date', 'min') column header,
# so the shape of the result is unchanged.
start_dates_df = start_records_df.groupby('state_id').agg({'date': ['min']})
start_dates_df

Unnamed: 0_level_0,date
Unnamed: 0_level_1,min
state_id,Unnamed: 1_level_2
AK,2020-03-22
AL,2020-04-04
AZ,2020-03-31
CA,2020-03-17
CO,2020-03-19
CT,2020-03-23
DC,2020-03-23
DE,2020-03-24
FL,2020-03-24
GA,2020-03-22


In [10]:
# Pull the grouped start dates out into plain Python lists.
# Each entry of date_list is itself a one-element list (one value per
# aggregated column of start_dates_df).
state_id_list = start_dates_df.index.tolist()
date_list = start_dates_df.values.tolist()

number_records = len(state_id_list)

# Every row carries the same policy label and the "start" marker
policy_type = ['Shelter in Place'] * number_records
start_stop = ['start'] * number_records

# Assemble the column data; key order determines the column order
start_dates_dict = {
    "state_id": state_id_list,
    "policy_type": policy_type,
    "start_stop": start_stop,
    "date": date_list,
}

# Build the dataframe from the column data
new_start_dates_df = pd.DataFrame(start_dates_dict)

# Display the result
new_start_dates_df

Unnamed: 0,state_id,policy_type,start_stop,date
0,AK,Shelter in Place,start,[2020-03-22]
1,AL,Shelter in Place,start,[2020-04-04]
2,AZ,Shelter in Place,start,[2020-03-31]
3,CA,Shelter in Place,start,[2020-03-17]
4,CO,Shelter in Place,start,[2020-03-19]
5,CT,Shelter in Place,start,[2020-03-23]
6,DC,Shelter in Place,start,[2020-03-23]
7,DE,Shelter in Place,start,[2020-03-24]
8,FL,Shelter in Place,start,[2020-03-24]
9,GA,Shelter in Place,start,[2020-03-22]


In [11]:
# Convert each date entry (a one-element list) to a plain string.
# Values that are already strings are left untouched, so re-running this cell
# does not break the date up into space-separated characters.
new_start_dates_df['date'] = new_start_dates_df['date'].apply(
    lambda x: x if isinstance(x, str) else ' '.join(map(str, x))
)
new_start_dates_df

Unnamed: 0,state_id,policy_type,start_stop,date
0,AK,Shelter in Place,start,2020-03-22
1,AL,Shelter in Place,start,2020-04-04
2,AZ,Shelter in Place,start,2020-03-31
3,CA,Shelter in Place,start,2020-03-17
4,CO,Shelter in Place,start,2020-03-19
5,CT,Shelter in Place,start,2020-03-23
6,DC,Shelter in Place,start,2020-03-23
7,DE,Shelter in Place,start,2020-03-24
8,FL,Shelter in Place,start,2020-03-24
9,GA,Shelter in Place,start,2020-03-22


In [12]:
# Latest "Shelter in Place" stop date per state.
# Only the "stop" rows are grouped; previously the boolean mask was computed
# and then overwritten, so the maximum was taken over start AND stop records
# (a state without any stop record would have received a start date here).
stop_records_df = shelter_in_place_df[shelter_in_place_df["start_stop"] == "stop"]

# Aggregating with a list keeps the two-level ('date', 'max') column header,
# so the shape of the result is unchanged.
stop_dates_df = stop_records_df.groupby('state_id').agg({'date': ['max']})
stop_dates_df

Unnamed: 0_level_0,date
Unnamed: 0_level_1,max
state_id,Unnamed: 1_level_2
AK,2020-04-24
AL,2020-04-30
AZ,2020-05-15
CA,2020-05-31
CO,2020-05-01
CT,2020-05-20
DC,2020-06-08
DE,2020-05-31
FL,2020-05-04
GA,2020-04-27


In [13]:
# Pull the grouped stop dates out into plain Python lists.
# Each entry of date_list is itself a one-element list (one value per
# aggregated column of stop_dates_df).
state_id_list = stop_dates_df.index.tolist()
date_list = stop_dates_df.values.tolist()

number_records = len(state_id_list)

# Every row carries the same policy label and the "stop" marker
policy_type = ['Shelter in Place'] * number_records
start_stop = ['stop'] * number_records

# Assemble the column data; key order determines the column order
stop_dates_dict = {
    "state_id": state_id_list,
    "policy_type": policy_type,
    "start_stop": start_stop,
    "date": date_list,
}

# Build the dataframe from the column data
new_stop_dates_df = pd.DataFrame(stop_dates_dict)

# Display the result
new_stop_dates_df

Unnamed: 0,state_id,policy_type,start_stop,date
0,AK,Shelter in Place,stop,[2020-04-24]
1,AL,Shelter in Place,stop,[2020-04-30]
2,AZ,Shelter in Place,stop,[2020-05-15]
3,CA,Shelter in Place,stop,[2020-05-31]
4,CO,Shelter in Place,stop,[2020-05-01]
5,CT,Shelter in Place,stop,[2020-05-20]
6,DC,Shelter in Place,stop,[2020-06-08]
7,DE,Shelter in Place,stop,[2020-05-31]
8,FL,Shelter in Place,stop,[2020-05-04]
9,GA,Shelter in Place,stop,[2020-04-27]


In [14]:
# Convert each date entry (a one-element list) to a plain string.
# Values that are already strings are left untouched, so re-running this cell
# does not break the date up into space-separated characters.
new_stop_dates_df['date'] = new_stop_dates_df['date'].apply(
    lambda x: x if isinstance(x, str) else ' '.join(map(str, x))
)
new_stop_dates_df

Unnamed: 0,state_id,policy_type,start_stop,date
0,AK,Shelter in Place,stop,2020-04-24
1,AL,Shelter in Place,stop,2020-04-30
2,AZ,Shelter in Place,stop,2020-05-15
3,CA,Shelter in Place,stop,2020-05-31
4,CO,Shelter in Place,stop,2020-05-01
5,CT,Shelter in Place,stop,2020-05-20
6,DC,Shelter in Place,stop,2020-06-08
7,DE,Shelter in Place,stop,2020-05-31
8,FL,Shelter in Place,stop,2020-05-04
9,GA,Shelter in Place,stop,2020-04-27


In [15]:
# Outer-join the per-state start and stop frames on state_id and policy_type.
# The overlapping start_stop/date columns receive pandas' default _x/_y suffixes.
new_shelter_in_place_df = new_start_dates_df.merge(
    new_stop_dates_df,
    on=['state_id', 'policy_type'],
    how='outer',
)
new_shelter_in_place_df


Unnamed: 0,state_id,policy_type,start_stop_x,date_x,start_stop_y,date_y
0,AK,Shelter in Place,start,2020-03-22,stop,2020-04-24
1,AL,Shelter in Place,start,2020-04-04,stop,2020-04-30
2,AZ,Shelter in Place,start,2020-03-31,stop,2020-05-15
3,CA,Shelter in Place,start,2020-03-17,stop,2020-05-31
4,CO,Shelter in Place,start,2020-03-19,stop,2020-05-01
5,CT,Shelter in Place,start,2020-03-23,stop,2020-05-20
6,DC,Shelter in Place,start,2020-03-23,stop,2020-06-08
7,DE,Shelter in Place,start,2020-03-24,stop,2020-05-31
8,FL,Shelter in Place,start,2020-03-24,stop,2020-05-04
9,GA,Shelter in Place,start,2020-03-22,stop,2020-04-27


In [16]:
# Give the merged date columns meaningful names and discard the two
# start_stop marker columns left over from the merge
new_shelter_in_place_df = (
    new_shelter_in_place_df
    .rename(columns={"date_x": "start_date", "date_y": "stop_date"})
    .drop(['start_stop_x', 'start_stop_y'], axis=1)
)

new_shelter_in_place_df

Unnamed: 0,state_id,policy_type,start_date,stop_date
0,AK,Shelter in Place,2020-03-22,2020-04-24
1,AL,Shelter in Place,2020-04-04,2020-04-30
2,AZ,Shelter in Place,2020-03-31,2020-05-15
3,CA,Shelter in Place,2020-03-17,2020-05-31
4,CO,Shelter in Place,2020-03-19,2020-05-01
5,CT,Shelter in Place,2020-03-23,2020-05-20
6,DC,Shelter in Place,2020-03-23,2020-06-08
7,DE,Shelter in Place,2020-03-24,2020-05-31
8,FL,Shelter in Place,2020-03-24,2020-05-04
9,GA,Shelter in Place,2020-03-22,2020-04-27


In [17]:
# Derive start_month and stop_month (month numbers) from start_date and stop_date
for date_col, month_col in (('start_date', 'start_month'), ('stop_date', 'stop_month')):
    new_shelter_in_place_df[month_col] = pd.DatetimeIndex(new_shelter_in_place_df[date_col]).month
new_shelter_in_place_df

Unnamed: 0,state_id,policy_type,start_date,stop_date,start_month,stop_month
0,AK,Shelter in Place,2020-03-22,2020-04-24,3,4
1,AL,Shelter in Place,2020-04-04,2020-04-30,4,4
2,AZ,Shelter in Place,2020-03-31,2020-05-15,3,5
3,CA,Shelter in Place,2020-03-17,2020-05-31,3,5
4,CO,Shelter in Place,2020-03-19,2020-05-01,3,5
5,CT,Shelter in Place,2020-03-23,2020-05-20,3,5
6,DC,Shelter in Place,2020-03-23,2020-06-08,3,6
7,DE,Shelter in Place,2020-03-24,2020-05-31,3,5
8,FL,Shelter in Place,2020-03-24,2020-05-04,3,5
9,GA,Shelter in Place,2020-03-22,2020-04-27,3,4


In [18]:
# Create Postgres database connection.
# The password is percent-encoded so that characters such as "@", ":" or "/"
# in it cannot be mistaken for URL delimiters when the URL is parsed.
# Purely alphanumeric passwords are unaffected by the encoding.
connection_string = f"postgres:{quote(pgAdmin_pw, safe='')}@localhost:5432/Real_Estate_Analysis_db"
engine = create_engine(f'postgresql://{connection_string}')

In [20]:
# Confirm tables.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector's get_table_names() is the supported way to list table names.
inspect(engine).get_table_names()

['covid_closed_states', 'employment', 'real_estate_sales']

In [21]:
# Append the combined frame's rows to the covid_closed_states table
# (the DataFrame index is not written as a column)
new_shelter_in_place_df.to_sql(
    name='covid_closed_states',
    con=engine,
    if_exists='append',
    index=False,
)

In [22]:
# Read the COVID closed states table back to confirm the data has been added
pd.read_sql_query(sql='select * from covid_closed_states', con=engine)

Unnamed: 0,state_id,policy_type,start_date,stop_date,start_month,stop_month
0,AK,Shelter in Place,2020-03-22,2020-04-24,3,4
1,AL,Shelter in Place,2020-04-04,2020-04-30,4,4
2,AZ,Shelter in Place,2020-03-31,2020-05-15,3,5
3,CA,Shelter in Place,2020-03-17,2020-05-31,3,5
4,CO,Shelter in Place,2020-03-19,2020-05-01,3,5
5,CT,Shelter in Place,2020-03-23,2020-05-20,3,5
6,DC,Shelter in Place,2020-03-23,2020-06-08,3,6
7,DE,Shelter in Place,2020-03-24,2020-05-31,3,5
8,FL,Shelter in Place,2020-03-24,2020-05-04,3,5
9,GA,Shelter in Place,2020-03-22,2020-04-27,3,4


In [23]:
# Export the dataframe to a JSON file for plotting the data in JavaScript.
# Temporary step until Heroku is working on the website.
# orient accepts: 'split', 'records', 'index', 'values', 'table', 'columns' (default)
new_shelter_in_place_df.to_json(path_or_buf=r'../data/covidrecords.json', orient='records')