In [1]:
import pandas as pd
import numpy as np

In [27]:
df = pd.read_excel("data/state_policies_raw.xlsx")
columns = df.columns
units = df.iloc[3].to_numpy()

## Keep only the columns that are "date"
date_columns = [col for col, unit in zip(columns, units) if unit == "date"]
rel_columns = ["STATE", "POSTCODE"] + date_columns
df = df[rel_columns]

In [61]:
state_of_interest = "Alabama"

## Get the row of the state of interest
def get_policy_info_about_state(state_of_interest):
    col_desc = df.iloc[0]
    rest_type_row = df.iloc[2]
    state_row = df[df["STATE"] == state_of_interest].iloc[0]

    state = []
    state_abbr = []
    restriction_desc = []
    restriction_type = []
    restriction_date = []

    for idx in range(0, len(state_row)):
        if state_row.iloc[idx] != 0 and type(state_row.iloc[idx]) != str:
            state.append(state_of_interest)
            state_abbr.append(state_row["POSTCODE"])
            restriction_desc.append(col_desc.iloc[idx])
            restriction_type.append(rest_type_row.iloc[idx])
            restriction_date.append(state_row.iloc[idx])
    
    return state, state_abbr, restriction_desc, restriction_type, restriction_date

states = df["STATE"].iloc[4:].values

state_list = []
state_abbr_list = []
restriction_desc_list = []
restriction_type_list = []
restriction_date_list = []

for state in states:
    state, state_abbr, restriction_desc, restriction_type, restriction_date = get_policy_info_about_state(state)
    state_list += state
    state_abbr_list += state_abbr
    restriction_desc_list += restriction_desc
    restriction_type_list += restriction_type
    restriction_date_list += restriction_date

state_policy_array = pd.DataFrame({
    "state": state_list,
    "state_abbr": state_abbr_list,
    "restriction_desc": restriction_desc_list,
    "restriction_type": restriction_type_list,
    "restriction_date": restriction_date_list
})

state_policy_type = pd.read_excel("data/state_policy_categorization.xlsx")

state_policy_array = state_policy_array.merge(state_policy_type, left_on="restriction_desc", right_on="Policy Desc.", how="inner")
state_policies_cleaned = state_policy_array[["state", "state_abbr", "restriction_desc", "restriction_type", "Policy Type", "restriction_date"]]
state_policies_cleaned.to_csv("data/state_policies_cleaned.csv", index=False)

In [153]:
state_of_interest = "Illinois"

contact_reduce_num_list = []
notification_num_list = []
mask_vaccine_num_list = []


for state_of_interest in states:
    state_info = state_policies_cleaned.loc[state_policies_cleaned["state"] == state_of_interest].reset_index(drop=True)

    number_of_contact_reduction_policies = len(state_info.loc[(state_info["Policy Type"] == "Contact Reduction") & (state_info["restriction_type"] =="start")])
    number_of_notification_policies = len(state_info.loc[(state_info["Policy Type"] == "Notification") & (state_info["restriction_type"] =="start")])
    number_of_mask_madates = len(state_info.loc[(state_info["Policy Type"] == "Mask/Vaccine Mandate") & (state_info["restriction_type"] =="start")])

    contact_reduce_num_list.append(number_of_contact_reduction_policies)
    notification_num_list.append(number_of_notification_policies)
    mask_vaccine_num_list.append(number_of_mask_madates)


state_policy_summary = pd.DataFrame({
    "state": states,
    "state_abbr": df["POSTCODE"].iloc[4:].values,
    "contact_reduce_num": contact_reduce_num_list,
    "notification_num": notification_num_list,
    "mask_vaccine_num": mask_vaccine_num_list
})


In [154]:
weights = [5,1,10]
state_policy_summary["Strictness"] = state_policy_summary["contact_reduce_num"] * weights[0] + state_policy_summary["notification_num"] * weights[1] + state_policy_summary["mask_vaccine_num"] * weights[2]
## Normalize the strictness
state_policy_summary["Strictness"] = state_policy_summary["Strictness"] / state_policy_summary["Strictness"].max()
## Order the states by strictness
state_policy_summary = state_policy_summary.sort_values(by="Strictness", ascending=False)

state_policy_summary["Strict_Label"] = "Lax"
state_policy_summary.loc[state_policy_summary["Strictness"] > 0.8, "Strict_Label"] = "Moderate"
state_policy_summary.loc[state_policy_summary["Strictness"] > 0.9, "Strict_Label"] = "Strict"

case_count = pd.read_csv("data/us-counties.csv")
## Compute the number of new cases and fatalities from "cases" groupbed by fips
case_count = case_count.sort_values(by=["fips", "date"])
case_count["new_cases"] = case_count.groupby("fips")["cases"].diff().fillna(0)
case_count["new_fatalities"] = case_count.groupby("fips")["deaths"].diff().fillna(0)

num_infected = case_count.groupby("state")["new_cases"].sum().reset_index()
num_fatalities = case_count.groupby("state")["new_fatalities"].sum().reset_index()

state_policy_summary = state_policy_summary.merge(num_infected, left_on="state", right_on="state", how="inner")
state_policy_summary = state_policy_summary.merge(num_fatalities, left_on="state", right_on="state", how="inner")

state_policy_summary = state_policy_summary.rename(columns={
    "new_cases": "cases",
    "new_fatalities": "deaths"
})

population = pd.read_excel("data/PopulationEstimates.xlsx")
population = population.loc[population["Area_Name"].isin(states), ["Area_Name", "CENSUS_2020_POP"]]
population = population.rename(columns={
    "Area_Name": "state",
    "CENSUS_2020_POP": "population"
})

state_policy_summary = state_policy_summary.merge(population, left_on="state", right_on="state", how="inner")

In [155]:
state_policy_summary["infection_ratio"] = state_policy_summary["cases"] / state_policy_summary["population"]
state_policy_summary["fatality_ratio"] = state_policy_summary["deaths"] / state_policy_summary["population"]
state_policy_summary.groupby("Strict_Label")[["infection_ratio", "fatality_ratio"]].median()

Unnamed: 0_level_0,infection_ratio,fatality_ratio
Strict_Label,Unnamed: 1_level_1,Unnamed: 2_level_1
Lax,0.25398,0.003026
Moderate,0.255272,0.003263
Strict,0.219205,0.002789


In [157]:
state_policy_summary.to_csv("data/state_policy_summary.csv", index=False)

In [156]:
state_policy_summary

Unnamed: 0,state,state_abbr,contact_reduce_num,notification_num,mask_vaccine_num,Strictness,Strict_Label,cases,deaths,population,infection_ratio,fatality_ratio
0,Rhode Island,RI,19,36,4,1.0,Strict,349974.0,3535.0,1097379.0,0.318918,0.003221
1,Pennsylvania,PA,18,37,4,0.976608,Strict,2850257.0,44814.0,13002700.0,0.219205,0.003447
2,District of Columbia,DC,14,34,6,0.959064,Strict,143942.0,1340.0,689545.0,0.208749,0.001943
3,District of Columbia,DC,14,34,6,0.959064,Strict,143942.0,1340.0,689545.0,0.208749,0.001943
4,Oregon,OR,16,33,5,0.953216,Strict,738789.0,7559.0,4237256.0,0.174356,0.001784
5,Illinois,IL,16,33,5,0.953216,Strict,3214903.0,35734.0,12812508.0,0.250919,0.002789
6,New Mexico,NM,16,32,5,0.947368,Strict,526028.0,7608.0,2117522.0,0.248417,0.003593
7,Massachusetts,MA,17,40,3,0.906433,Strict,1662231.0,20371.0,7029917.0,0.236451,0.002898
8,Washington,WA,14,34,5,0.900585,Strict,1531015.0,12842.0,7705281.0,0.198697,0.001667
9,New York,NY,15,38,4,0.894737,Moderate,2844638.0,27671.0,20201249.0,0.140815,0.00137
