In [2]:
import os
import warnings

import numpy as np
import pandas as pd
from tqdm import tqdm

warnings.filterwarnings('ignore')

In [3]:
def getCDFDataFixed(cdf, col_name, window=14):
    """Turn a running total into rolling sums of daily increases per fips.

    The input is pivoted to one row per ``date`` and one column per ``fips``
    (missing cells become 0), differenced along the date axis, negative
    increments are replaced with 0, and the increments are summed over a
    trailing window. The result is melted back to long form.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Must contain the columns ``date``, ``fips`` and ``col_name``.
        The frame is not modified.
    col_name : str
        Name of the numeric column to process; it is treated as a running
        total because it is differenced row to row.
    window : int, default 14
        Number of trailing rows (dates) included in each rolling sum.
        Shorter windows at the start are allowed (``min_periods=1``).

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``date``, ``fips`` (as strings) and
        ``col_name``, ordered fips by fips and by date within each fips.
    """
    # Passing `values` as a scalar keeps the pivot's columns a flat index of
    # fips labels, so the labels always line up with the data they describe
    # (no positional relabelling from a separately computed unique() list).
    wide = cdf.pivot_table(index='date', columns='fips', values=col_name)
    wide = wide.fillna(0)

    # The first row has no predecessor, so its increment is defined as 0.
    # Negative increments are clamped to 0.
    daily = wide.diff().fillna(0).clip(lower=0)

    windowed = daily.rolling(window, min_periods=1).sum()

    # Downstream code joins on string fips codes.
    windowed.columns = windowed.columns.astype(str)

    return windowed.reset_index().melt(
        id_vars=['date'],
        var_name='fips',
        value_name=col_name,
    )

In [4]:
def GenerateData(mdf, cdf, sdf, state_fips):
    """Build the combined risk table for one destination and write it to CSV.

    Steps:
      1. Keep the rows of ``mdf`` whose destination equals ``state_fips``;
         the ``src`` column of those rows becomes the ``fips`` join key.
      2. Run ``getCDFDataFixed`` on ``cdf`` for ``cases`` and ``deaths`` and
         scale both by ``Pop`` from ``sdf`` to per-100,000 values.
      3. Multiply the per-capita values by ``freq`` to get ``risk_c`` and
         ``risk_d``.
      4. Rows whose ``fips`` equals ``state_fips`` give the ``Int_*`` columns;
         all other rows are summed per date into the ``Ext_*`` columns.
      5. Merge both on ``date`` and save to
         ``StateFiles/CombinedDF_<state_fips>.csv``.

    Parameters
    ----------
    mdf : pandas.DataFrame
        Needs ``date``, ``src``, ``freq`` and a destination column named
        ``dest`` (a frame where that column is already called ``fips`` is
        accepted as well).
    cdf : pandas.DataFrame
        Needs ``date``, ``fips``, ``cases`` and ``deaths``.
    sdf : pandas.DataFrame
        Needs ``fips`` and ``Pop``.
    state_fips
        Destination code to process; compared to the destination column
        as-is and to the string ``fips`` key via ``str(state_fips)``.

    Returns
    -------
    None
        The result is written to disk. None of the input frames is modified.
    """
    # Work only on derived frames so repeated calls (and other cells) always
    # see the caller's inputs unchanged.
    dest_col = 'dest' if 'dest' in mdf.columns else 'fips'
    mobility = (
        mdf.loc[mdf[dest_col] == state_fips]
        .drop(columns=[dest_col])
        .rename(columns={'src': 'fips'})
    )
    mobility = mobility.assign(
        date=pd.to_datetime(mobility['date']),
        fips=mobility['fips'].astype('str'),
    )

    counts = cdf.assign(date=pd.to_datetime(cdf['date']))
    # Merging on the shared columns restricts the counts to fips listed in sdf.
    counts = pd.merge(counts, sdf[['fips', 'Pop']])

    rolled = pd.merge(
        getCDFDataFixed(counts, 'cases'),
        getCDFDataFixed(counts, 'deaths'),
    )
    population = sdf[['fips', 'Pop']].assign(fips=sdf['fips'].astype(str))
    rolled['fips'] = rolled['fips'].astype(str)
    rolled = pd.merge(rolled, population, on='fips')

    # Per 100,000 population.
    rolled['cases_pc'] = rolled['cases'] * 100000 / rolled['Pop']
    rolled['deaths_pc'] = rolled['deaths'] * 100000 / rolled['Pop']

    df = pd.merge(rolled, mobility, how='inner')
    df['risk_c'] = df['cases_pc'] * df['freq']
    df['risk_d'] = df['deaths_pc'] * df['freq']

    is_local = df['fips'] == str(state_fips)

    external = (
        df.loc[~is_local]
        .groupby(by=['date'])
        .agg({'risk_c': 'sum', 'risk_d': 'sum'})
        .reset_index()
        .rename(columns={'risk_c': 'Ext_risk_c', 'risk_d': 'Ext_risk_d'})
    )
    external = external[['date', 'Ext_risk_c', 'Ext_risk_d']]

    internal = (
        df.loc[is_local]
        .rename(columns={'risk_c': 'Int_risk_c', 'risk_d': 'Int_risk_d'})
    )
    internal = internal[
        ['date', 'cases_pc', 'deaths_pc', 'Int_risk_c', 'Int_risk_d']
    ]

    cmbdf = pd.merge(external, internal)

    # to_csv does not create missing parent directories.
    os.makedirs('StateFiles', exist_ok=True)
    cmbdf.to_csv('StateFiles/CombinedDF_'+str(state_fips)+'.csv', index=False)

In [5]:
# Load the three input tables; paths are relative to the notebook's
# working directory.
# NOTE(review): judging by the file names these are mobility flows, per-state
# case counts and per-state metadata -- confirm against the data files.
mdf = pd.read_csv('Data/SafeGraphData.csv')
cdf = pd.read_csv('Data/caseInformation_States.csv')
sdf = pd.read_csv('Data/stateInfo.csv')

# Run GenerateData once for every distinct fips code in sdf, showing a tqdm
# progress bar. Each call writes its own CSV file.
state_fips_list = sdf['fips'].unique()
for state_fips in tqdm(state_fips_list):
    GenerateData(mdf, cdf, sdf, state_fips)

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:11<00:00,  4.38it/s]
