# Imports

# Imports

In [1]:
import numpy as np
import pandas as pd

from sir import sir

# Default inputs

In [2]:
# County to model; used to filter both the case data and the census data.
FIPS = 36061

# Forecast window: first simulated date/day and the length of each phase.
START_DATE = "2020-04-15"
START_DAY = 0
FORECAST_DAYS_1 = 45
FORECAST_DAYS_2 = 55

# Model parameters: gamma is passed straight to the SIR solver; the two
# percentiles select a row of the betas file for each forecast phase.
GAMMA = 1 / 14
BETA_PERCENTILE_1 = 25
BETA_PERCENTILE_2 = 50

# Injected inputs

In [3]:
# Parameters
# NOTE(review): this cell appears to be injected by a parameterisation tool
# (e.g. papermill) and overrides the defaults defined above -- confirm before
# editing by hand.
FIPS = 17031
# None disables the percentile lookup; the model cells then fall back to the
# smoothed beta estimated from the case data.
BETA_PERCENTILE_1 = None
BETA_PERCENTILE_2 = None


# Dates

In [4]:
# Parse the configured start once, then derive plain calendar dates from it.
_start_timestamp = pd.Timestamp(START_DATE)

# Run date (used in the input/output file names).
TODAY = pd.Timestamp.today().date()
# First simulated date.
START_DATE = _start_timestamp.date()
# Last date of phase 1, which is also the first date of phase 2.
STOP_DATE = (_start_timestamp + pd.DateOffset(days=FORECAST_DAYS_1)).date()

# Files

In [5]:
# Betas file to read. The name embeds TODAY, so a betas file dated with the
# current run date must exist for this county.
INPUT_FILE_NAME = f"{TODAY}_{FIPS}_betas.csv"
# Destination of the combined two-phase SIR output, keyed by forecast start
# date, run date and county.
OUTPUT_FILE_NAME = f"{START_DATE}_{TODAY}_{FIPS}_sir2-model-output.csv"

# Read in beta values

In [6]:
# Percentile -> beta lookup table for this county and run date.
df = pd.read_csv(INPUT_FILE_NAME).set_index("percentile")

# A percentile of None means "no fixed beta for this phase"; the model cells
# then derive beta from the observed case data instead.
beta_1 = None
if BETA_PERCENTILE_1 is not None:
    beta_1 = df.loc[BETA_PERCENTILE_1, "beta"]

beta_2 = None
if BETA_PERCENTILE_2 is not None:
    beta_2 = df.loc[BETA_PERCENTILE_2, "beta"]

# Read in USA FACTS data

In [7]:
# Download the cumulative confirmed-case table (one row per county, one
# column per date).
case_df = pd.read_csv(
    "https://usafactsstatic.blob.core.windows.net/"
    "public/data/covid-19/covid_confirmed_usafacts.csv",
)

# Keep only the requested county and melt from wide to long format: one row
# per date, with the cumulative count in "confirmed".
case_df = case_df[case_df["countyFIPS"] == FIPS].melt(
    id_vars=["countyFIPS", "County Name", "State", "stateFIPS"],
    value_name="confirmed",
    var_name="date",
)

# Parse the former column labels into real timestamps. pd.to_datetime is used
# instead of .astype("datetime64") because casting to a unit-less datetime64
# dtype is rejected by pandas 2.x.
case_df = case_df.assign(date=pd.to_datetime(case_df["date"]))

In [8]:
# Create a day variable from the date variable (days since the first date
# present in the data).
case_df = case_df.assign(
    days=(case_df["date"] - case_df["date"].min()).dt.days
)

# Drop everything before day 30 (day 30 itself is kept).
case_df = case_df[case_df["days"].ge(30)]

# Reverse order (highest to lowest day) so that each value is compared with
# the value reported on the following calendar day.
case_df = case_df.sort_values("days", ascending=False)

# Replace incorrect values with missing values. Cumulative counts must not
# decrease over time, so in reversed order any increase relative to the last
# valid later value marks an incorrect entry. Invalidating one entry can
# expose another, hence the loop. The explicit ffill() reproduces the implicit
# padding pct_change() used to apply by default, without relying on it.
_confirmed = case_df["confirmed"]
while True:
    _invalid = _confirmed.ffill().pct_change().gt(0)
    if not _invalid.any():
        break
    _confirmed = _confirmed.mask(_invalid)

# Replace missing values with the previous row's value (i.e. the next
# calendar day's count). The result must be assigned back: DataFrame.assign
# returns a new frame, and the original code discarded it, leaving the NaNs
# in place.
case_df = case_df.assign(confirmed=_confirmed.ffill())

# Restore the original order (lowest to highest day)
case_df = case_df.sort_values(["countyFIPS", "days"])

case_df = case_df.assign(
    # Calculate new cases from confirmed cases; the first row has no
    # predecessor and is set to 0.
    new_cases=case_df["confirmed"]
    .diff()
    .fillna(0)
)

# Read in county population data

In [9]:
# County population estimates published by the Census Bureau; only the
# identifying columns and the 2019 estimate are loaded.
cens_df = pd.read_csv(
    "https://www2.census.gov/programs-surveys/popest/datasets/"
    "2010-2019/counties/totals/co-est2019-alldata.csv",
    usecols=["STATE", "COUNTY", "STNAME", "CTYNAME", "POPESTIMATE2019"],
    encoding="latin-1",
)

# Build the combined county FIPS code: the state code followed by the county
# code left-padded with zeros to three digits, converted back to an integer
# so it can be compared with FIPS.
_state_code = cens_df["STATE"].astype(str)
_county_code = cens_df["COUNTY"].astype(str).str.zfill(3)
cens_df = cens_df.assign(
    county_fips=(_state_code + _county_code).astype(int)
)

In [10]:
# pop_df = pd.read_csv(
#     "https://usafactsstatic.blob.core.windows.net/"
#     "public/data/covid-19/covid_county_population_usafacts.csv",
#     ).set_index("countyFIPS").drop(
#         # Remove unallocated cases (FIPS 0)
#         0
#     ).reset_index()
# pop_df.head()

# Add population to case data

In [11]:
# Look up the 2019 population estimate for the requested county.
_county_population = cens_df.loc[
    cens_df["county_fips"] == FIPS,
    "POPESTIMATE2019",
]

# Exactly one census row must match; fail with a clear message instead of the
# opaque conversion error float() raises on an empty or multi-row Series.
if len(_county_population) != 1:
    raise ValueError(
        f"Expected exactly one census row for FIPS {FIPS}, "
        f"found {len(_county_population)}"
    )

# Attach the population as a constant column and index the frame by date.
# .iloc[0] extracts the scalar explicitly; calling float() directly on a
# Series is deprecated in recent pandas.
df = case_df.assign(
    population=float(_county_population.iloc[0])
).set_index("date")

# Calculate growth_rate, doubling time, beta, and Rt

$\Huge r = \frac{new\_cases_t - new\_cases_{t-1}}{new\_cases_{t-1}}$

$\Huge T_d=\frac{\ln(2)}{\ln(r+1)}$

$\Huge \beta=\frac{r+\gamma}{N}$

$\Huge R_t=e^{r\cdot T_c}=\frac{\beta \cdot N}{\gamma}=\frac{r + \gamma}{\gamma}$

$\Huge T_c=\frac{\ln(\frac{\beta \cdot N}{\gamma})}{r}=\frac{\ln(\frac{r + \gamma}{\gamma})}{r}$

In [12]:
# --- Growth rate ---------------------------------------------------------
# Relative day-over-day change in new cases, divided by the number of days
# between consecutive rows.
_raw_growth = df["new_cases"].pct_change() / df["days"].diff()

df = df.assign(
    # NOTE(review): the factor 10 presumably scales confirmed cases up to
    # total infections -- confirm the intended meaning.
    susceptible=df["population"] - (10 * df["confirmed"]),
    # Infinite rates (previous day had zero new cases) are treated as missing.
    gr=_raw_growth.replace([np.inf, -np.inf], np.nan),
).dropna(subset=["gr"])  # rows without a usable growth rate are dropped

# --- Doubling time, beta and mean generation interval ----------------------
_rate = df["gr"]
_generation_interval = np.log((_rate + GAMMA) / GAMMA) / _rate

df = df.assign(
    # If growth rate is zero, Rt = e^0 = 1 regardless of the value of Tc, so
    # the undefined 0/0 result is replaced with 0.
    tc=_generation_interval.mask(_rate == 0, 0),
    dt=np.log(2) / np.log(_rate + 1),
    beta=(_rate + GAMMA) / df["susceptible"],
)

# --- 7-day moving averages (shorter windows allowed at the start) -----------
df = df.assign(
    smooth_gr=df["gr"].rolling(window=7, min_periods=1).mean(),
    smooth_tc=df["tc"].rolling(window=7, min_periods=1).mean(),
)

# --- Intermediate diagnostic columns ---------------------------------------
df = df.assign(
    **{
        "e^r": np.exp(df["gr"]),
        "e^smooth_r": np.exp(df["smooth_gr"]),
        "r+gamma": df["gr"] + GAMMA,
    }
)

# --- Smoothed doubling time / beta and both Rt formulations -----------------
_smooth_rate = df["smooth_gr"]
df = df.assign(
    smooth_dt=np.log(2) / np.log(_smooth_rate + 1),
    smooth_beta=(_smooth_rate + GAMMA) / df["susceptible"],
    # "new" Rt = e^(r * Tc); "old" Rt = (r + gamma) / gamma
    new_rt=np.exp(df["gr"] * df["tc"]),
    old_rt=(df["gr"] + GAMMA) / GAMMA,
    new_smooth_rt=np.exp(_smooth_rate * df["smooth_tc"]),
    old_smooth_rt=(_smooth_rate + GAMMA) / GAMMA,
)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


# Run the model using the provided betas and start dates

In [13]:
# Phase 1: simulate FORECAST_DAYS_1 days starting at START_DATE.

# df is indexed by timestamps while START_DATE is a plain datetime.date;
# look rows up with an explicit Timestamp so the key type matches the index
# regardless of pandas version.
_phase1_start = pd.Timestamp(START_DATE)

sir1 = pd.DataFrame(
    sir(
        susceptible=df.loc[_phase1_start, "population"],
        infected=df.loc[_phase1_start, "confirmed"],
        recovered=0,
        # Use the percentile-based beta when one was requested, otherwise the
        # smoothed beta observed on the start date. Compare against None
        # explicitly so that a legitimate beta of 0.0 is not silently replaced.
        beta=(
            beta_1
            if beta_1 is not None
            else df.loc[_phase1_start, "smooth_beta"]
        ),
        gamma=GAMMA,
        start=START_DAY,
        stop=FORECAST_DAYS_1
    ),
    columns=["day", "susceptible", "infected", "recovered"]
).assign(
    # NOTE(review): assumes sir() yields one row per day for both endpoints,
    # i.e. FORECAST_DAYS_1 + 1 rows -- confirm against the sir module.
    date=pd.date_range(START_DATE, periods=FORECAST_DAYS_1 + 1).date
).set_index("date")

In [14]:
# Phase 2: continue from the final state of phase 1 for FORECAST_DAYS_2 days.

# df is indexed by timestamps while STOP_DATE is a plain datetime.date;
# look rows up with an explicit Timestamp so the key type matches the index
# regardless of pandas version. (sir1 is indexed by datetime.date objects, so
# STOP_DATE is used as-is there.)
_phase2_start = pd.Timestamp(STOP_DATE)

sir2 = pd.DataFrame(
    sir(
        susceptible=sir1.loc[STOP_DATE, "susceptible"],
        infected=sir1.loc[STOP_DATE, "infected"],
        recovered=sir1.loc[STOP_DATE, "recovered"],
        # Use the percentile-based beta when one was requested, otherwise the
        # smoothed beta observed on the phase-2 start date (which requires
        # case data to exist for that date). Compare against None explicitly
        # so that a legitimate beta of 0.0 is not silently replaced.
        beta=(
            beta_2
            if beta_2 is not None
            else df.loc[_phase2_start, "smooth_beta"]
        ),
        gamma=GAMMA,
        start=FORECAST_DAYS_1,
        stop=FORECAST_DAYS_1 + FORECAST_DAYS_2
    ),
    columns=["day", "susceptible", "infected", "recovered"]
).assign(
    # NOTE(review): assumes sir() yields FORECAST_DAYS_2 + 1 rows -- confirm
    # against the sir module.
    date=pd.date_range(
        STOP_DATE,
        periods=FORECAST_DAYS_2 + 1
    ).date
).set_index("date")

# Stack both phases into one table.
# NOTE(review): both frames are labelled with a row for STOP_DATE (the last
# date of phase 1 and the first date of phase 2), so the boundary presumably
# appears twice in the output -- confirm whether that is intended.
sir_df = pd.concat([sir1, sir2])
# NOTE(review): index=False omits the date index from the file; only the
# "day" column identifies each row -- confirm downstream readers expect this.
sir_df.to_csv(OUTPUT_FILE_NAME, index=False)