In [1]:
import json
import pandas as pd
import numpy as np

Cumulative infections: running per-county totals of the daily infection counts.

In [None]:
# Daily infection counts: a 'date' column plus one column per county.
input_path = r"C:\Users\msnin\Downloads\Oden\Oden-github\Measles Model\Datasets Used\infection_county_cases.csv"
# Destination for the running totals; later cells read this variable.
output_path = r"C:\Users\msnin\Downloads\Oden\Oden-github\Measles Model\Datasets Used\cum_infection_county_cases.csv"

df = pd.read_csv(input_path)
df['date'] = pd.to_datetime(df['date'])

# Pick the county columns by name rather than by position, so the result does
# not depend on 'date' happening to be the first column of the file.
county_columns = [col for col in df.columns if col != 'date']

# A running total is only meaningful in chronological order; sort explicitly
# (stable, so rows sharing a date keep their file order) instead of trusting
# the order in which rows appear in the CSV.
df_cumulative = df.sort_values('date', kind='stable').reset_index(drop=True)
df_cumulative[county_columns] = df_cumulative[county_columns].cumsum()

df_cumulative.to_csv(output_path, index=False)

Recoveries: per-county recovery counts obtained by shifting infections forward by a fixed number of days.

In [3]:
import pandas as pd
from datetime import timedelta
from pathlib import Path

def make_recoveries_from_infections(
    infections_csv: str,
    out_csv: str,
    shift_days: int = 10,
    exceptions: list | None = None,
    right_censor_to_last: bool = True,
):
    path = Path(infections_csv)
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df = df.dropna(subset=['date']).sort_values('date').reset_index(drop=True)
    county_cols = [c for c in df.columns if c != 'date']
    for c in county_cols:
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0).astype(int)

    min_d, max_d = df['date'].min(), df['date'].max()

    # Long infections
    long_inf = df.melt(id_vars='date', var_name='county', value_name='infected')
    long_inf = long_inf[long_inf['infected'] > 0].copy()

    def map_recovery_date(d):
        rec = d + timedelta(days=shift_days)
        if rec <= max_d:
            return rec
        return max_d if right_censor_to_last else pd.NaT

    long_inf['recovery_date'] = long_inf['date'].apply(map_recovery_date)
    if not right_censor_to_last:
        long_inf = long_inf.dropna(subset=['recovery_date'])

    recovered_long = (long_inf
                      .groupby(['recovery_date','county'], as_index=False)['infected']
                      .sum()
                      .rename(columns={'infected':'recovered'}))

    # Apply exceptions (subtract from recovery on (inf_date + shift_days))
    if exceptions:
        for exc_date, county_name, subtract_n in exceptions:
            exc_date = pd.to_datetime(exc_date)
            rec_date = exc_date + timedelta(days=shift_days)
            if rec_date > max_d and right_censor_to_last:
                rec_date = max_d
            if rec_date < min_d or rec_date > max_d:
                continue
            mask = (recovered_long['recovery_date']==rec_date) & (recovered_long['county']==county_name)
            if not mask.any():
                recovered_long = pd.concat([
                    recovered_long,
                    pd.DataFrame([{'recovery_date': rec_date, 'county': county_name, 'recovered': -subtract_n}])
                ], ignore_index=True)
            else:
                recovered_long.loc[mask, 'recovered'] = recovered_long.loc[mask, 'recovered'] - subtract_n

    # Back to wide aligned to original date index
    recovered_wide = (recovered_long
                      .pivot(index='recovery_date', columns='county', values='recovered')
                      .fillna(0).astype(int)
                      .reindex(df['date'].unique(), fill_value=0)
                      .rename_axis('date')
                      .reset_index())

    # Keep original columns/order
    for c in county_cols:
        if c not in recovered_wide.columns:
            recovered_wide[c] = 0
    recovered_wide = recovered_wide[['date'] + county_cols]
    recovered_wide[county_cols] = recovered_wide[county_cols].astype(int)

    Path(out_csv).parent.mkdir(parents=True, exist_ok=True)
    recovered_wide.to_csv(out_csv, index=False)


In [4]:
# Recoveries are derived from the DAILY infection counts and accumulated
# afterwards. The helper sums every count that lands on the same recovery date,
# including all cases censored onto the last date, so feeding it the cumulative
# file made the final row the sum of several cumulative totals and applied each
# exception to a single date instead of carrying it forward.
# NOTE(review): assumes cum_recovery_by_county.csv is meant to hold running
# totals of recoveries — confirm against whatever consumes the file.
daily_infections_csv = r"C:\Users\msnin\Downloads\Oden\Oden-github\Measles Model\Datasets Used\infection_county_cases.csv"
daily_recovery_csv = r"C:\Users\msnin\Downloads\Oden\Oden-github\Measles Model\Datasets Used\daily_recovery_by_county.csv"
cum_recovery_csv = r"C:\Users\msnin\Downloads\Oden\Oden-github\Measles Model\Datasets Used\cum_recovery_by_county.csv"

make_recoveries_from_infections(
    infections_csv=daily_infections_csv,
    out_csv=daily_recovery_csv,
    shift_days=10,
    exceptions=[("2025-03-04","Gaines",1), ("2025-04-04","Gaines",1)],
    right_censor_to_last=True
)

# The helper writes its rows in ascending date order, so a plain cumsum over
# the county columns yields the running total per county.
recovery_daily = pd.read_csv(daily_recovery_csv)
recovery_counties = [col for col in recovery_daily.columns if col != 'date']
recovery_cumulative = recovery_daily.copy()
recovery_cumulative[recovery_counties] = recovery_daily[recovery_counties].cumsum()
recovery_cumulative.to_csv(cum_recovery_csv, index=False)