In [None]:
# Uncomment the line below to install on Colab or similar
#! pip install git+https://github.com/monash-emu/wpro-working.git@more-datasets

In [None]:
#| warning: false
from jax import jit, random
import pandas as pd
from datetime import datetime,timedelta
import numpyro
from numpyro import distributions as dist
import arviz as az
from IPython.display import Markdown
from plotly.express.colors import qualitative as qual_colours
import numpy as np

from estival.sampling import tools as esamp

from emu_renewal.process import CosineMultiCurve
from emu_renewal.distributions import GammaDens
from emu_renewal.renew import RenewalModel
from emu_renewal.outputs import get_spaghetti_from_params, get_quant_df_from_spaghetti, plot_spaghetti, plot_uncertainty_patches, PANEL_SUBTITLES, plot_3d_spaghetti
from emu_renewal.calibration import StandardCalib

In [None]:
# --- Fixed model settings ---
proc_update_freq = 14  # handed to RenewalModel as its process update frequency
init_time = 50  # days of data before the analysis start used for initialisation
pop = 33e6  # population figure (presumably Malaysia's — confirm source)

# --- Analysis window and the initialisation window that immediately precedes it ---
analysis_start = datetime(2021, 3, 1)
analysis_end = datetime(2021, 11, 1)
init_start = analysis_start - timedelta(days=init_time)
init_end = analysis_start - timedelta(days=1)

# --- Case data: the "MYS" column of the new-cases table, re-indexed by date ---
mys_data = pd.read_csv(
    "https://github.com/monash-emu/wpro_working/raw/main/data/new_cases.csv",
    index_col=0,
)["MYS"]
mys_data.index = pd.to_datetime(mys_data.index)

# Label-based slices, so both end dates are included
select_data = mys_data.loc[analysis_start:analysis_end]
init_data = mys_data.loc[init_start:init_end]

In [None]:
# Build the renewal model over the analysis window.
# The population comes from the named `pop` constant rather than a repeated
# 33e6 literal, so it is defined in exactly one place.
fitter = CosineMultiCurve()
renew_model = RenewalModel(
    pop,
    analysis_start,
    analysis_end,
    proc_update_freq,
    fitter,
    GammaDens(),
    # NOTE(review): this literal equals init_time (the number of days covered by
    # init_data) — confirm whether the two are meant to be tied together.
    50,
    init_data,
    GammaDens(),
)

In [None]:
# Define parameter ranges
# Prior distribution for each calibrated parameter, keyed by parameter name.
# The same mapping is handed to StandardCalib and passed as `params=` to the
# MCMC warmup/run calls further down.
priors = {
    # Generation-interval parameters (presumably mean and standard deviation — confirm against the model)
    "gen_mean": dist.Gamma(10.0, 1.0),
    "gen_sd": dist.Gamma(5.0, 1.0),
    "cdr": dist.Beta(4.0, 10.0),
    "rt_init": dist.Normal(0.0, 0.25),
    # Reporting-related parameters, each given a flat prior over a fixed interval
    "report_mean": dist.Uniform(8.0, 12.0),
    "report_sd": dist.Uniform(3.0, 6.0),
}

In [None]:
# Bundle the model, the priors and the observed series into the calibration object.
# NOTE(review): the full mys_data series is passed here rather than select_data —
# confirm that StandardCalib restricts it to the analysis window itself.
calib = StandardCalib(renew_model, priors, mys_data)

In [None]:
# Convenience function for PDF of a prior
def plotpdf(p):
    """Evaluate a distribution's density on an evenly spaced grid.

    Despite the name, nothing is drawn here; the caller plots the result.

    Args:
        p: Object exposing ``icdf`` and ``log_prob`` methods (e.g. one of the priors).

    Returns:
        pandas Series of density values (the exponentiated ``log_prob``),
        indexed by 100 grid points running from the 0.001 quantile to the
        0.999 quantile of ``p``.
    """
    lower, upper = p.icdf(0.001), p.icdf(0.999)
    grid = np.linspace(lower, upper, 100)
    density = np.exp(p.log_prob(grid))
    return pd.Series(data=density, index=grid)

# Draw the density of the "cdr" prior
plotpdf(priors["cdr"]).plot()

In [None]:
from numpyro import infer

In [None]:
# Initial points for the chains are sampled uniformly at random, but within a smaller radius than
# the default. Our random process covers a lot of parameter space, so we don't want to start too
# far out, yet we still want more diversity than simply starting every chain at the median.
# Everything is referenced through `numpyro.infer` so this cell depends only on `import numpyro`.
kernel = numpyro.infer.NUTS(
    calib.calibration,
    dense_mass=True,
    init_strategy=numpyro.infer.init_to_uniform(radius=0.5),
)

# Any values can be used for num_samples and num_warmup here:
# - 1000, 1000 should be enough 'most of the time' and is useful while testing - expect a few bad
#   runs depending on the seed
# - 2000, 2000 should be considerably more robust
# - higher values may be required for exacting results with 'pristine' r values
mcmc = numpyro.infer.MCMC(kernel, num_chains=2, num_samples=1000, num_warmup=1000)

# Base PRNG key for the warmup and sampling cells below
rng_key = random.PRNGKey(14)

In [None]:
# The warmup phase is run on its own so that its behaviour can be inspected before committing
# to a full run. What to look for:
# - every chain should progress at roughly the same speed (e.g. no order-of-magnitude differences)
# - chains speeding up as warmup proceeds is normal (they are getting closer to the viable region)
# The warmup draws are collected so that their potential energy can be examined afterwards.

mcmc.warmup(
    rng_key,
    params=priors,
    collect_warmup=True,
    extra_fields=("accept_prob", "diverging", "potential_energy"),
)

In [None]:
# Plot the potential energy (the negative log density) of the warmup samples, one line per chain.
# By the end of the warmup these should be in the same range for every chain. If they are not,
# there is no point running a calibration - either we've got a bad initial point (shouldn't
# happen), or something is wrong with the model/priors/NUTS configuration.
# Even if the chains end up in the same place, check whether any of them took unusually long to
# converge - if so, they could cause issues with the mass matrix calculation used for the actual
# calibration sampling. Longer warmups should resolve this.

# Don't worry if chains occasionally dip lower than the others - this is a more 'perfect fit'/
# better MAP estimate but outside the centre of mass - as long as they come back to the same range
# for most of the trace, all is well.

# The window length follows mcmc.num_warmup instead of a hard-coded 1000, so the whole warmup
# trace is still shown if the warmup length is changed.
(
    pd.DataFrame(mcmc.get_extra_fields(group_by_chain=True)["potential_energy"])
    .T.iloc[-mcmc.num_warmup:]
    .plot()
)

In [None]:
# Now run the actual MCMC.
# This should sample a bit faster than the warmup (if everything went right above, all the chains
# are properly preconditioned).

# If any chain runs considerably faster or slower than the others, something is wrong - most
# likely the mass matrix tuning is different for that chain; check the potential energy/trace
# to validate this.

# The warmup already consumed `rng_key`, so a distinct key is derived deterministically for the
# sampling phase rather than passing the same key a second time.
mcmc.run(
    random.fold_in(rng_key, 1),
    extra_fields=("accept_prob", "diverging", "potential_energy"),
    params=priors,
)

In [None]:
# Convert the MCMC results into an ArviZ object for the summary and sampling steps below
idata = az.from_numpyro(mcmc)

In [None]:
# Check the r_hat column of the summary: it should be an absolute maximum of 1.05 for any
# actual inference (good enough to not be misleading, but still not really appropriate for
# publication/policy advice).
# For this kind of model, 1.00 is the target.
az.summary(idata)

In [None]:
# Settings for drawing a subset of posterior samples
burn_in = 0  # number of leading draws discarded from every chain
n_samples = 200  # number of posterior draws to keep
quantiles = [0.05, 0.5, 0.95]
sample_seed = 14  # arbitrary fixed seed so the same draws are selected on every run

# Drop the burn-in draws, then take a shuffled subset of the remainder.
# Without an explicit `rng`, az.extract shuffles with an unseeded generator, so the selected
# draws (and every downstream figure) would change from run to run.
idata_burnt = idata.sel(draw=slice(burn_in, None))
idata_sampled = az.extract(idata_burnt, num_samples=n_samples, rng=sample_seed)
sample_params = esamp.xarray_to_sampleiterator(idata_sampled)

In [None]:
def get_full_result(gen_mean, gen_sd, proc, cdr, rt_init, report_mean, report_sd):
    """Run the renewal model for one parameter set.

    Thin wrapper that forwards every argument, positionally and in the same
    order, to ``renew_model.renewal_func`` and returns its result unchanged.
    """
    model_args = (gen_mean, gen_sd, proc, cdr, rt_init, report_mean, report_sd)
    return renew_model.renewal_func(*model_args)

# JIT-compile the wrapper, then use it to produce model outputs for the sampled parameter sets
# (presumably one model run per sample — confirm in get_spaghetti_from_params)
full_wrap = jit(get_full_result)
spaghetti = get_spaghetti_from_params(renew_model, sample_params, full_wrap)
# Reduce the individual runs to the requested quantiles
quantiles_df = get_quant_df_from_spaghetti(renew_model, spaghetti, quantiles)

In [None]:
# Build the uncertainty figure from the quantiles and the analysis-window data, then display it
patch_fig = plot_uncertainty_patches(quantiles_df, select_data, qual_colours.Plotly)
patch_fig

In [None]:
# Render the model's own text description as Markdown
Markdown(renew_model.get_description())

In [None]:
#plot_spaghetti(spaghetti, select_data)

In [None]:
# Optional - requires kaleido
#| label: fig-calib
#| fig-cap: "Calibration to sample data from Malaysia"
# patch_fig.write_image("patch_fig.svg")

In [None]:
# params_df.columns = ["name", "Lower limit", "Upper limit"]
# params_df.index = params_df["name"]
# params_df = params_df.drop(columns=["name"])
# params_df.index.name = None

In [None]:
# Render a section heading for the calibration description
Markdown("### Calibration")

In [None]:
# Render the calibration object's own text description as Markdown
Markdown(calib.get_description())

In [None]:
# Markdown(params_df.to_markdown())

In [None]:
# evidence_table = pd.DataFrame(index=params_df.index, columns=["Evidence"])
# evidence_table.loc[:, "Evidence"] = "To be populated [@cori2013]"
# Markdown(evidence_table.to_markdown())

In [None]:
# plot_3d_spaghetti(spaghetti, ["susceptibles", "transmission potential"])