### Attempt fitting model to empirical data
Fit models to real-world data.
Possible next steps:

* Make the random walk actually a walk
* Distinguish infection time from symptom onset time
* Allow for a time step other than one
* Bayesian calibration
* Documentation with markdown

In [None]:
import warnings
from typing import List
import numpy as np
import pandas as pd
from datetime import datetime
import nevergrad as ng

from emu_renewal.renew import renew_taper_seed
from emu_renewal.distributions import get_gamma_densities_from_params
from emu_renewal.process import get_spline_interp_func, get_linear_interp_func, get_piecewise_cosine
from emu_renewal.outputs import Outputs, plot_output_fit

In [None]:
def model_func(
    gen_time_mean: float,
    gen_time_sd: float,
    process_req: List[float],
    pop: int,
    seed: int,
    n_times: int,
    run_in: int,
) -> tuple:
    """Run the renewal model driven by an interpolated, exponentiated process.

    Args:
        gen_time_mean: Mean used to build the gamma generation time densities.
        gen_time_sd: Standard deviation used to build the same densities.
        process_req: Requested process values, placed at evenly spaced
            points between 0 and n_times before interpolation.
        pop: Population value forwarded to the renewal process.
        seed: Seed value; it is exponentiated before being forwarded,
            so callers supply it on the log scale.
        n_times: Number of model times; the process is evaluated at
            0, 1, ..., n_times - 1.
        run_in: Run-in value forwarded to the renewal process.

    Returns:
        A tuple of the renewal process result and the exponentiated
        process values.
    """
    densities = get_gamma_densities_from_params(gen_time_mean, gen_time_sd, n_times)

    # Spread the requested values evenly over the modelled period and
    # interpolate between them to get one process value per model time
    knots = np.linspace(0.0, n_times, len(process_req))
    interp_func = get_piecewise_cosine(knots, process_req)
    model_times = np.array(range(n_times), dtype=float)

    # The process is requested on the log scale, so exponentiate it
    process_vals_exp = np.exp(np.asarray(interp_func(model_times)))

    model_result = renew_taper_seed(densities, process_vals_exp, pop, np.exp(seed), n_times, run_in)
    return model_result, process_vals_exp

def calib_func(parameters: List[float], pop: int, n_times: int, run_in: int, targets: dict) -> float:
    """Least squares loss with the generation time parameters calibrated too.

    Args:
        parameters: Flat sequence ordered as generation time mean,
            generation time sd, case detection proportion (cdr), seed,
            followed by the process values.
        pop: Population value forwarded to model_func.
        n_times: Number of model times forwarded to model_func.
        run_in: Run-in value forwarded to model_func.
        targets: Mapping-like object whose items() yields
            (time index, observed value) pairs.

    Returns:
        Sum over the targets of the squared difference between the
        modelled incidence scaled by cdr and the observed value.
    """
    gen_time_mean, gen_time_sd, cdr, seed, *process_req = parameters

    # Only the first element of the renewal output is compared with the data
    renewal_output, _ = model_func(gen_time_mean, gen_time_sd, process_req, pop, seed, n_times, run_in)
    incidence = renewal_output[0]

    return sum((incidence[t] * cdr - observed) ** 2 for t, observed in targets.items())

In [None]:
# Fixed parameters (not varied during calibration)
# Population passed to the model as `pop`
population = 33_000_000.0
# Number of time points prepended before the first data point
run_in = 30
# Number of values in the calibrated process array
n_process_periods = 12

In [None]:
# New case counts from the 'MYS' column, indexed by the first CSV column
raw_data = pd.read_csv(
    'https://github.com/monash-emu/wpro_working/raw/main/data/new_cases.csv',
    index_col=0,
)['MYS']
raw_data.index = pd.to_datetime(raw_data.index)

# Keep only the calibration window and replace the dates with integer positions
window_start, window_end = datetime(2021, 3, 1), datetime(2021, 11, 1)
mys_data = raw_data.loc[window_start:window_end].reset_index(drop=True)

# Shift the data index past the run-in so it lines up with model time indices
mys_data.index = mys_data.index + run_in
n_times = run_in + len(mys_data)

In [None]:
#| warning: false
# Bounded search ranges for each calibrated quantity
gen_time_mean_param = ng.p.Scalar(init=5.0, lower=0.1, upper=14.0)
gen_time_sd_param = ng.p.Scalar(init=5.0, lower=2.5, upper=8.0)
cdr_param = ng.p.Scalar(init=0.06, lower=0.04, upper=0.2)
# The seed is searched on the log scale because model_func exponentiates it
seed_param = ng.p.Scalar(init=np.log(1e4), lower=np.log(5e3), upper=np.log(2e4))
# One process value per period, all starting from zero
process_param = ng.p.Array(init=[0.0] * n_process_periods, lower=-2.0, upper=2.0)

instrum = ng.p.Instrumentation(
    gen_time_mean_param,
    gen_time_sd_param,
    cdr_param,
    seed_param,
    process_param,
)


def obj_func(gen_time_mean, gen_time_sd, cdr_param, seed, parameters):
    """Flatten the instrumented arguments into the list calib_func expects."""
    flat_params = [gen_time_mean, gen_time_sd, cdr_param, seed, *parameters]
    return calib_func(flat_params, pop=population, n_times=n_times, run_in=run_in, targets=mys_data)


optimizer = ng.optimizers.NGOpt(parametrization=instrum, budget=5000)
# .value of an Instrumentation is (args, kwargs); keep only the positional args
ngopt_result = optimizer.minimize(obj_func).value[0]

In [None]:
#| label: fit
#| fig-cap: "Optimisation to sample data from Malaysia"
# Unpack the optimised values in the order they were given to the instrumentation:
# generation time mean, generation time sd, cdr, seed, process values
fit_gen_time_mean, fit_gen_time_sd, fit_cdr, fit_seed, fit_process = ngopt_result

# Re-run the model at the optimum, using the same population as the calibration
# rather than repeating the literal value
ngopt_output, ngopt_process = model_func(fit_gen_time_mean, fit_gen_time_sd, fit_process, population, fit_seed, n_times, run_in)

# Uncomment to inspect the fitted values
# print(f'generation time mean: {ngopt_result[0]}')
# print(f'generation time sd: {ngopt_result[1]}')
# print(f'cdr: {ngopt_result[2]}')
# print(f'seed: {ngopt_result[3]}')
# print('random process vals: ' + ', '.join([str(np.exp(i)) for i in ngopt_result[-1]]))
plot_output_fit(mys_data, ngopt_output, ngopt_process, n_times, cdr=fit_cdr)