### Warning, work in progress
Trying to get models to actually fit to real-world
data, with very limited success so far.
Here I am exponentiating the random process values.

Possible next steps:
- Make the random walk actually a walk
- Allow for a time step other than one (possibly)
- Distinguish infection time from symptom onset time
- Parameterise the case detection rate

In [None]:
from typing import List
import numpy as np
import pandas as pd
from datetime import datetime
from scipy.optimize import minimize, shgo
import nevergrad as ng

from emu_renewal.distributions import get_gamma_params_from_mean_sd, get_gamma_densities_from_params
from emu_renewal.renew import renew_basic
from emu_renewal.process import get_interp_vals_over_model_time
from emu_renewal.outputs import Outputs, plot_output_fit

In [None]:
def model_func(gen_time_mean: float, gen_time_sd: float, process_req: List[float], pop: int, seed: int, n_times: int) -> tuple:
    """Run the renewal model for a single set of inputs.

    Steps shared by every use of the model:
    1. build the generation time densities from the requested mean and
       standard deviation,
    2. linearly interpolate the requested non-mechanistic process values
       over the model times,
    3. exponentiate the interpolated values,
    4. run the renewal process with the densities and exponentiated values.

    Returns:
        Two-element tuple: the result of the renewal process, followed by
        the exponentiated process values that were fed into it.
    """
    densities = get_gamma_densities_from_params(gen_time_mean, gen_time_sd, n_times)
    interp_process = np.array(get_interp_vals_over_model_time(process_req, n_times))
    # The process is requested on the log scale, so exponentiate before use
    exp_process = np.exp(interp_process)
    return renew_basic(densities, exp_process, pop, seed, n_times), exp_process

def calib_func(parameters: List[float], pop: int, seed: int, n_times: int, targets: dict) -> float:
    """Least-squares loss of the model against the target values.

    The first two entries of ``parameters`` are the generation time mean
    and standard deviation; every remaining entry is passed on as the
    process request.

    Returns:
        Sum over the items of ``targets`` of the squared difference between
        the modelled value at that key and the target value.
    """
    mean, sd, *process = parameters
    model_result, _ = model_func(mean, sd, process, pop, seed, n_times)
    modelled = model_result[0]  # First element of the model result is compared to the targets

    loss = 0
    for time, observed in targets.items():
        loss = loss + (modelled[time] - observed) ** 2
    return loss

In [None]:
# Model parameters
population = 33e6  # Population size for the renewal model
infectious_seed = 100.0  # Seed value for the renewal model
# Provisional number of model times; reassigned to the length of the
# target data once that has been loaded further down
n_times = 40
# Parameters of a long generation time distribution
# NOTE(review): the optimiser's upper bound on the generation time mean is 20.0,
# not 10.0 - confirm which of the two is the intended maximum
max_gen_mean = 10.0
max_gen_sd = 4.0
long_gen_densities = get_gamma_densities_from_params(max_gen_mean, max_gen_sd, n_times)
# First index at which the cumulative sum of the densities exceeds 0.9999
# (np.argmax on a boolean array returns the position of the first True)
# NOTE(review): gen_times_end is not referenced anywhere else in the visible code
gen_times_end = np.argmax(long_gen_densities.cumsum() > 0.9999)

In [None]:
# Read the 'MYS' column of the new-cases table, indexed by the file's first column
raw_data = pd.read_csv(
    'https://github.com/monash-emu/wpro_working/raw/main/data/new_cases.csv',
    index_col=0,
)['MYS']
raw_data.index = pd.to_datetime(raw_data.index)

# Keep 1 March 2021 to 1 November 2021 and swap the dates for a
# zero-based integer index, so that positions line up with model times
mys_data = raw_data.loc[datetime(2021, 3, 1): datetime(2021, 11, 1)].reset_index(drop=True)
n_times = len(mys_data)

# Scale up notifications to incidence outside of model
mys_data = mys_data * 16

In [None]:
# Search space for the calibration: the generation time mean and standard
# deviation, plus four requested values of the non-mechanistic process
# (which model_func interpolates over the model times and exponentiates)
gen_time_mean_param = ng.p.Scalar(init=5.0, lower=0.1, upper=20.0)
gen_time_sd_param = ng.p.Scalar(init=1.0, lower=0.1, upper=4.0)
process_param = ng.p.Array(init=[0.0] * 4, lower=-10.0, upper=10.0)
instrum = ng.p.Instrumentation(gen_time_mean_param, gen_time_sd_param, process_param)


def obj_func(gen_time_mean, gen_time_sd, parameters):
    """Objective for the optimiser: least-squares loss against the target data.

    Flattens the three optimiser arguments into the single parameter list
    expected by calib_func, using the population, seed, number of times and
    targets defined earlier in the notebook.
    """
    return calib_func(
        [gen_time_mean, gen_time_sd, *parameters],
        pop=population,
        seed=infectious_seed,
        n_times=n_times,
        targets=mys_data,
    )


optimizer = ng.optimizers.NGOpt(parametrization=instrum, budget=2000)
# Zero index gets us the args (not kwargs): (mean, sd, process values array)
ngopt_result = optimizer.minimize(obj_func).value[0]

# Re-run the model at the recommended parameters, with the same population,
# seed and number of times that were used during calibration
ngopt_output, ngopt_process = model_func(
    ngopt_result[0],
    ngopt_result[1],
    ngopt_result[2],
    population,
    infectious_seed,
    n_times,
)
print(f'generation time mean: {ngopt_result[0]}')
print(f'generation time sd: {ngopt_result[1]}')
# Index (rather than slice) the third argument so that the individual process
# values are joined; slicing gives a one-element tuple wrapping the whole array
print('random process vals: ' + ', '.join(str(i) for i in ngopt_result[2]))
plot_output_fit(mys_data, ngopt_output, ngopt_process, n_times)