## Try out a simple optimisation routine with scipy's `minimize` function
Using the same model as in the previous notebook,
get an arbitrarily chosen (local) optimisation algorithm running.
$i_t = R_t\sum_{\tau<t} i_\tau g_{t-\tau}$
So that we can start using these various model components in more sophisticated ways,
I'll begin writing the code as properly encapsulated functions
and tracking a few key model quantities.

In [None]:
from typing import Dict, List
import numpy as np
import pandas as pd
from scipy.optimize import minimize, shgo

from distributions import get_gamma_params_from_mean_sd, get_gamma_densities_from_params
from outputs import Outputs, plot_output_fit

In [None]:
def renew(gen_time_densities, process_vals, pop, seed, n_times) -> Outputs:
    """Run the renewal process forward for ``n_times`` steps.

    At each step after the first, new incidence is the sum of past incidence
    weighted by the reversed generation time densities, multiplied by the
    reproduction number for that step. That reproduction number is the
    pre-specified process value scaled by the proportion of the population
    still susceptible at the previous step.

    Args:
        gen_time_densities: Generation time densities, indexed from zero.
        process_vals: Process value for each model time.
        pop: Total population size.
        seed: Incidence at the first model time.
        n_times: Number of model times to simulate.

    Returns:
        Outputs built from the incidence, susceptible and reproduction
        number arrays, in that order.
    """
    inc = np.zeros(n_times)
    sus = np.zeros(n_times)
    reff = np.zeros(n_times)

    # Initial state: the seed is the first incidence value and is removed from the susceptibles
    inc[0] = seed
    sus[0] = pop - seed
    reff[0] = process_vals[0] * sus[0] / pop

    for step in range(1, n_times):
        # Reverse the densities so that the most recent incidence pairs with the shortest interval
        reversed_densities = gen_time_densities[:step][::-1]
        infection_pressure = (inc[:step] * reversed_densities).sum()

        # Scale the process value by the susceptible proportion from the previous step
        reff[step] = process_vals[step] * sus[step - 1] / pop
        inc[step] = infection_pressure * reff[step]

        # Floor at zero so that susceptibles cannot go negative
        sus[step] = max(sus[step - 1] - inc[step], 0.0)

    return Outputs(inc, sus, reff)

def get_interp_vals_over_model_time(req: List[float], n_times: int) -> np.ndarray:
    """Linearly interpolate the requested values onto the integer model times.

    The requested values are treated as knots spaced evenly from 0 to
    ``n_times`` inclusive, and the interpolant is evaluated at
    0, 1, ..., ``n_times - 1``. Because the last knot sits at ``n_times``,
    the final requested value is approached but not reached on the model
    grid whenever more than one value is supplied.

    Args:
        req: Values at the evenly spaced knots.
        n_times: Number of model times.

    Returns:
        Array of length ``n_times`` holding the interpolated values.
    """
    knot_times = np.linspace(0.0, n_times, len(req))
    return np.interp(np.arange(n_times), knot_times, req)

def model_func(
    gen_time_mean: float,
    gen_time_sd: float,
    process_req: List[float],
    pop: int,
    seed: int,
    n_times: int,
) -> tuple:
    """Assemble the model inputs and run the renewal process.

    The generation time densities are obtained from the requested mean and
    standard deviation, and the requested process values are interpolated
    over the model times and then exponentiated (so ``process_req`` is on
    the log scale) before being passed to ``renew``.

    Args:
        gen_time_mean: Generation time mean.
        gen_time_sd: Generation time standard deviation.
        process_req: Requested log-scale process values.
        pop: Total population size.
        seed: Incidence at the first model time.
        n_times: Number of model times to simulate.

    Returns:
        Two-element tuple of the ``renew`` result and the exponentiated
        process values over the model times.
    """
    densities = get_gamma_densities_from_params(gen_time_mean, gen_time_sd, n_times)
    log_process = get_interp_vals_over_model_time(process_req, n_times)
    process_vals_exp = np.exp(np.array(log_process))
    return renew(densities, process_vals_exp, pop, seed, n_times), process_vals_exp

def process_calib_func(
    parameters: List[float],
    gen_time_mean,
    gen_time_sd,
    pop: int,
    seed: int,
    n_times: int,
    targets: dict,
) -> float:
    """Get the loss function when only the process values are calibrated.

    Args:
        parameters: Requested log-scale process values.
        gen_time_mean: Fixed generation time mean.
        gen_time_sd: Fixed generation time standard deviation.
        pop: Total population size.
        seed: Incidence at the first model time.
        n_times: Number of model times to simulate.
        targets: Target incidence values keyed by model time index.

    Returns:
        Sum of squared differences between modelled incidence and targets.
    """
    outputs, _ = model_func(gen_time_mean, gen_time_sd, parameters, pop, seed, n_times)
    modelled = outputs[0]  # First element of the model outputs is incidence
    return sum((modelled[time] - target) ** 2 for time, target in targets.items())

def all_param_calib_func(
    parameters: List[float],
    pop: int,
    seed: int,
    n_times: int,
    targets: dict,
) -> float:
    """Get the loss function when all parameters are calibrated together.

    Args:
        parameters: Generation time mean, then generation time standard
            deviation, then the requested log-scale process values.
        pop: Total population size.
        seed: Incidence at the first model time.
        n_times: Number of model times to simulate.
        targets: Target incidence values keyed by model time index.

    Returns:
        Sum of squared differences between modelled incidence and targets.
    """
    mean, sd, *log_process = parameters
    outputs, _ = model_func(mean, sd, log_process, pop, seed, n_times)
    modelled = outputs[0]  # First element of the model outputs is incidence
    return sum((modelled[time] - target) ** 2 for time, target in targets.items())

### Parameters and targets
Set up some basic parameters and targets

In [None]:
# Number of model times to simulate
n_times = 40
# Incidence at the first model time
infectious_seed = 1.0
# Total population size
population = 100.0
# Dummy incidence targets, indexed by model time
dummy_data = pd.Series(
    data=[1.0, 1.0, 1.5, 4.2, 3.8, 2.1],
    index=[5, 10, 15, 25, 30, 35],
)

### Local optimisation with process values only
Here is an optimisation algorithm applied to these dummy data.
It works for this simple case, but I have tried out more
complex data to optimise to and it quickly breaks down.

In [None]:
# The same bounds apply to each of the four log-scale process values
param_bounds = [
    [-1000.0, np.log(10.0)],
] * 4

# Generation time parameters are held fixed for this calibration
gen_time_mean = 5.5
gen_time_sd = 1.7

result = minimize(
    process_calib_func,
    [np.log(2.0)] * 4,
    args=(
        gen_time_mean,
        gen_time_sd,
        population,
        infectious_seed,
        n_times,
        dummy_data,
    ),
    method='Nelder-Mead',
    bounds=param_bounds,
)

# Re-run the model at the optimised parameters and compare against the targets
model_result, process_vals = model_func(
    gen_time_mean,
    gen_time_sd,
    result.x,
    population,
    infectious_seed,
    n_times,
)
optimised, suscept, r_t = model_result
plot_output_fit(dummy_data.to_dict(), model_result, process_vals, n_times)

### Local optimisation including the generation time-related parameters

In [None]:
# Bounds in parameter order: generation time mean, generation time sd, then four log-scale process values
param_bounds = (
    [[5.0, 6.0]]
    + [[1.5, 2.0]]
    + [[-10000.0, np.log(10.0)]] * 4
)

result = minimize(
    all_param_calib_func,
    [5.0, 1.5] + [np.log(2.0)] * 4,
    args=(population, infectious_seed, n_times, dummy_data),
    method='Nelder-Mead',
    bounds=param_bounds,
)

# Re-run the model at the optimised parameters and compare against the targets
model_result, process_vals = model_func(
    result.x[0],
    result.x[1],
    result.x[2:],
    population,
    infectious_seed,
    n_times,
)
optimised, suscept, r_t = model_result
plot_output_fit(dummy_data.to_dict(), model_result, process_vals, n_times)

### Calibrate to outputs produced by model
Check that the optimisation algorithm can recover
the parameters used in generating some arbitrary data.
This does seem a little more robust and works 
for a broad range of parameter inputs I have
tried, and can find the optimum even with quite
broad parameter bounds.

In [None]:
model_times = pd.Series(range(n_times))

# Known parameters: generation time mean, generation time sd, then four log-scale process values
test_params = [
    5.5,
    1.8,
    np.log(1.8),
    np.log(2.5),
    np.log(1.6),
    np.log(0.7),
]

# Generate synthetic targets by running the model at the known parameters
test_data, _ = model_func(
    test_params[0],
    test_params[1],
    test_params[2:],
    population,
    infectious_seed,
    n_times,
)
test_vals = dict(zip(model_times, test_data.incidence))

In [None]:
# Global optimisation with shgo - the extra arguments are bound as default values of the lambda
# (i.e. captured through a closure) instead of being passed to shgo, due to a bug in the optimisation function
# as per comment at https://stackoverflow.com/questions/72794609/scipy-issue-passing-arguments-to-optimize-shgo-function
param_bounds = (
    [[0.1, 10.0]]
    + [[0.1, 4.0]]
    + [[np.log(1.0), np.log(5.0)]] * 4
)
global_result = shgo(
    lambda params, pop=population, seed=infectious_seed, times=n_times, data=test_vals: all_param_calib_func(
        params, pop, seed, times, data
    ),
    param_bounds,
)

# Re-run the model at the recovered parameters
model_result, process_vals = model_func(
    global_result.x[0],
    global_result.x[1],
    global_result.x[2:],
    population,
    infectious_seed,
    n_times,
)
optimised, suscept, r_t = model_result

# Report the generation time parameters, then the process values on the natural scale
print(global_result.x[:2])
print(np.exp(np.array(global_result.x[2:])))
plot_output_fit(test_vals, model_result, process_vals, n_times)