# Inverse UQ of the 'a' and 'm' parameters

In [14]:
using Pkg
Pkg.activate("C:/Users/lisah/Documents/Repos/ma-code")

[32m[1m  Activating[22m[39m project at `C:\Users\lisah\Documents\Repos\ma-code`


In [15]:
include("c:/Users/lisah/Documents/Repos/ma-code/src/src.jl")
using .Src, DataFrames, Optim, ForwardDiff, LinearAlgebra, CSV



## Tools

In [24]:
"""
    compute_ll_am(x, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)

Evaluate the least-squares form of the log-likelihood of the Klausmeier model at the
parameter point `x = (a, m)`, for data with Gaussian noise.

The model is simulated with `a` and `m` taken from `x` and every other setting copied from
`hprm`. The squared residuals against `true_val` are then summed over the "n" and the "w"
trajectory.

# Arguments
- `x`: two-element collection `(a, m)`; the parameters that are to be inferred
- `hprm::Src.Hyperprm`: supplies `w0`, `n0`, `M` and `noise`; its `a` and `m` are replaced by `x`
- `true_val::DataFrame`: observed trajectories with columns "w" and "n"
- `t_fixed::Bool`: true if a fixed observation time window is considered
- `t_end::Float64`: end of observation window (if t_fixed=true)
- `t_step::Float64`: step size with which the M observations are picked (if t_fixed=false)

# Returns
- scalar log-likelihood: `-0.5` times the summed squared residuals of both trajectories,
  additionally divided by `noise` unless the noise level is exactly zero
"""
function compute_ll_am(x, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)
    a, m = x
    sim_prm = Src.Hyperprm(hprm.w0, hprm.n0, a, m, hprm.M, hprm.noise)
    pred_val = Src.sol_klausmeier(sim_prm; t_fixed=t_fixed, t_end=t_end, t_step=t_step)

    # summed squared residuals, one per trajectory
    sq_n = sum((true_val[:, "n"] - pred_val[:, "n"]) .^ 2)
    sq_w = sum((true_val[:, "w"] - pred_val[:, "w"]) .^ 2)

    # noise-free data: plain least squares; otherwise every residual sum is scaled by 1/noise
    weight = sim_prm.noise == 0.0 ? 0.5 : 0.5 * 1 / sim_prm.noise
    return -(weight * sq_n) - weight * sq_w
end

"""
    compute_mle_am(hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0, N::Int64=5)

Compute the maximum likelihood estimate of `(a, m)` for the given data observations.

The work is delegated to `mult_restart_mle_am`, which is run with `N` starting points; only
the run it reports as best is kept.

# Arguments
- `hprm::Src.Hyperprm`: hyperparameters passed on to the restarts
- `true_val::DataFrame`: observed data trajectories
- `t_fixed`, `t_end`, `t_step`: passed on unchanged
- `N::Int64`: number of restarts

# Returns
- 2-element vector containing the mle `[a_mle, m_mle]` of the best run
- the convergence entry recorded for that run
"""
function compute_mle_am(hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0, N::Int64=5)
    restarts = mult_restart_mle_am(N, hprm, true_val; t_fixed=t_fixed, t_end=t_end, t_step=t_step)
    # positions in the returned tuple: 3 = MLEs, 5 = index of best run, 6 = convergence status
    mles, best, converged = restarts[3], restarts[5], restarts[6]
    return mles[best, :], converged[best]
end

"""
    mult_restart_mle_am(N::Int64, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)

Perform maximum likelihood estimation of `(a, m)` from `N` random starting points. The goal
is to find the global minimum of the negative log-likelihood.

Starting points are drawn with `rand`: `a` from `[0, 2)` and `m` from `[0, 4)`. Every run
minimises `-compute_ll_am` with `Optim.optimize` using its default method.

# Arguments
- `N::Int64`: number of restarts
- `hprm::Src.Hyperprm`: hyperparameters forwarded to `compute_ll_am`
- `true_val::DataFrame`: observed data trajectories
- `t_fixed`, `t_end`, `t_step`: forwarded unchanged to `compute_ll_am`

# Returns
- `Matrix{Float64}`: `N×2` initial values used in optimization
- `Vector{Float64}`: losses of the initial values
- `Matrix{Float64}`: `N×2` computed MLEs
- `Vector{Float64}`: corresponding losses of the MLEs
- `Int`: index of the optimization trial with minimal loss; trials whose loss is not finite
  are ignored unless no trial produced a finite loss
- `Vector{Bool}`: convergence status for each optimization trial
"""
function mult_restart_mle_am(N::Int64, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)
    # generate optim start pts
    inits = hcat(2 .* rand(N), 4 .* rand(N))

    # store mles, corresponding losses and convergence flags
    mle_vals = zeros(N, 2)
    mle_loss = zeros(N)
    inits_loss = zeros(N)
    converged = fill(false, N)

    # objective: negative log-likelihood in (a, m)
    negll(x) = -compute_ll_am(x, hprm, true_val; t_fixed=t_fixed, t_end=t_end, t_step=t_step)

    for i in 1:N
        pt = inits[i, :] # starting point for optimization
        result = optimize(negll, pt)
        mle_vals[i, :] = Optim.minimizer(result)
        mle_loss[i] = Optim.minimum(result)
        converged[i] = Optim.converged(result)
        inits_loss[i] = negll(pt)
    end

    # extract best: findmin/argmin order NaN below every number, so a failed run with a
    # NaN loss would otherwise be selected; restrict the search to finite losses
    finite_runs = findall(isfinite, mle_loss)
    best_loss_ind = if isempty(finite_runs)
        argmin(mle_loss)
    else
        finite_runs[argmin(view(mle_loss, finite_runs))]
    end

    return inits, inits_loss, mle_vals, mle_loss, best_loss_ind, converged
end

mult_restart_mle_am

In [25]:
"""
    compute_fi_am(eval_pt::Vector{Float64}, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)

Compute the Fisher information value at `eval_pt = [a, m]`.

The Hessian of `compute_ll_am` with respect to `(a, m)` is obtained with
`ForwardDiff.hessian`; the returned value is the trace of the negative Hessian.

# Arguments
- `eval_pt::Vector{Float64}`: point `[a, m]` at which the Hessian is evaluated
- `hprm::Src.Hyperprm`: hyperparameters forwarded to `compute_ll_am`
- `true_val::DataFrame`: observed data trajectories
- `t_fixed`, `t_end`, `t_step`: forwarded unchanged to `compute_ll_am`

# Returns
- Fisher information value at the given evaluation point
"""
function compute_fi_am(eval_pt::Vector{Float64}, hprm::Src.Hyperprm, true_val::DataFrame; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)
    hess = ForwardDiff.hessian(eval_pt) do x
        compute_ll_am(x, hprm, true_val; t_fixed=t_fixed, t_end=t_end, t_step=t_step)
    end
    return tr(-hess)
end

"""
    store_fish_data(w0::Float64,m::Float64,M::Int64,noise::Float64,df::DataFrame, path::String)

Store data evaluated on the grid in a csv file inside the folder `path`.
The file name has the form "fish_<w0>_<m>_<M>_<noise>.csv".

# Arguments
- `w0::Float64`, `m::Float64`, `M::Int64`, `noise::Float64`: values encoded in the file name
- `df::DataFrame`: df to store
- `path::String`: path to the folder where the file is stored; a trailing path separator
  is optional

# Returns
- whatever `CSV.write` returns for the written file
"""
function store_fish_data(w0::Float64,m::Float64,M::Int64,noise::Float64,df::DataFrame, path::String)
    filename = "fish_$(w0)_$(m)_$(M)_$(noise).csv"
    # joinpath only inserts a separator when `path` does not already end in one, so the
    # file lands inside the folder whether or not the caller added the trailing slash
    return CSV.write(joinpath(path, filename), df)
end

"""
    gen_all_fish_data(M_vals, noise_vals, m, w0, path; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)

Generate and store the Fisher-information data for every combination of sample size and
noise level.

For each `(M, noise)` pair, every point of the 41×21 grid from `Src.create_grid()` is used
as the true parameter point (first entry as `a`, second entry as `n0`). Noisy data is
simulated there, the MLE of `(a, m)` is computed, and the Fisher information is evaluated at
that MLE. The resulting table is written with `store_fish_data`, and the fraction of
successful optimizations is printed.

# Arguments
- `M_vals`: sample sizes
- `noise_vals`: noise levels
- `m`: mortality rate in Klausmeier model used to generate the data (fixed)
- `w0`: initial value for water compartment in Klausmeier model (fixed)
- `path`: path to folder where fish data is stored
- `t_fixed::Bool`: true if we consider a fixed observation time window
- `t_end::Float64`: end of observation window (if t_fixed=true)
- `t_step::Float64`: step size with which M observations should be picked (set if t_fixed=false)

# Returns
- `nothing`
"""
function gen_all_fish_data(M_vals, noise_vals, m, w0, path; t_fixed::Bool=false, t_end::Float64=50.0, t_step::Float64=1.0)
    # neither the grid nor the column labels depend on M or noise, so build them once
    # (assumes Src.create_grid() returns the same grid on every call)
    grid = Src.create_grid()
    n_rows, n_cols = 41, 21
    a_eval_pts = string.(0.0:0.1:2.0)

    for M in M_vals, noise in noise_vals
        fish = zeros(n_rows, n_cols)

        # keep track of whether the optimization algo terminates successfully when finding the MLE
        success_counter = 0

        # evaluate fisher info on grid
        for i in 1:n_rows, j in 1:n_cols
            pt = grid[i, j] # true observation parameter point
            hprm = Src.Hyperprm(w0, pt[2], pt[1], m, M, noise) # w0,n0,a,m,M,noise

            sol_true = Src.sol_klausmeier(hprm; t_fixed=t_fixed, t_end=t_end, t_step=t_step)
            sol_true = Src.randomize_data!(sol_true, hprm.noise) # include noise

            mle, success = compute_mle_am(hprm, sol_true; t_fixed=t_fixed, t_end=t_end, t_step=t_step)

            # evaluate Fi at MLE
            fish[i, j] = compute_fi_am(mle, hprm, sol_true; t_fixed=t_fixed, t_end=t_end, t_step=t_step)

            success_counter += success # number of successful optimizations
        end

        # one optimization was run per grid point
        success_fraction = success_counter / (n_rows * n_cols)
        println("MLE terminated with success in $success_fraction cases.")

        # create data frame: one column per label in a_eval_pts
        df_fish = DataFrame(fish, a_eval_pts)

        store_fish_data(w0, m, M, noise, df_fish, path)
    end
    return nothing
end

gen_all_fish_data

## Inverse UQ

### generate data

In [26]:
# general setup: fixed observation window that ends at t = 50
t_fixed, t_end = true, 50.0;

In [None]:
# fixed model settings of the data observations
w0 = 0.95
m = 0.45 # m of data observations is always 0.45

# sample sizes and noise levels to sweep over
M_vals = [10,50,100,500]
noise_vals = [0.01]

# folder that receives the generated csv files
path = "C:/Users/lisah/Documents/Repos/ma-code/data/t_fixed/t50/fisher_am/"

gen_all_fish_data(M_vals, noise_vals, m, w0, path; t_fixed=t_fixed, t_end=t_end)