# Analyze the ABC output

In [None]:
# Third-party packages used throughout the notebook; GR is selected as the Plots backend.
using CSV, DataFrames, Plots, Statistics, Distributions, Measures
gr();

# Project-local sources, loaded in their original order.
include("auxilliary.jl");
include("parameters.jl");
include("flow.jl");
include("abcmc.jl");

# Column-width hint (in characters) picked up when wide tables are displayed.
ENV["COLUMNS"] = 200;

## Inputs

In [None]:
# ABC sample table, read without a header row. The first column holds the
# parameter names and every remaining column is one sample.
df = CSV.read("ABCsmp.csv", DataFrame; header=false);

# Conditioning threshold on ℓerr, expressed as a quantile level: samples whose
# ℓerr lies strictly below the qℓ-quantile are kept (e.g. 0.05 keeps roughly
# the 5% of samples with the smallest ℓerr).
qℓ = 0.1;

## Extract samples

In [None]:
# Every column after the first (the label column) is one ABC sample.
ncols = ncol(df) - 1;
# Nominal number of samples kept at the qℓ quantile. Rounded to an integer so
# the report shows a whole count instead of a Float64 such as "250.0"; the
# exact number retained is decided later by the quantile cut itself.
nℓcols = round(Int, qℓ * ncols);
println("The total number of abc sampling before conditioning: $ncols")
println("The total number of abc sampling after conditioning with top $qℓ quantile: $nℓcols")

In [None]:
# abcdata() is provided by the included project files; its second return value
# flags, per parameter name, whether that parameter was varied.
prmrg, prmvary = abcdata();
# ℓerr and βθ are flagged as well so that they are extracted alongside the
# sampled parameters.
prmvary[:ℓerr] = true;
prmvary[:βθ] = true;

println("Varied parameters:")
for name in keys(prmvary)
    prmvary[name] && println(name)
end

In [None]:
# Create dictionary of row position of key parameters.
# `unique` drops the repeats that arise when :ℓerr and :βθ are already keys of
# prmvary.
mykeys = unique(vcat(collect(keys(prmvary)), [:ℓerr, :βθ]));

# Row labels are compared as strings so the lookup works whether the first
# column of df still holds strings or has already been converted to Symbols.
rowlabels = string.(df[!, 1]);

pnt = Dict{Symbol,Int64}()
for key in mykeys
    # A bounded search with an explicit error replaces the open-ended scan, and
    # the loop-local `row` avoids overwriting the notebook-global `pos`.
    row = findfirst(==(string(key)), rowlabels)
    isnothing(row) && error("parameter $key has no row in the ABC sample table")
    pnt[key] = row
end;

# Collect, for every varied parameter, its sampled values (columns 2:end of the
# parameter's row) before any conditioning is applied.
marginals = Dict{Symbol,Vector{Float64}}();
for key in keys(prmvary)
    prmvary[key] || continue
    marginals[key] = Float64[v for v in df[pnt[key], 2:end]];
end

# Condition on the error: keep only samples whose ℓerr is strictly below the
# qℓ-quantile of ℓerr, and apply the same mask to every parameter.
flag = marginals[:ℓerr] .< quantile(marginals[:ℓerr], qℓ);
for (key, vals) in marginals
    marginals[key] = vals[flag]
end;

## Plot marginal distributions

In [None]:
# Density-normalized histogram of ℓerr over the conditioned samples
histogram(marginals[:ℓerr]; title="ℓerr", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Density-normalized histogram of ρ
histogram(marginals[:ρ]; title="ρ", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Density-normalized histogram of αeff
histogram(marginals[:αeff]; title="αeff", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Density-normalized histogram of βα
histogram(marginals[:βα]; title="βα", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Density-normalized histogram of γα
histogram(marginals[:γα]; title="γα", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Density-normalized histogram of γθ
histogram(marginals[:γθ]; title="γθ", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Joint 2-D histogram with γθ on the x-axis and γα on the y-axis
histogram2d(
    marginals[:γθ], marginals[:γα];
    labels="", xlabel="γθ", ylabel="γα", size=(500, 250),
)

In [None]:
# Mean of the Weibull distribution defined by each retained sample, with the
# *α entry passed as the first Weibull argument and the *θ entry as the second.
βμ = mean.(Weibull.(marginals[:βα], marginals[:βθ]));
γμ = mean.(Weibull.(marginals[:γα], marginals[:γθ]));

# Joint distribution of the two means (β on x, γ on y)
histogram2d(
    βμ, γμ;
    xlabel="Length of infectious period mean",
    ylabel="Length of recovery period mean",
    size=(500, 325),
)

## Pearson correlation coefficients

In [None]:
# Per-parameter mean and population (1/n) standard deviation of the conditioned
# samples, kept available under their original names.
μs = Dict{Symbol,Float64}(); σs = Dict{Symbol,Float64}();
for (key, vals) in marginals
    μs[key] = mean(vals)
    σs[key] = std(vals; corrected=false, mean=μs[key])
end

# Pearson correlation matrix. Rows and columns follow the iteration order of
# keys(marginals), which is what the table built from PC relies on. The 1/n
# vs 1/(n-1) normalization cancels in a correlation, so `cor` yields the same
# coefficients as the explicit sums, with values clamped to [-1, 1] and a unit
# diagonal.
n = length(marginals);
PC = cor(hcat((marginals[key] for key in keys(marginals))...));

In [None]:
# Tabulate the correlation matrix: one row and one column per parameter, in the
# iteration order of keys(marginals) (the order used to fill PC).
paramnames = collect(keys(marginals));
dfpc = DataFrame("prm" => paramnames);
# enumerate supplies the column index, so no notebook-global counter is
# modified here (the name `pos` is reused elsewhere for the best-fit column).
for (j, name) in enumerate(paramnames)
    dfpc[!, string(name)] = PC[:, j];
end
println("Pearson correlation coefficients (remember βθ is not free):")
dfpc

# Analyze Best Fit

## Inputs

In [None]:
# Discretization sizes written into prm[:nnd] and prm[:nndsmp] before the
# best-fit simulation.
nnd, nndsmp = 2500, 2500;
# Absolute and relative tolerances written into prm[:atol] and prm[:rtol].
atol, rtol = 1e-5, 1e-3;

## Simulate the best fit

In [None]:
# Convert the label column to Symbols in place.
df[!,1] = Symbol.(df[!,1])

# Find best fit
# Locate the ℓerr row with a bounded search and fail with a clear message if it
# is absent.
idℓerr = findfirst(==(:ℓerr), df[!, 1]);
isnothing(idℓerr) && error("no ℓerr row found in the ABC sample table");

# ℓerr of every sample (columns 2:end); the best fit is the smallest one.
ℓs = [df[idℓerr, k] for k in 2:ncol(df)]
# findmin returns the minimum and the index of its first occurrence in one pass.
ℓbf, idbest = findmin(ℓs);
isnan(ℓbf) && error("ℓerr contains NaN; the best fit is undefined");
# +1 turns the sample index into a column of df (column 1 holds the labels).
pos = idbest + 1;

In [None]:
# Build the parameter container from the best-fit column of df; rdprm comes
# from the included project files and its second return value is discarded.
vkeys = df[:, 1];
prm, _ = rdprm(df[:, pos], vkeys);

# Overwrite the discretization and tolerance entries with the notebook inputs
# (same order as before: nnd, atol, rtol, nndsmp).
for (name, value) in ((:nnd, nnd), (:atol, atol), (:rtol, rtol), (:nndsmp, nndsmp))
    prm[name][1] = value
end
data!(prm);

In [None]:
# Run the simulation for the best-fit parameter set `prm`.
# pdesolve is defined in the included project files; its two return values are
# indexed and plotted by the cells that follow.
# NOTE(review): the exact structure of ysol / yʳsol is not visible here — confirm in flow.jl.
ysol,yʳsol = pdesolve(;prm=prm);

## Plot equation coefficients

In [None]:
# Plot the α coefficient for the best-fit parameter set `prm`.
# NOTE(review): plot(::Symbol; prm) is presumably a project-defined method from
# the included files — confirm.
plot(:α;prm=prm)

In [None]:
# Same call for the β coefficient.
plot(:β;prm=prm)

In [None]:
# Same call for the γ coefficient.
plot(:γ;prm=prm)

In [None]:
# Same call with the :Weibull selector.
# NOTE(review): presumably plots the Weibull distributions used by the model — confirm.
plot(:Weibull;prm=prm)

In [None]:
# Same call with the :fˢ selector.
plot(:fˢ;prm=prm)

In [None]:
# Same call with the :fⁱ selector.
plot(:fⁱ;prm=prm)

## Plot best fit solution

In [None]:
# Plot the best-fit solution returned by pdesolve.
# NOTE(review): relies on a plot method/recipe for the solution type, presumably
# defined in the included project files — confirm.
plot(ysol)

In [None]:
# Plot both pdesolve outputs together for the parameter set `prm`.
plot(ysol,yʳsol;prm=prm)

In [None]:
# plotbd is a project function applied to the solution.
# NOTE(review): what it displays cannot be determined from this file — confirm.
plotbd(ysol;prm=prm)

### Plot errors with ODH

In [None]:
# ODH yˢ table; the later cells read its "time", "date" and age-band columns.
df_yˢ = CSV.read("ODH_ys.csv", DataFrame);
# Preview the first seven rows.
first(df_yˢ, 7)

In [None]:
# Compute relative yserr's: |observed - simulated| / |observed|, one row per
# observation time and one column per age band of the ODH table.
agebands = ["0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+"];
# Age (in years) at which the simulated solution is sampled for each band; it
# is multiplied by 365 when passed to the solution evaluator.
ageyears = [5.0, 15.0, 25.0, 35.0, 45.0, 55.0, 65.0, 75.0, 90.0];

yserr = Matrix{Float64}(undef, nrow(df_yˢ), length(agebands));

# Adapted from abcmc.jl
npts = length(df_yˢ[!, "time"]);
# Time stamp of every stored solution level.
taxis = [ysol[i].yˢ.tlvl.t₀[1] for i in 1:length(ysol)];

# Bounds checks are left on: ℓ comes from a project helper, so an observation
# time outside the simulated range should raise an error, not read out of range.
for k in 1:npts
    tnow = Float64(df_yˢ[k, "time"]);

    # Pick the pair of solution levels handed to the interpolation; an index
    # of 1 is shifted to 2 so that ℓ-1 is a valid level.
    ℓ = myfindfirst(taxis, tnow);
    ℓ = ℓ == 1 ? 2 : ℓ;
    ynow = myinterp(tnow, ysol[ℓ-1].yˢ, ysol[ℓ].yˢ);

    for (j, band) in enumerate(agebands)
        obs = Float64(df_yˢ[k, band]);
        # `eval` is called here in its two-argument (state, age) form.
        # NOTE(review): presumably the project's evaluator from the included files — confirm.
        yserr[k, j] = abs(obs - eval(ynow, ageyears[j] * 365)) / abs(obs);
    end
end

In [None]:
# Heat map of the yˢ relative error: age on the x-axis (one value per column of
# yserr), observation date on the y-axis (one per row).
heatmap(
    [5, 15, 25, 35, 45, 55, 65, 75, 90],
    df_yˢ[!, "date"],
    yserr;
    title="yˢ relative error",
    xlabel="age",
    ylabel="date",
    size=(600, 300),
)

In [None]:
# ODH yⁱ table; the later cells read its "time", "date" and "yi" columns.
df_yⁱ = CSV.read("ODH_yi.csv", DataFrame);
# Preview the last seven rows.
last(df_yⁱ, 7)

In [None]:
# Relative error of the simulated yⁱ against the ODH "yi" column, one entry per
# observation time. `taxis` (time stamps of the solution levels) is reused from
# the yˢ error cell.
ntpts = length(df_yⁱ[!, "time"]);
yierr = Vector{Float64}(undef, ntpts);

# Bounds checks are left on: ℓ comes from a project helper, so an observation
# time outside the simulated range should raise an error, not read out of range.
for k in 1:ntpts
    tnow = Float64(df_yⁱ[k, "time"]);

    # An index of 1 is shifted to 2 so that ℓ-1 is a valid level.
    ℓ = myfindfirst(taxis, tnow);
    ℓ = ℓ == 1 ? 2 : ℓ;
    ynow = myinterp(tnow, ysol[ℓ-1].yⁱ, ysol[ℓ].yⁱ);

    obs = df_yⁱ[k, :yi];
    # The interpolated yⁱ is evaluated at 0.0 and compared with the observation.
    yierr[k] = abs(obs - eval(ynow, 0.0)) / abs(obs);
end

In [None]:
# Time series of the yⁱ relative error against the observation dates
plot(
    df_yⁱ[!, :date], yierr;
    linewidth=3,
    labels="",
    title="yⁱ relative error in daily incidence",
    xlabel="date",
    ylabel="rel error",
    size=(400, 225),
)