# Analyze the ABC output

In [None]:
using CSV,DataFrames,Plots,Statistics,Distributions,Measures
gr();

include("auxilliary.jl"); include("parameters.jl"); include("flow.jl"); include("abcmc.jl");
ENV["COLUMNS"]=200;

## Inputs

In [None]:
# ABC sample tables: run ids frt..lst are read from ABCsmp<id>.csv and joined
# column-wise. Column 1 of each additional file is dropped when appending, so
# only the first file contributes it.
frt = 1
lst = 1
df = CSV.read("ABCsmp$frt.csv", DataFrame; header=false)
for id in (frt + 1):lst
    chunk = CSV.read("ABCsmp$id.csv", DataFrame; header=false)
    df = hcat(df, chunk[:, 2:end]; makeunique=true)
end

# Trajectory tables, assembled the same way from ABCtrj<id>.csv
dftrj = CSV.read("ABCtrj$frt.csv", DataFrame; header=false)
for id in (frt + 1):lst
    chunk = CSV.read("ABCtrj$id.csv", DataFrame; header=false)
    dftrj = hcat(dftrj, chunk[:, 2:end]; makeunique=true)
end

# threshold for ℓerr cutoff, given as a top quantile (eg 0.05 is top 5%)
qℓ = 1.0;

## Extract samples

In [None]:
# Column 1 of df is not a sample column (it is matched against parameter
# names further down), hence the -1.
ncols = ncol(df) - 1
# Nominal number of samples kept: the quantile fraction times the total
nℓcols = qℓ * ncols
println("The total number of abc sampling before conditioning: $ncols")
println("The total number of abc sampling after conditioning with top $qℓ quantile: $nℓcols")

In [None]:
# abcdata() comes from the included project files; its second return value is
# used below as a key => Bool table of which parameters are varied.
prmrg, prmvary = abcdata()
# Force ℓerr and βθ to be treated as varied so they are extracted as marginals
prmvary[:ℓerr] = true
prmvary[:βθ] = true

println("Varied parameters:")
for key in keys(prmvary)
    prmvary[key] && println(key)
end

In [None]:
# Create dictionary of row position of key parameters.
# Column 1 of df holds the parameter names. They are compared as strings so the
# lookup also works when this cell is re-run after the best-fit cell further
# down has converted that column to Symbols.
rownames = string.(df[!, 1])
mykeys = [key for key in keys(prmvary)]
mykeys = vcat(mykeys, [:ℓerr, :βθ])
pnt = Dict{Symbol,Int64}()
for key in mykeys
    row = findfirst(==(string(key)), rownames)
    # Fail with a readable message instead of scanning past the last row
    row === nothing &&
        throw(ArgumentError("parameter $key not found in column 1 of df"))
    pnt[key] = row
end

# Extract marginal values conditioned on threshold:
# one vector per varied parameter, taken from that parameter's row (columns 2:end)
marginals = Dict{Symbol,Vector{Float64}}()
for key in keys(prmvary)
    if prmvary[key]
        marginals[key] = [v for v in df[pnt[key], 2:end]]
    end
end
# Keep only the samples whose ℓerr is at or below the qℓ quantile of ℓerr
flag = marginals[:ℓerr] .<= quantile(marginals[:ℓerr], qℓ)
for key in keys(marginals)
    marginals[key] = marginals[key][flag]
end

# Extract trajectories conditioned on threshold:
# drop column 1, keep the flagged sample columns, then prepend a 0-based "day" column
dftrjabc = dftrj[:, 2:end]
dftrjabc = dftrjabc[:, flag]
dftrjabc = hcat(DataFrame("day" => collect(0:(nrow(dftrjabc) - 1))), dftrjabc)
last(dftrjabc, 7)

## Plot marginal distributions

In [None]:
# pdf-normalized histogram of each conditioned marginal, one plot per cell
histogram(marginals[:ℓerr]; title="ℓerr", labels="", size=(500, 250), normalize=:pdf)

In [None]:
histogram(marginals[:ρ]; title="ρ", labels="", size=(500, 250), normalize=:pdf)

In [None]:
histogram(marginals[:αeff]; title="αeff", labels="", size=(500, 250), normalize=:pdf)

In [None]:
histogram(marginals[:βα]; title="βα", labels="", size=(500, 250), normalize=:pdf)

In [None]:
histogram(marginals[:γα]; title="γα", labels="", size=(500, 250), normalize=:pdf)

In [None]:
histogram(marginals[:γθ]; title="γθ", labels="", size=(500, 250), normalize=:pdf)

In [None]:
# Joint distribution of the two γ parameters
histogram2d(
    marginals[:γθ], marginals[:γα];
    labels="", xlabel="γθ", ylabel="γα", size=(500, 250),
)

In [None]:
# Per-sample mean of the Weibull built from each (first, second) parameter pair,
# computed element-wise over the conditioned marginals
βμ = mean.(Weibull.(marginals[:βα], marginals[:βθ]));
γμ = mean.(Weibull.(marginals[:γα], marginals[:γθ]));

histogram2d(
    βμ, γμ;
    xlabel="Length of infectious period mean",
    ylabel="Length of recovery period mean",
    size=(500, 325),
)

## Pearson correlation coefficients

In [None]:
# Per-parameter mean and population (1/n) standard deviation of the marginals
μs = Dict{Symbol,Float64}()
σs = Dict{Symbol,Float64}()
for (key, vals) in marginals
    μs[key] = mean(vals)
    σs[key] = std(vals; corrected=false, mean=μs[key])
end

# Pairwise Pearson correlation matrix. Rows and columns follow the iteration
# order of keys(marginals), which is stable as long as the Dict is not modified.
# `enumerate` supplies the matrix indices, so no global counter is mutated
# inside the loops and the cell behaves the same in a notebook and in a script.
n = length(marginals)
PC = Matrix{Float64}(undef, n, n)
for (i, keyᵢ) in enumerate(keys(marginals)), (j, keyⱼ) in enumerate(keys(marginals))
    PC[i, j] = cor(marginals[keyᵢ], marginals[keyⱼ])
end;

In [None]:
# Tabulate the correlation matrix: a "prm" column of row labels followed by one
# column per parameter, both in keys(marginals) order (the order PC was filled in).
dfpc = DataFrame("prm" => collect(keys(marginals)))
# `enumerate` provides the column index, so no global counter is mutated in the loop
for (col, key) in enumerate(keys(marginals))
    dfpc[!, string(key)] = PC[:, col]
end
println("Pearson correlation coefficients (remember βθ is not free):")
dfpc

# Analyze Best Fit

## Inputs

In [None]:
# Values written into the best-fit parameter set (entries :nnd, :nndsmp, :atol,
# :rtol) before it is re-simulated
nnd = 2500
nndsmp = 2500
atol = 1e-5
rtol = 1e-3;

## Simulate the best fit

In [None]:
# Use Symbols for the parameter-name column (harmless if already converted)
df[!, 1] = Symbol.(df[!, 1])

# Locate the ℓerr row; fail clearly rather than scanning past the last row
idℓerr = findfirst(==(:ℓerr), df[!, 1])
idℓerr === nothing && throw(ArgumentError("no ℓerr row found in column 1 of df"))

# Find best fit: the sample column (2:end) with the smallest ℓerr.
# findmin returns the first minimiser, matching a first-match search on ties.
ℓs = [df[idℓerr, k] for k in 2:ncol(df)]
ℓbf, best = findmin(ℓs)
isnan(ℓbf) && error("ℓerr row contains NaN; cannot select a best fit")
pos = best + 1;  # +1 converts the index in ℓs to a column index of df

In [None]:
# Build the parameter set from the best-fit column of df, keyed by column 1
vkeys = df[:, 1]
prm, _ = rdprm(df[:, pos], vkeys)

# Overwrite the discretization and tolerance entries with the chosen inputs
for (name, value) in ((:nnd, nnd), (:atol, atol), (:rtol, rtol), (:nndsmp, nndsmp))
    prm[name][1] = value
end
data!(prm);

In [None]:
# Run the simulation
# Solve the model for the best-fit parameter set `prm`; the two returned
# objects are reused by the plotting and error cells below. `pdesolve` is not
# defined in this file — presumably it comes from the included flow.jl (TODO confirm).
ysol,yʳsol = pdesolve(;prm=prm);

## Plot equation coefficients

In [None]:
# The calls in this section pass a Symbol selector plus `prm` to `plot`;
# presumably these are project-defined methods from the included files — TODO confirm.
plot(:α;prm=prm)

In [None]:
plot(:β;prm=prm)

In [None]:
plot(:γ;prm=prm)

In [None]:
plot(:Weibull;prm=prm)

In [None]:
plot(:fˢ;prm=prm)

In [None]:
plot(:fⁱ;prm=prm)

## Plot best fit solution

In [None]:
# Plot the best-fit solution object on its own
plot(ysol)

In [None]:
# Plot both outputs of pdesolve together
plot(ysol,yʳsol;prm=prm)

In [None]:
# NOTE(review): `plotbd` is not defined in this file; confirm what it draws
plotbd(ysol;prm=prm)

### Plot errors with ODH

In [None]:
# Observed daily case counts
df_yⁱ = CSV.read("ODH_snipdaily.csv", DataFrame)
first(df_yⁱ, 3)

# Copied from abcmc's ℓerr
npts = nrow(df_yⁱ)
# Time stamp of each stored solution step
taxis = [ysol[i].yˢ.tlvl.t₀[1] for i in 1:length(ysol)]

yⁱ_daily = Vector{Float64}(undef, npts)
#  Total infections during this period
kT = sum(df_yⁱ[!, "daily_confirm"])
#  ∫yⁱdt at s=0 by trapezoidal rule.
#  Written as a single reduction so no global is mutated inside a top-level
#  loop; `init=0.0` keeps the result 0.0 when there are fewer than two steps.
∫yⁱdt = sum(
    ((ysol[k].yⁱ.ys[1] + ysol[k - 1].yⁱ.ys[1]) / 2 * (taxis[k] - taxis[k - 1])
     for k in 2:length(taxis));
    init=0.0,
)
# Scale factor that makes the integrated model incidence equal the observed total
neff = kT / ∫yⁱdt

# Compute difference in daily incidence between model prediction and observed.
# Bounds checks are left on: the step index comes from myfindfirst, whose
# range is not established in this file.
for k in 1:npts
    tnow = k - 1.0                      # row k corresponds to time k-1
    ℓ = myfindfirst(taxis, tnow)
    ℓ = ℓ == 1 ? 2 : ℓ                  # need a left neighbour ℓ-1 for interpolation
    ynow = myinterp(tnow, ysol[ℓ - 1].yⁱ, ysol[ℓ].yⁱ)
    yⁱ_daily[k] = neff * eval(ynow, 0.0)
end

In [None]:
# Error and daily prediction for the best-fit trajectory (this replaces the
# yⁱ_daily computed by hand in the previous cell)
rms, yⁱ_daily = ℓerr(ysol; prm=prm)

# Row-wise 2.5% and 97.5% quantiles across the trajectory columns (2:end),
# i.e. the bounds of a central 95% band.
# NOTE(review): these use the full dftrj, not the ℓerr-conditioned dftrjabc — confirm intent.
qlow = map(r -> quantile(r, 0.025), eachrow(dftrj[:, 2:end]))
qhgh = map(r -> quantile(r, 0.975), eachrow(dftrj[:, 2:end]));

In [None]:
npts = nrow(df_yⁱ)

# Raw observed counts
plot(df_yⁱ[!, :time], df_yⁱ[!, :daily_confirm]; linewidth=1, labels="ODH raw",
     linestyle=:dash, title="ODH Daily Cases", xlabel="date", ylabel="count",
     size=(400, 225));

# Centered 7-day moving average. Near either end the window is clamped to the
# available rows, so the average is taken over fewer than 7 points there.
# The window is clamped explicitly rather than relying on catching a
# BoundsError, which would also hide unrelated errors.
halfwidth = 3
odhavg = Vector{Float64}(undef, npts)
for i in 1:npts
    window = max(1, i - halfwidth):min(npts, i + halfwidth)
    odhavg[i] = sum(df_yⁱ[window, :daily_confirm]) / length(window)
end
plot!(df_yⁱ[!, :time], odhavg; linewidth=3, labels="ODH 7 day mvavg");

# Model prediction with an asymmetric ribbon reaching down to qlow and up to qhgh
plot!(df_yⁱ[!, :time], yⁱ_daily[1:npts]; linewidth=3, labels="model",
      ribbon=(yⁱ_daily[1:npts] - qlow, qhgh - yⁱ_daily[1:npts]))