In [None]:
using CSV, DataFrames, Turing, CategoricalArrays, StatsBase, StatsPlots, Random,
    ReverseDiff, Revise, RCall, LinearAlgebra
using OptimizationOptimJL, Distributions, ApproxFun, Serialization, Printf, DataFramesMeta,
    StatProfilerHTML, StatsFuns, OptimizationBBO, Printf

using Interact
includet("debughelpers.jl")
includet("fithelpers.jl")
includet("gen_inits.jl")
includet("models.jl")
includet("fitmodel1.jl")
includet("fitmodel2.jl")
includet("fitmodel3.jl")


In [None]:
## Load the data and create the model object

## The 10% subsample drawn below is random; re-enable the seed for a reproducible subset.
##Random.seed!(20240719)


#optis = CSV.read("./fitted_models/opti" * model * ".csv", DataFrame)
optis = nothing

# Flow table; the district and age-group columns become categoricals so that
# levelcode.() can turn them into integer indices further down.
dt = load_flows()
dt.fromdist = categorical(dt.fromdist)
dt.todist = categorical(dt.todist)
dt.agegroup = categorical(dt.agegroup)
# Fix the level order explicitly so the age-group codes run youngest to oldest.
levels!(dt.agegroup, ["below18", "18-25", "25-30", "30-50", "50-65", "above65"])
rename!(dt, Dict(:dist => :distance))

## Create a districts file which has distcode, pop, density, xcoord, ycoord and save it in the data directory
dists = CSV.read("./data/districts.csv", DataFrame)
dists.distcode = categorical(dists.distcode)

# Median Euclidean distance over all ordered pairs of distinct districts.
distances = [norm([dists.xcoord[i] - dists.xcoord[j], dists.ycoord[i] - dists.ycoord[j]])
             for i in 1:nrow(dists), j in 1:nrow(dists) if i != j]
meddist = median(distances)
distances = nothing ## free the memory

popgerm = sum(dists.pop)

Nages = length(levels(dt.agegroup))

# `Ndist` was used below without being assigned anywhere in the visible cells.
# It is set to the number of rows in the districts table, which is the same count
# that was already being passed to migration3.
# NOTE(review): confirm this is the district count calcnet expects.
Ndist = nrow(dists)

netactual = calcnet(dt.flows,
            levelcode.(dt.fromdist),
            levelcode.(dt.todist),
            levelcode.(dt.agegroup),
            Nages,
            Ndist)
Ncoefs = 36

# Keep each row independently with probability 0.1.
dtsmall = dt[rand(Bernoulli(.1), nrow(dt)), :]

# The original call was missing the comma after `Nages`, which made the cell a parse error.
model3 = migration3(dtsmall.flows, sum(dtsmall.flows),
                    levelcode.(dtsmall.fromdist), levelcode.(dtsmall.todist),
                    dtsmall.frompop, dtsmall.topop, popgerm, dtsmall.distance,
                    levelcode.(dtsmall.agegroup), Nages,
                    dtsmall.xcoord, dtsmall.ycoord, dists.density, dists.pop,
                    Ndist, meddist, netactual, Ncoefs)



# Find Inits for model3

The goal is to load the data and find initial values for model 3 that give a good starting point for the following parameters:
* a
* c
* d0
* dscale
* logisticconst



# Interactive search for model3 starting values: each slider sets one parameter,
# the model is evaluated at those values, and log(observed / predicted) flow is
# plotted against distance for the subsampled data.
@manipulate for a = -10.0:.1:10.0,
                c = 0.01:.1:10.0,
                d0 = 0.001:.01:2.0,   # was `0.001:.01,2.0` (comma instead of colon)
                dscale = 0.05:.05:2.5,
                neterr = 0.1:.1:2.0,
                logisticconst = -10:.1:10
    # The same slider value is used for every age group; kd and desirecoefs are held at zero.
    vals = vcat(fill(a, Nages),
                fill(c, Nages),
                fill(d0, Nages),
                fill(dscale, Nages),
                [neterr, logisticconst],
                fill(0.0, Nages),            # kd
                fill(0.0, Nages * Ncoefs))   # desirecoefs
    # Variable names in exactly the same order as `vals`.
    # Named `parnames` rather than `names` so the `names` function is not shadowed.
    parnames = vcat(["a[$i]" for i in 1:Nages],
                    ["c[$i]" for i in 1:Nages],
                    ["d0[$i]" for i in 1:Nages],
                    ["dscale[$i]" for i in 1:Nages],
                    ["neterr", "logisticconst"],
                    ["kd[$i]" for i in 1:Nages],
                    ["desirecoefs[$i]" for i in 1:(Ncoefs * Nages)])
    # The original built the name vector but never passed it; the values need their
    # keys so they can be matched to model variables.
    (preds, netflows) = generated_quantities(model3, vals, parnames)
    @df dtsmall plot(:distance, log.(:flows ./ preds))
end
