Installing necessary packages:
```Julia
using Pkg

Pkg.add(["Random", "Distributions", "DataFrames", "GLM", "Statistics", "ProgressMeter", "Plots"])
Pkg.add(url = "https://github.com/ncn-foreigners/UnobservedCountEstimation.jl")
```

In [1]:
using Random, Distributions, DataFrames, GLM, Statistics, ProgressMeter, CSV, UnobservedCountEstimation

In [12]:
α = 1.18
β = .4
Q = 20 # Liczba państw
nsims = 10000

N_distr = Poisson(2000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.75)

res = Vector{Any}(missing, nsims)

prog = Progress(10*Threads.nthreads(), "Simulation progress ...")

Threads.@threads for i in 1:(10*Threads.nthreads())
    Threads.threadid()
    next!(prog)
end # end for

In [13]:
prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)

    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:04[39m


In [14]:
df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,10388.3,9564.0,10383.0,11284.0,0,Float64
2,expected,10387.6,9630.59,10383.5,11284.4,0,Float64
3,glm_est,10124.5,7769.26,10108.9,12619.6,0,Float64
4,ols_est,10043.8,7344.74,10027.7,12962.6,0,Float64
5,mle_glm_start,10212.9,7.81636e-07,10075.2,1084720.0,0,Float64
6,mle_lm_start,10114.2,129.876,10074.2,281008.0,0,Float64


In [18]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,-3.88775e-05,1.01787,0.00791474,Expected value
2,0.0253778,33.2048,0.0455993,glm
3,0.0331568,49.1403,0.0554955,ols
4,0.0169276,11197.9,0.0614371,mle_glm_start
5,0.0263298,761.149,0.0524953,mle_lm_start


In [19]:
DataFrame(
    rel_bias = [mean((df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])        for k in 3:6],
    rel_mse  = [mean(((df_res[:, 2] .- df_res[:, k]) .^ 2) ./ df_res[:, 2]) for k in 3:6],
    rel_mae  = [mean(abs.(df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])    for k in 3:6],
    est      = ["glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,0.0253226,34.097,0.0462806,glm
2,0.0330993,50.0688,0.0560264,ols
3,0.0166834,11405.2,0.0622333,mle_glm_start
4,0.0262998,755.17,0.0530745,mle_lm_start


In [20]:
CSV.write(pwd() * "/data_raw/init_res.csv", df_res);

### Higher population

In [42]:
α = 2
β = 2
Q = 7 # Liczba państw
nsims = 10000

N_distr = Poisson(80000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.25)

res = Vector{Any}(missing, nsims)

prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)
    
    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:03[39m


Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,448113000.0,432083000.0,448102000.0,462372000.0,0,Float64
2,expected,448113000.0,432107000.0,448102000.0,462396000.0,0,Float64
3,glm_est,443353000.0,263153000.0,441171000.0,780502000.0,0,Float64
4,ols_est,443427000.0,264904000.0,441216000.0,788874000.0,0,Float64
5,mle_glm_start,897721000000.0,1427.69,441220000.0,8900290000000000.0,0,Float64
6,mle_lm_start,443402000.0,264473000.0,441199000.0,789216000.0,0,Float64


In [43]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, 2]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,4.56932e-07,0.994899,3.75767e-05,Expected value
2,0.0106187,2788950.0,3.75767e-05,glm
3,0.0104561,2923100.0,3.75767e-05,ols
4,-1989.58,1.75624e+19,3.75767e-05,mle_glm_start
5,0.0105128,2928510.0,3.75767e-05,mle_lm_start


In [44]:
DataFrame(
    rel_bias = [mean((df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])        for k in 3:6],
    rel_mse  = [mean(((df_res[:, 2] .- df_res[:, k]) .^ 2) ./ df_res[:, 2]) for k in 3:6],
    rel_mae  = [mean(abs.(df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])    for k in 3:6],
    est      = ["glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,0.0106182,2789010.0,0.0606381,glm
2,0.0104556,2923140.0,0.062121,ols
3,-1989.51,1.75617e+19,1989.58,mle_glm_start
4,0.0105123,2928550.0,0.0621786,mle_lm_start


In [45]:
CSV.write(pwd() * "/data_raw/init_res_1.csv", df_res);