Installing necessary packages:
```Julia
using Pkg

Pkg.add(["Random", "Distributions", "DataFrames", "GLM", "Statistics", "ProgressMeter", "Plots"])
Pkg.add(url = "https://github.com/ncn-foreigners/UnobservedCountEstimation.jl")
```

In [1]:
using Random, Distributions, DataFrames, GLM, Statistics, ProgressMeter, CSV, UnobservedCountEstimation

In [2]:
α = 1.18
β = .4
Q = 20 # Liczba państw
nsims = 10000

N_distr = Poisson(2000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.75)

res = Vector{Any}(missing, nsims)

prog = Progress(10*Threads.nthreads(), "Simulation progress ...")

Threads.@threads for i in 1:(10*Threads.nthreads())
    Threads.threadid()
    next!(prog)
end # end for

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:00[39m


In [3]:
prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)

    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:10[39m


In [4]:
df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,10385.6,9539.0,10383.0,11291.0,0,Float64
2,expected,10386.4,9644.23,10384.1,11138.6,0,Float64
3,glm_est,10109.7,7737.6,10096.3,12628.6,0,Float64
4,ols_est,10026.6,7491.12,10012.3,13909.2,0,Float64
5,mle_glm_start,10075.4,7635.48,10064.9,12928.9,0,Float64
6,mle_lm_start,10075.4,7635.48,10064.9,12928.9,0,Float64


In [5]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,-0.000169505,1.01479,0.00791518,Expected value
2,0.0265683,33.5616,0.0457753,glm
3,0.0345757,49.7021,0.055824,ols
4,0.0298726,39.6858,0.0496614,mle_glm_start
5,0.0298726,39.6858,0.0496614,mle_lm_start


In [7]:
DataFrame(
    bias  = [mean((df_res[:, 1] .- df_res[:, k]) ./ 1000)        for k in 2:6],
    mse   = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ 1000) for k in 2:6],
    rmse  = [sqrt(mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ 1000)) for k in 2:6],
    var   = [var(df_res[:, k] / 1000) for k in 2:6],
    sd    = [std(df_res[:, k] / 1000) for k in 2:6],
    mae   = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ 1000)    for k in 2:6],
    est   = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,bias,mse,rmse,var,sd,mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,String
1,-0.00076667,10.5339,3.24559,0.0372891,0.193104,0.0821635,Expected value
2,0.275946,348.589,18.6705,0.316884,0.562924,0.47536,glm
3,0.359076,516.281,22.7218,0.431709,0.657045,0.579754,ols
4,0.310269,412.235,20.3036,0.359972,0.599977,0.515746,mle_glm_start
5,0.310269,412.235,20.3036,0.359972,0.599977,0.515746,mle_lm_start


In [8]:
CSV.write(pwd() * "/data_raw/init_res.csv", df_res);

### Higher population

In [9]:
α = 1.7
β = 1.5
Q = 7 # Liczba państw
nsims = 10000

N_distr = Poisson(8000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.25)

res = Vector{Any}(missing, nsims)

prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)
    
    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:03[39m


Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,603295.0,552533.0,603318.0,666878.0,0,Float64
2,expected,603301.0,552508.0,603320.0,666694.0,0,Float64
3,glm_est,540564.0,76104.5,521386.0,2219170.0,0,Float64
4,ols_est,541198.0,76830.3,521469.0,2377820.0,0,Float64
5,mle_glm_start,541150.0,75757.0,521319.0,2383860.0,0,Float64
6,mle_lm_start,541150.0,75757.0,521319.0,2383860.0,0,Float64


In [10]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,-1.22167e-05,1.01319,0.00103183,Expected value
2,0.104025,34708.4,0.192399,glm
3,0.102981,36050.7,0.19624,ols
4,0.103062,36019.4,0.196201,mle_glm_start
5,0.103062,36019.4,0.196201,mle_lm_start


In [11]:
DataFrame(
    bias  = [mean((df_res[:, 1] .- df_res[:, k]) ./ 1000)        for k in 2:6],
    mse   = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ 1000) for k in 2:6],
    rmse  = [sqrt(mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ 1000)) for k in 2:6],
    var   = [var(df_res[:, k] / 1000) for k in 2:6],
    sd    = [std(df_res[:, k] / 1000) for k in 2:6],
    mae   = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ 1000)    for k in 2:6],
    est   = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,bias,mse,rmse,var,sd,mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,String
1,-0.00609831,611.385,24.7262,188.862,13.7427,0.622393,Expected value
2,62.731,20954100.0,4577.57,17203.6,131.162,116.048,glm
3,62.0973,21764900.0,4665.29,18097.9,134.529,118.368,ols
4,62.1453,21745200.0,4663.18,18073.2,134.437,118.343,mle_glm_start
5,62.1453,21745200.0,4663.18,18073.2,134.437,118.343,mle_lm_start


In [12]:
CSV.write(pwd() * "/data_raw/init_res_1.csv", df_res);