Installing necessary packages:
```Julia
using Pkg

Pkg.add(["Random", "Distributions", "DataFrames", "GLM", "Statistics", "ProgressMeter", "Plots"])
Pkg.add(url = "https://github.com/ncn-foreigners/UnobservedCountEstimation.jl")
```

In [2]:
using Random, Distributions, DataFrames, GLM, Statistics, ProgressMeter, CSV, UnobservedCountEstimation

In [3]:
α = 1.18
β = .4
Q = 20 # Liczba państw
nsims = 10000

N_distr = Poisson(2000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.75)

res = Vector{Any}(missing, nsims)

prog = Progress(10*Threads.nthreads(), "Simulation progress ...")

Threads.@threads for i in 1:(10*Threads.nthreads())
    Threads.threadid()
    next!(prog)
end # end for

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:00[39m


In [4]:
prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)

    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:11[39m


In [5]:
df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,10387.4,9604.0,10385.0,11182.0,0,Float64
2,expected,10386.9,9692.25,10387.9,11074.7,0,Float64
3,glm_est,10125.0,8083.57,10102.9,12805.5,0,Float64
4,ols_est,10042.6,7624.68,10024.2,13424.9,0,Float64
5,mle_glm_start,10089.9,7816.64,10075.2,13147.0,0,Float64
6,mle_lm_start,10089.9,7816.64,10075.2,13147.0,0,Float64


In [6]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,-5.11886e-05,1.0084,0.00785276,Expected value
2,0.0252743,33.4738,0.0458633,glm
3,0.0332085,49.2042,0.0556424,ols
4,0.0286597,39.3878,0.0496685,mle_glm_start
5,0.0286597,39.3878,0.0496685,mle_lm_start


In [7]:
DataFrame(
    rel_bias = [mean((df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])        for k in 3:6],
    rel_mse  = [mean(((df_res[:, 2] .- df_res[:, k]) .^ 2) ./ df_res[:, 2]) for k in 3:6],
    rel_mae  = [mean(abs.(df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])    for k in 3:6],
    est      = ["glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,0.0252333,34.3343,0.0464634,glm
2,0.0331669,50.0607,0.056162,ols
3,0.0286182,40.2513,0.0502106,mle_glm_start
4,0.0286182,40.2513,0.0502106,mle_lm_start


In [8]:
CSV.write(pwd() * "/data_raw/init_res.csv", df_res);

### Higher population

In [9]:
α = 1.7
β = 1.5
Q = 7 # Liczba państw
nsims = 10000

N_distr = Poisson(8000)
M_distr = (N, α) -> Poisson.(N .^ α)
p_distr = Uniform(.1,.25)

res = Vector{Any}(missing, nsims)

prog = Progress(nsims, "Simulation progress ...")

Threads.@threads for k in 1:nsims
    N = reduce(vcat, rand.(Binomial.(rand(N_distr, Q), .1), 1))
    M = reduce(vcat, rand.(M_distr(N, α), 1))
    p = rand(p_distr, Q)
    
    m = reduce(vcat, rand.(Poisson.(M .* p), 1))
    n = reduce(vcat, rand.(Binomial.(N, p), 1))

    df1 = DataFrame(
        y = m,
        x1 = log.(N),
        x2 = log.(n ./ N)
    )

    mm = glm(@formula(y ~ x1 + x2 + 0), df1, Poisson(), LogLink())
    α̂₁, β̂₁ = coef(mm)

    ols = lm(@formula(log(y) ~ x1 + x2 + 0), df1)
    α̂₂, β̂₂ = coef(ols)
    
    mle_1 = zhang_model(m, N, n; start = "glm")[1][4]
    mle_2 = zhang_model(m, N, n; start = "lm")[1][4]

    res[k] = [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂) mle_1 mle_2]
    #push!(res, [sum(M) sum(N .^ α) sum(N .^ α̂₁) sum(N .^ α̂₂)])
    next!(prog)
end # end for

df_res = DataFrame(reduce(vcat, [res[i] for i in 1:length(res) if isassigned(res, i)]), ["actual", "expected", "glm_est", "ols_est", "mle_glm_start", "mle_lm_start"])

describe(df_res)

[32mSimulation progress ... 100%|████████████████████████████| Time: 0:00:03[39m


Row,variable,mean,min,median,max,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Int64,DataType
1,actual,603546.0,555090.0,603597.0,655810.0,0,Float64
2,expected,603546.0,554276.0,603595.0,655760.0,0,Float64
3,glm_est,539599.0,155620.0,520156.0,2658690.0,0,Float64
4,ols_est,540289.0,155340.0,519848.0,2793660.0,0,Float64
5,mle_glm_start,540266.0,156088.0,519640.0,2760460.0,0,Float64
6,mle_lm_start,540266.0,156088.0,519640.0,2760460.0,0,Float64


In [10]:
DataFrame(
    rel_bias = [mean((df_res[:, 1] .- df_res[:, k]) ./ df_res[:, 1])        for k in 2:6],
    rel_mse  = [mean(((df_res[:, 1] .- df_res[:, k]) .^ 2) ./ df_res[:, 1]) for k in 2:6],
    rel_mae  = [mean(abs.(df_res[:, 1] .- df_res[:, 2]) ./ df_res[:, 1])    for k in 2:6],
    est      = ["Expected value", "glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,-2.41093e-06,1.0016,0.0010277,Expected value
2,0.105929,36874.9,0.0010277,glm
3,0.104791,38148.9,0.0010277,ols
4,0.104828,38067.0,0.0010277,mle_glm_start
5,0.104828,38067.0,0.0010277,mle_lm_start


In [11]:
DataFrame(
    rel_bias = [mean((df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])        for k in 3:6],
    rel_mse  = [mean(((df_res[:, 2] .- df_res[:, k]) .^ 2) ./ df_res[:, 2]) for k in 3:6],
    rel_mae  = [mean(abs.(df_res[:, 2] .- df_res[:, k]) ./ df_res[:, 2])    for k in 3:6],
    est      = ["glm", "ols", "mle_glm_start", "mle_lm_start"]
)

Row,rel_bias,rel_mse,rel_mae,est
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,0.105932,36873.7,0.195919,glm
2,0.104794,38146.9,0.199357,ols
3,0.104832,38065.0,0.199213,mle_glm_start
4,0.104832,38065.0,0.199213,mle_lm_start


In [12]:
CSV.write(pwd() * "/data_raw/init_res_1.csv", df_res);