# Experiment 1

In [1]:
using KLIEPInference
using ProximalBase, CoordinateDescent
using LinearAlgebra, SparseArrays, Statistics, Random
using Distributions, JLD

In [2]:
"""
    experiment1(file, idx, nx, ny, nrep)

Run `nrep` simulation replications and record, for four estimators of the
change `θx[idx] - θy[idx]`, the estimation error and the error divided by the
estimator's standard error.

# Arguments
- `file`: open handle from which the entries `"Θx"` and `"Θy"` are read. The
  handle is closed by this function, whether or not the reads succeed.
- `idx`: index, into the packed parameter vectors, of the coordinate of interest.
- `nx`, `ny`: number of samples drawn per replication from the samplers built
  on `θx` and `θy`, respectively.
- `nrep`: number of replications.

# Returns
An `nrep × 4 × 2` `Float64` array `res`. `res[rep, k, 1]` is the estimate of
method `k` minus `θx[idx] - θy[idx]`; `res[rep, k, 2]` is that error divided by
the standard error `σ` computed for the method. The methods are, in order:
oracle, naïve re-fitted, SparKLIE+1 and SparKLIE+2.
"""
function experiment1(file, idx, nx, ny, nrep)
    # Read both parameter sets; release the handle even if a read fails.
    θx, θy = try
        (pack(read(file, "Θx")), pack(read(file, "Θy")))
    finally
        close(file)
    end

    p = length(θx)

    # for both steps, we use autoscaling procedures with canonical choices of λ
    λ1 = 1.01 * quantile(Normal(), 1. - 0.05 / p)
    λ2 = sqrt(2. * log(p) / ny)

    # Report progress about every 10% of the run. The step is clamped to 1 so
    # that nrep < 10 does not produce a zero modulus (mod(rep, 0) throws a
    # DivideError).
    progress_step = max(div(nrep, 10), 1)

    res = zeros(Float64, nrep, 4, 2)
    for rep in 1:nrep
        if mod(rep, progress_step) == 0
            println("$(rep) / $(nrep)")
        end

        # generate samples
        spl = IsingSampler(θx; thin=2000)
        X = rand(spl, nx)
        spl = IsingSampler(θy; thin=2000)
        Y = rand(spl, ny)

        Ψx = Ψising(X)
        Ψy = Ψising(Y)

        # oracle estimate: fit only on the coordinates where θx and θy differ
        # NOTE(review): findfirst below yields `nothing` if idx is not in this
        # support (θx[idx] == θy[idx]) — confirm callers never pass such an idx.
        supp = findall(!iszero, θx - θy)
        θ = KLIEP(Ψx[supp, :], Ψy[supp, :], CD_KLIEP())

        # right-hand side is the indicator of idx within supp
        ω = KLIEP_Hessian(θ, Ψy[supp, :])\(supp .=== idx)
        σ = stderr_SparKLIE(Ψx[supp, :], Ψy[supp, :], θ, ω)

        res[rep, 1, 1] = θ[findfirst(isequal(idx), supp)] - θx[idx] + θy[idx]
        res[rep, 1, 2] = res[rep, 1, 1] / σ

        # step 1
        θ = spKLIEP(Ψx, Ψy, λ1, CD_KLIEP(); loadings=true)

        # naïve re-fitted estimate: re-fit on the support selected in step 1
        supp = KLIEPInference._find_supp(idx, θ)
        θ[supp] = KLIEP(Ψx[supp, :], Ψy[supp, :], CD_KLIEP())

        ω = KLIEP_Hessian(θ[supp], Ψy[supp, :])\(supp .=== idx)
        σ = stderr_SparKLIE(Ψx[supp, :], Ψy[supp, :], θ[supp], ω)

        res[rep, 2, 1] = θ[idx] - θx[idx] + θy[idx]
        res[rep, 2, 2] = res[rep, 2, 1] / σ

        # step 2: uses the re-fitted θ from above, then re-solves ω exactly on
        # its own selected support
        H = KLIEP_Hessian(θ, Ψy)
        ω = Hinv_row(H, idx, λ2)
        supp = KLIEPInference._find_supp(idx, ω)
        ω[supp] = view(H, supp, supp)\(supp .=== idx)

        # SparKLIE+1
        θ1 = KLIEPInference._debias1(Ψx, Ψy, θ, ω, idx; refit=false)
        σ = stderr_SparKLIE(Ψx, Ψy, θ, ω)

        res[rep, 3, 1] = θ1 - θx[idx] + θy[idx]
        res[rep, 3, 2] = res[rep, 3, 1] / σ

        # SparKLIE+2
        θ2 = KLIEPInference._debias2(Ψx, Ψy, θ, ω, idx)

        # standard error is computed on the support found from θ and ω together
        supp = KLIEPInference._find_supp(idx, θ, ω)
        ω = KLIEP_Hessian(θ[supp], Ψy[supp, :])\(supp .=== idx)
        σ = stderr_SparKLIE(Ψx[supp, :], Ψy[supp, :], θ[supp], ω)

        res[rep, 4, 1] = θ2 - θx[idx] + θy[idx]
        res[rep, 4, 2] = res[rep, 4, 1] / σ
    end
    return res
end

experiment1 (generic function with 1 method)

In [3]:
# Seed the default random number generator so that the random draws made by
# the cells below can be reproduced.
Random.seed!(1218)

MersenneTwister(UInt32[0x000004c2], Random.DSFMT.DSFMT_state(Int32[-854650020, 1073556053, 1620855344, 1073630175, 1698263943, 1073736988, 691512570, 1073738378, -673392900, 1073121727  …  47415821, 1072887989, -496483395, 1073429484, -1104767902, 678491891, -1379894893, 290756459, 382, 0]), [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], UInt128[0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000  …  0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x0000000000000

## $m = 25$, $n_x = 150$, $n_y = 300$

### Chain (1)

In [4]:
# Chain (1), m = 25: run 1000 replications with nx = 150 and ny = 300 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/chain1_25.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(5, 6)  # edge of interest; presumably the packed index of node pair (5, 6) — confirm
res = experiment1(file, idx, 150, 300, 1000)
@save "res/res_chain1_25.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [5]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.00505 0.08033 -0.01894 -0.00621]
coverage: [0.96 0.85 0.934 0.945]


### Chain (2)

In [6]:
# Chain (2), m = 25: run 1000 replications with nx = 150 and ny = 300 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/chain2_25.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(5, 6)  # edge of interest; presumably the packed index of node pair (5, 6) — confirm
res = experiment1(file, idx, 150, 300, 1000)
@save "res/res_chain2_25.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [7]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.00819 0.0692 0.00526 -0.01013]
coverage: [0.962 0.907 0.948 0.948]


### Tree (1)

In [8]:
# Tree (1), m = 25: run 1000 replications with nx = 150 and ny = 300 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/tree1_25.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(1, 3)  # edge of interest; presumably the packed index of node pair (1, 3) — confirm
res = experiment1(file, idx, 150, 300, 1000)
@save "res/res_tree1_25.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [9]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.01763 0.06698 -0.02323 -0.04143]
coverage: [0.972 0.925 0.932 0.958]


### Tree (2)

In [10]:
# Tree (2), m = 25: run 1000 replications with nx = 150 and ny = 300 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/tree2_25.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(1, 3)  # edge of interest; presumably the packed index of node pair (1, 3) — confirm
res = experiment1(file, idx, 150, 300, 1000)
@save "res/res_tree2_25.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [11]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.0077 0.03803 0.01168 -0.00587]
coverage: [0.972 0.946 0.957 0.977]


## $m = 50$, $n_x = 300$, $n_y = 600$

### Chain (1)

In [12]:
# Chain (1), m = 50: run 1000 replications with nx = 300 and ny = 600 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/chain1_50.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(5, 6)  # edge of interest; presumably the packed index of node pair (5, 6) — confirm
res = experiment1(file, idx, 300, 600, 1000)
@save "res/res_chain1_50.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [13]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.0036 0.07692 -0.02301 -0.01673]
coverage: [0.946 0.822 0.943 0.948]


### Chain (2)

In [14]:
# Chain (2), m = 50: run 1000 replications with nx = 300 and ny = 600 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/chain2_50.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(5, 6)  # edge of interest; presumably the packed index of node pair (5, 6) — confirm
res = experiment1(file, idx, 300, 600, 1000)
@save "res/res_chain2_50.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [15]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.00039 0.07636 0.01516 -0.00369]
coverage: [0.962 0.839 0.953 0.955]


### Tree (1)

In [16]:
# Tree (1), m = 50: run 1000 replications with nx = 300 and ny = 600 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/tree1_50.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(1, 3)  # edge of interest; presumably the packed index of node pair (1, 3) — confirm
res = experiment1(file, idx, 300, 600, 1000)
@save "res/res_tree1_50.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [17]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [0.00256 0.08975 0.00875 -0.00539]
coverage: [0.976 0.874 0.973 0.979]


### Tree (2)

In [18]:
# Tree (2), m = 50: run 1000 replications with nx = 300 and ny = 600 on the
# stored parameters, then save the raw result array.
file = jldopen("graphs/tree2_50.jld", "r")  # handle is closed inside experiment1
idx = KLIEPInference.trimap(1, 3)  # edge of interest; presumably the packed index of node pair (1, 3) — confirm
res = experiment1(file, idx, 300, 600, 1000)
@save "res/res_tree2_50.jld" res

100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
1000 / 1000


In [19]:
# Summarise `res` column by column (second dimension): the mean of slice 1,
# and the fraction of slice-2 entries whose absolute value is below the 0.975
# standard normal quantile.
let
    bias = round.(mean(res[:, :, 1], dims=1), digits=5)
    println("avg bias: $(bias)")
    z = quantile(Normal(), 0.975)
    covered = abs.(res[:, :, 2]) .< z
    println("coverage: $(mean(covered, dims=1))")
end

avg bias: [-0.00611 0.05306 -0.00248 -0.00826]
coverage: [0.968 0.913 0.952 0.977]
