# Voting records of the 109th United States Senate

In [1]:
using KLIEPInference
using ProximalBase, CoordinateDescent
using LinearAlgebra, SparseArrays, Statistics
using Distributions, StatsBase
using DelimitedFiles

In [2]:
# Each `readdlm(...; header=true)` call returns the data matrix and the header row
# separately; `file` and `colnames` are scratch names that every read overwrites.

# Names: first column of the roster file, lower-cased, title-cased, then stripped of
# trailing whitespace.
file, colnames = readdlm("data14_proc.csv", ',', String, '\n'; header=true)
names = map(rstrip ∘ titlecase ∘ lowercase, file[:, 1])

# Votes: integer matrix with the first column of the file dropped.
file, colnames = readdlm("data_senate_global.csv", ',', Int, '\n'; header=true)
votes = file[:, 2:end]

# Dates: second column of the dates file, kept as strings.
file, colnames = readdlm("dates_senate_global.csv", ',', String, '\n'; header=true)
dates = file[:, 2]

# Year of each date: the text before the first "-" parsed as an integer.
yyyy = [parse(Int, first(split(d, "-"))) for d in dates];

In [3]:
# Position of the hub entry in `names`; `@show` echoes the expression and its value.
@show hub = findfirst(names .== "Menendez")

# For k = 1:99, pair `hub` with every other index (k below hub, k+1 from hub onward, so
# hub itself is skipped) and store the `trimap` index of that pair, smaller index first.
θ_ind = Int[
    k < hub ? KLIEPInference.trimap(k, hub) : KLIEPInference.trimap(hub, k + 1)
    for k in 1:99
]

# Ψx uses the rows of `votes` from the first to the last 2006 entry of `yyyy`, Ψy the
# same for 2005. The slice is transposed and converted to Bool before Ψising.
# NOTE(review): the first:last range assumes each year's rows are contiguous — confirm.
Ψx = let rows = findfirst(==(2006), yyyy):findlast(==(2006), yyyy)
    Ψising(convert.(Bool, transpose(votes[rows, :])))
end
Ψy = let rows = findfirst(==(2005), yyyy):findlast(==(2005), yyyy)
    Ψising(convert.(Bool, transpose(votes[rows, :])))
end;

hub = findfirst(names .== "Menendez") = 59


In [4]:
# Dimensions: Ψx is p × nx; ny is the number of columns of Ψy.
p, nx = size(Ψx)
ny = size(Ψy, 2)

# Step 1: penalized fit with λ1 = 1.01 × the standard-normal quantile at 1 - 0.05/p.
println("step 1")
λ1 = 1.01 * quantile(Normal(), 1. - 0.05 / p)
θ = spKLIEP(Ψx, Ψy, λ1, CD_KLIEP(); loadings=true)

# Step 2: one row `ω` per target index in θ_ind, with λ2 = sqrt(2 log(p) / ny).
# H is evaluated at the all-zero parameter vector, not at θ.
# NOTE(review): confirm that evaluating H at zero is intended.
println("step 2")
λ2 = sqrt(2. * log(p) / ny)
H = KLIEP_Hessian(spzeros(Float64, p), Ψy)
# Size and iterate from θ_ind itself instead of repeating its length as a literal.
Hinv = Vector{SparseIterate{Float64}}(undef, length(θ_ind))
for (k, idx) in enumerate(θ_ind)
    ω = Hinv_row(H, idx, λ2)

    # Refit on the selected indices: solve H[supp, supp] * x = onehot, where onehot
    # marks the position of `idx` within `supp`, and write x back into ω.
    # presumably `_find_supp` returns the support of ω together with idx — confirm
    supp = KLIEPInference._find_supp(idx, ω)
    h = view(H, supp, supp)
    onehot = (supp .== idx)
    ω[supp] = h \ onehot

    Hinv[k] = ω
end

# Step 3: boot_SparKLIE returns two results from the step 1 and step 2 outputs.
println("step 3 + bootstrap...")
boot1, boot2 = boot_SparKLIE(Ψx, Ψy, θ, Hinv, θ_ind);

step 1
step 2
step 3 + bootstrap...


In [5]:
# presumably simultaneous confidence intervals at level 0.05 — confirm in KLIEPInference
CI = simulCI(boot1, 0.05)

# Keep an estimate only where column 1 of CI is above zero or column 2 is below zero;
# every other entry is multiplied by `false` and becomes zero.
δ = @. boot1.θhat * ((CI[:, 1] > 0.) | (CI[:, 2] < 0.))

# Number of entries that survived the masking.
@show count(!iszero, δ);

count(!iszero, δ) = 0


In [6]:
# presumably simultaneous confidence intervals at level 0.05 — confirm in KLIEPInference
CI = simulCI(boot2, 0.05)

# Keep an estimate only where column 1 of CI is above zero or column 2 is below zero;
# every other entry is multiplied by `false` and becomes zero.
δ = @. boot2.θhat * ((CI[:, 1] > 0.) | (CI[:, 2] < 0.))

# Number of entries that survived the masking.
@show count(!iszero, δ);

count(!iszero, δ) = 0
