In [58]:
using LinearAlgebra, Random, Combinatorics, StatsBase

In [59]:
#Random.seed!(1)

In [60]:
# Problem size: n candidate variables, of which k are selected.
n = 15
k = 8
# Q is approximately low-rank: each of the five rows of M is scaled by a rapidly
# decaying factor before the Gram matrix M' * M is formed.
M = [1.0, 0.1, 0.05, 0.01, 0.01] .* randn(5, n)
Q = Hermitian(M'M);

In [61]:
# Number of indices in the initial (fixed) support.
i = 1
v = zeros(n)
# Sampling weights proportional to the squared entries of the first row of M.
weights = pweights(M[1,:].^2 / sum(M[1,:].^2))
# `sample` draws with replacement by default; a support must not contain repeated
# indices, so request sampling without replacement (matters as soon as i > 1).
support = sample(1:n, weights, i; replace=false)
# Put the leading eigenvector of the support's principal submatrix into v.
# Eigenvalues of a Hermitian matrix are returned in ascending order, so the last
# column of `.vectors` belongs to the largest eigenvalue.
v[support] = eigen(Hermitian(Q[support,:][:,support])).vectors[:, end]

1-element Array{Float64,1}:
 1.0

In [62]:
# Scaled image of v under Q; the product Q*v is computed once and reused.
# The following cell checks that Q - r*r' has a zero eigenvalue on the support.
r = let Qv = Q * v
    Qv / sqrt(norm(Qv[support]))
end

15-element Array{Float64,1}:
 -0.5387208660217729
  0.15064962343984398
 -0.9149472934736609
 -0.16680151657159478
 -1.4410771113597918
 -0.18575134746790786
  1.9134362782763399
  1.0491356741230013
  1.1607482147079071
 -0.9582695185148172
 -0.5469914608648216
 -0.7642876979963475
  0.4410599483160326
 -1.1458511855637021
  0.5974819367268513

In [63]:
# Eigenvalues of the residual Q - r*r' restricted to the support.
let rs = r[support]
    eigen(Hermitian(Q[support, support] - rs * rs')).values
end

1-element Array{Float64,1}:
 0.0

In [64]:
# Full eigen-decomposition of the principal submatrix of Q on the support.
eigen(Hermitian(Q[support, support]))

Eigen{Float64,Float64,Array{Float64,2},Array{Float64,1}}
values:
1-element Array{Float64,1}:
 3.661238391024011
vectors:
1×1 Array{Float64,2}:
 1.0

In [65]:
"""
    SPCA(Q, n, k, support)

Brute-force sparse PCA with a fixed partial support.

Enumerate every way of extending `support` to `k` indices using indices from `1:n`
that are not already in `support`, and keep the extension whose principal submatrix
of `Q` has the largest maximum eigenvalue.

# Arguments
- `Q`: square matrix; each candidate submatrix is wrapped in `Hermitian`.
- `n`: indices are drawn from `1:n`.
- `k`: size of the full support.
- `support`: indices that must be part of the result.

# Returns
A tuple `(opt, opt_support)`: the best eigenvalue and the index vector
`[support; p]`, i.e. the fixed support followed by the added indices `p`.

# Throws
- `ArgumentError` if `support` cannot be extended to exactly `k` indices.
"""
function SPCA(Q, n, k, support)
    extra = k - length(support)
    choices = setdiff(1:n, support)
    if extra < 0 || extra > length(choices)
        throw(ArgumentError(
            "cannot extend a support of size $(length(support)) to k = $k " *
            "with $(length(choices)) remaining indices"))
    end

    # Track the running best instead of storing every combination in a Dict.
    opt = nothing
    opt_support = nothing
    for p in combinations(choices, extra)
        idx = [support; p]
        # eigmax only computes the largest eigenvalue; no eigenvectors are needed.
        val = eigmax(Hermitian(Q[idx, idx]))
        if opt === nothing || val > opt
            opt = val
            opt_support = idx
        end
    end
    return opt, opt_support
end

SPCA (generic function with 2 methods)

In [66]:
# Brute-force reference solution: the best eigenvalue and the size-k index set
# that contains the sampled `support`.
obj, opt_support = SPCA(Q, n, k, support)

(11.857889664305187, [7, 3, 5, 8, 9, 10, 12, 14])

In [67]:
"""
    zd(arr)

Return a copy of `arr` whose main-diagonal entries are set to zero. The argument
itself is left unmodified.
"""
function zd(arr)
    out = copy(arr)
    for d in diagind(out)
        out[d] = 0
    end
    return out
end
"""
    rowSums(arr)

Compute one bound per row of `arr` from the entries that a sparse index set
extending the global `support` could select in that row.

Reads the globals `k` (target support size) and `support` (fixed indices).

For each row `index` the bound is the sum of

- the diagonal entry `arr[index, index]` (only added separately when `index` is not
  in `support`; otherwise it is already part of the next term),
- the entries in the `support` columns, and
- the largest remaining entries of the row (support columns and the diagonal
  excluded): `k - length(support)` of them for a support row, and
  `k - length(support) + 1` otherwise.

# Returns
A `Vector{Float64}` with `size(arr, 1)` entries.
"""
function rowSums(arr)
    nrows = size(arr, 1)
    # Sized from `arr` itself rather than from the global `n`, so inputs with any
    # number of rows are handled.
    result = zeros(nrows)
    free = k - length(support)
    for index = 1:nrows
        insupport = index in support
        # NOTE(review): an off-support row already contributes its diagonal and the
        # support columns, so `free + 1` extra entries sums k + 2 entries in total;
        # `free - 1` would give exactly k. Kept as in the original (the result is
        # still an upper bound for non-negative inputs) — confirm the intent.
        stillneed = insupport ? free : free + 1
        acc = insupport ? 0.0 : Float64(arr[index, index])
        acc += sum(arr[index, support])
        # Indexing already yields a fresh row; Vector makes sure it is dense and mutable.
        row = Vector(arr[index, :])
        row[support] .= 0.
        row[index] = 0.
        sort!(row, rev=true)
        result[index] = acc + sum(row[1:stillneed])
    end
    return result
end
# Note: Why doesn't finalSum work? We used a simple sort(rev=true)
# (via priority queue) in the implementation, and we haven't
# finished exploring this finalSum option.
"""
    finalSum(arr)

For every row of `arr`, add the entries in the columns of the global `support` to
the `k - length(support)` largest entries among the remaining columns.

Reads the globals `k` and `support`. In this file it is mostly called on a 1×n row
(for example `rowSums(Q.^2)'`), in which case the result has a single element.

# Returns
A `Vector{Float64}` with `size(arr, 1)` entries.
"""
function finalSum(arr)
    nrows = size(arr, 1)
    # Sized from `arr` itself rather than from the global `n`, so inputs with any
    # number of rows are handled.
    result = zeros(nrows)
    stillneed = k - length(support)
    for index = 1:nrows
        fixed = sum(arr[index, support])
        # Indexing already yields a fresh row; Vector makes sure it is dense and mutable.
        row = Vector(arr[index, :])
        row[support] .= 0.
        sort!(row, rev=true)
        result[index] = fixed + sum(row[1:stillneed])
    end
    return result
end

finalSum (generic function with 1 method)

In [68]:
# Frobenius upper bound
# NOTE(review): the slice 1:n keeps every per-row bound, so the sort only changes
# the summation order; possibly 1:k (the k largest rows) was intended — confirm.
sqrt.(sum(sort(rowSums(Q.^2), rev=true)[1:n]))

12.70536101395267

In [69]:
# Support-aware Frobenius bound: rowSums(Q.^2)' is a single 1×n row, so finalSum
# adds the per-row bounds on `support` to the k - length(support) largest remaining
# ones; the square root is taken of that total.
sqrt(finalSum(rowSums(Q.^2)')[1])

12.085589050384218

In [70]:
# Largest per-row bound computed from the absolute values of Q (presumably a
# Gershgorin-style upper bound, going by the variable name — confirm).
gersh_ub = maximum(rowSums(abs.(Q)))

17.886329660846936

In [71]:
# Lower bound from trace of the rank-one system
# r' .^ 2 is a single 1×n row, so finalSum sums r.^2 over `support` plus its
# k - length(support) largest remaining entries.
finalSum(r' .^ 2)

1-element Array{Float64,1}:
 11.838483357289906

In [72]:
# The (k - length(support))-th largest entry of r.^2 outside the support, i.e. the
# smallest entry that finalSum(r' .^ 2) still includes.
smallest_contribution = let sq = r .^ 2
    sq[support] .= 0
    partialsort(sq, k - length(support); rev=true)
end

0.5841356853085562

In [73]:
# Lower threshold for eliminating variables
# Computed as (rank-one lower bound)^2 minus
# (support-aware Frobenius upper bound - smallest_contribution)^2.
threshold_sq = finalSum(r'.^2)[1] ^ 2 - (
    sqrt(finalSum(rowSums(Q.^2)')[1])
    - smallest_contribution
)^2

7.866258691819837

In [74]:
# Per-variable bound: the row bound counted twice (row and column of the symmetric
# Q.^2) minus the squared diagonal entry, which is contained in both.
frobenius_cutout = rowSums(Q.^2)*2 - diag(Q.^2)

15-element Array{Float64,1}:
  7.142382647483415
  0.5446539129708388
 20.376347771200013
  0.6695832707130943
 47.915245763786686
  0.8261453880481558
 73.28235296250915
 26.229665139398374
 32.02788306193116
 22.3472929261179
  7.5382152001611615
 14.335500408516515
  4.8499994076319535
 31.046866751807244
  8.788041828818185

In [75]:
# Smallest per-variable bound; the check below flags variables whose bound is
# below threshold_sq.
minimum(frobenius_cutout)

0.5446539129708388

In [76]:
# Should be empty. Variables that are in the support should satisfy the inequality.
# Lists the indices SPCA added to the fixed support whose cutout bound falls below
# the threshold.
intersect(setdiff(opt_support, support), findall(frobenius_cutout .< threshold_sq))

0-element Array{Int64,1}

In [77]:
# Lower threshold for eliminating variables
# Variant that uses the rank-one lower bound finalSum(r'.^2)[1] inside the
# parentheses where the earlier cell used the Frobenius upper bound.
threshold_sq = finalSum(r'.^2)[1] ^ 2 - (
    finalSum(r'.^2)[1]
    - smallest_contribution
)^2

13.489346678998075

In [78]:
# Should be empty. Variables that are in the support should satisfy the inequality.
# However, some variables might be "forced on" and are not considered an issue.
# The fixed `support` is removed from opt_support before the comparison.
intersect(
    setdiff(opt_support, support),
    findall(frobenius_cutout .< threshold_sq))

0-element Array{Int64,1}

In [79]:
# Display the index set returned by SPCA (fixed support first, added indices after).
opt_support

8-element Array{Int64,1}:
  7
  3
  5
  8
  9
 10
 12
 14

In [80]:
# Display the per-variable bounds for comparison with opt_support.
frobenius_cutout

15-element Array{Float64,1}:
  7.142382647483415
  0.5446539129708388
 20.376347771200013
  0.6695832707130943
 47.915245763786686
  0.8261453880481558
 73.28235296250915
 26.229665139398374
 32.02788306193116
 22.3472929261179
  7.5382152001611615
 14.335500408516515
  4.8499994076319535
 31.046866751807244
  8.788041828818185

In [88]:
# Randomized check of the bounds and of the elimination threshold: for every seed,
# build a fresh approximately low-rank Q, solve the problem by brute force and
# assert that (a) both Frobenius bounds dominate the optimum and (b) no variable
# chosen by SPCA would have been eliminated by the threshold test.
n = 15
k = 8
i = 1
for sd = 1000:10000
    # rowSums and finalSum read the global `support`. Declaring it global makes the
    # assignment below update that binding even when this code runs as a script
    # file, where assignments inside a top-level loop otherwise create locals.
    global support
    Random.seed!(sd)
    # Q is approximately low-rank.
    M = randn(5,n) .* [1.0; 0.1; 0.05; 0.01; 0.01]
    Q = Hermitian(M' * M);
    v = zeros(n)
    weights = pweights(M[1,:].^2 / sum(M[1,:].^2))
    # Without replacement, so the support never contains duplicate indices.
    support = sample(1:n, weights, i; replace=false)
    v[support] = eigen(Hermitian(Q[support,:][:,support])).vectors[:, end]
    obj, opt_support = SPCA(Q, n, k, support)

    r = Q*v / sqrt(norm((Q*v)[support]))
    # (k - length(support))-th largest entry of r.^2 outside the support.
    contributions = r .^ 2
    contributions[support] .= 0
    smallest_contribution = sort(contributions, rev=true)[k-length(support)]

    # Lower and upper bound
    # @assert finalSum(r'.^2)[1] < obj "@$sd obj ($obj)"
    row_bounds = rowSums(Q.^2)   # computed once and reused below
    f_sorted = sqrt(sum(sort(row_bounds, rev=true)[1:n]))
    # The square root puts this bound in the same units as `obj`; comparing `obj`
    # with the squared norm is not a valid Frobenius bound.
    f_constrained = sqrt(finalSum(row_bounds')[1])
    @assert obj <= f_sorted "@$sd obj ($obj) f_s ($f_sorted)"
    @assert obj <= f_constrained "@$sd obj ($obj) f_c ($f_constrained)"

    # Lower threshold for eliminating variables
    # (alternative explored earlier: finalSum(r'.^2)[1] in place of f_constrained)
    threshold_sq = finalSum(r'.^2)[1] ^ 2 - (f_constrained - smallest_contribution)^2
    # if threshold_sq > 0
    #     print("threshold @$sd\n")
    # end
    # Upper threshold for each variable's squared contribution
    frobenius_cutout = row_bounds*2 - diag(Q.^2)
    bad_var = intersect(
        setdiff(opt_support, support),
        findall(frobenius_cutout .< threshold_sq))
    @assert length(bad_var) == 0 "@$sd vars $bad_var"
end