In [1]:
using LinearAlgebra, CSV, Random, Tables

In [2]:
# Read the CSV into a plain matrix.
data = Tables.matrix(CSV.File("../NC-Data.csv"));
# `sqrt` of a matrix is the matrix square root (not element-wise); for a
# symmetric input LinearAlgebra obtains it from an eigendecomposition.
# The result is kept as its adjoint.
raw_data = adjoint(sqrt(data));
n = size(data, 1);
N = n^2;
# Subset size used by the top-k selections further down.
k = 7;
# Sanity check: largest absolute entry of raw_data^2 - data (should be ~0).
maximum(abs, raw_data * raw_data - data)

4.884981308350689e-15

In [3]:
# Both trace files are read into plain matrices; later cells index them by
# column with `node_id`.
nodes = Tables.matrix(CSV.File("../NC-K7-Trace-Nodes.csv"));
bounds = Tables.matrix(CSV.File("../NC-K7-Trace-Bounds.csv"));

In [4]:
# node_id = 50
node_id = 300
# Number of entries in this node's column of `nodes` that equal 1.
count(==(1), view(nodes, :, node_id))

3

In [5]:
# Show the column of `bounds` that belongs to the chosen node.
bounds[:, node_id]

3-element Array{Float64,1}:
 3.608137513865839
 6.26741838326191
 7.0

In [6]:
# Row indices at which this node's column of `nodes` equals 1; used below to
# pick columns of `raw_data`.
selected_data = findall(==(1), view(nodes, :, node_id))

3-element Array{Int64,1}:
 58
 60
 62

In [7]:
# Factorize the selected columns of `raw_data` once and reuse the result for
# all three quantities (largest singular value, leading left and right
# singular vectors) instead of recomputing the SVD for each.
s, u, v = let F = svd(raw_data[:, selected_data])
    maximum(F.S), F.U[:, 1], F.V[:, 1]
end;
# Columns of the identity picked out by `selected_data`. The identity is sized
# from `raw_data` rather than a hard-coded 101 so the cell follows the data.
D = Matrix(1.0I, size(raw_data, 2), size(raw_data, 2))[:, selected_data];
s

1.6649665589270974

In [26]:
# Square every entry of raw_data * u (u: leading left singular vector from
# the SVD cell).
rank_one_term = map(x -> x^2, raw_data * u)
# Zero the positions whose flag in this node's column of `nodes` is 0.
rank_one_term[findall(iszero, nodes[:, node_id])] .= 0
rank_one_term

101-element Array{Float64,1}:
 0.042608386423828526
 0.12169366148453958
 0.0010827657107646796
 0.0
 0.3576958952379641
 0.590931332887428
 0.0028134344289715506
 0.045710787852978954
 0.0134758787384476
 0.0402507143194045
 ⋮
 0.19651099861244686
 0.03521501895354107
 0.00462342769518394
 0.026081129247667985
 0.0008624637095038933
 0.37524469637807883
 0.11466762911354514
 0.03909959370980699
 0.0

In [35]:
# Mask of entries at least as large as the k-th largest entry of
# `rank_one_term` (ties at the threshold can select more than k entries).
lb_proj_nodes = rank_one_term .>= partialsort(rank_one_term, k; rev=true)
# Sum of the selected entries.
lb_proj = sum(rank_one_term[lb_proj_nodes])
# Squared largest singular value of the selected columns of `raw_data`.
lb_diag = abs2(maximum(svd(raw_data[:, lb_proj_nodes]).S))

6.070332386128684

In [9]:
# Show this node's column of `bounds` again for reference.
bounds[:, node_id]

3-element Array{Float64,1}:
 3.608137513865839
 6.26741838326191
 7.0

In [50]:
# lambda_1 is the 2nd entry of this node's `bounds` column; lambda_2 and
# lambda_2_lb are the 3rd entry minus `lb_diag` and minus lambda_1 respectively.
# NOTE(review): the meaning of the individual `bounds` rows is not visible
# here -- confirm against whatever produced the trace files.
lambda_1, lambda_2, lambda_2_lb = let mid = bounds[2, node_id], top = bounds[3, node_id]
    mid, top - lb_diag, top - mid
end
lambda_1 - lambda_2

5.337750769390595

In [37]:
rank_n1_term = let
    gap = lambda_1 - lambda_2
    # Matrix square root of raw_data * (I - u u') * raw_data.
    root = sqrt(raw_data * (I - u * u') * raw_data)
    # Squared Euclidean norm of every row of that root (n-by-1 matrix).
    row_norms_sq = mapslices(norm, root; dims=2) .^ 2
    # NOTE(review): `sort(...)[1:k]` takes the k SMALLEST entries of
    # `rank_one_term`; confirm whether the k largest were intended.
    denom = max(sum(sort(rank_one_term)[1:k]), gap)^2
    gap * (4 * row_norms_sq / denom)[:, 1]
end
# Zero the positions whose flag in this node's column of `nodes` is 0.
rank_n1_term[findall(iszero, nodes[:, node_id])] .= 0
rank_n1_term

101-element Array{Float64,1}:
 0.7174494688409567
 0.6581845998146797
 0.7485679099275601
 0.0
 0.4813294082184126
 0.30654759638339163
 0.7472709825939462
 0.7151245933919612
 0.7392807674114611
 0.7192162595408451
 ⋮
 0.6021180351807897
 0.7229899054703189
 0.7459146110851107
 0.7298346534553724
 0.7487329996897313
 0.46817869969095205
 0.6634497631200071
 0.7200788855114664
 0.0

In [51]:
rank_n1_term = let
    gap = lambda_1 - lambda_2
    # Matrix square root of raw_data * (I - u u') * raw_data.
    root = sqrt(raw_data * (I - u * u') * raw_data)
    # Squared Euclidean norm of every row of that root (n-by-1 matrix).
    row_norms_sq = mapslices(norm, root; dims=2) .^ 2
    # This variant scales by lb_diag^2.
    gap * (row_norms_sq / lb_diag^2)[:, 1]
end
# Zero the positions whose flag in this node's column of `nodes` is 0.
rank_n1_term[findall(iszero, nodes[:, node_id])] .= 0
rank_n1_term

101-element Array{Float64,1}:
 0.13868291670869157
 0.12722702294630495
 0.14469810852462475
 0.0
 0.09304093058600542
 0.05925562234392246
 0.14444741258965477
 0.1382335184970349
 0.14290290472566924
 0.13902443718938848
 ⋮
 0.11638936113044836
 0.1397538826024163
 0.14418522610105122
 0.14107697176190329
 0.14473002036055324
 0.09049889983041727
 0.12824477853167043
 0.13919118271061337
 0.0

In [52]:
# Sum of the k largest entries of the combined term, accumulated in ascending
# order.
let combined = rank_one_term + rank_n1_term, len = length(combined)
    sum(partialsort(combined, (len - k + 1):len))
end

6.118050927535401

In [53]:
# Element-wise sum of the two terms, shown in full.
rank_one_term + rank_n1_term

101-element Array{Float64,1}:
 0.1812913031325201
 0.24892068443084453
 0.14578087423538943
 0.0
 0.4507368258239695
 0.6501869552313505
 0.14726084701862632
 0.18394430635001385
 0.15637878346411685
 0.17927515150879297
 ⋮
 0.3129003597428952
 0.17496890155595737
 0.14880865379623517
 0.16715810100957126
 0.14559248407005712
 0.4657435962084961
 0.24291240764521557
 0.17829077642042035
 0.0

In [54]:
# Entry at sorted position floor(n / 2) of the combined term (ascending order).
partialsort(rank_one_term + rank_n1_term, fld(n, 2))

0.14872713356712808

In [55]:
# Largest entry of the combined term.
maximum(rank_one_term + rank_n1_term)

0.9723464593696755

In [56]:
# There is a k-subset for which removing rather than retaining a single
# variable changes the value by at least that variable's entry in
# `rank_one_term`. (That entry comes from a projection of the variables, which
# may not even diagonalize / maximize the contribution of any one variable to
# the system.)
# Count the entries with a nonzero `rank_one_term` whose combined term is still
# below the k-th largest entry of `rank_one_term`.
let threshold = sort(rank_one_term)[end-k+1]
    count(@. (rank_one_term + rank_n1_term < threshold) & (rank_one_term > 0))
end

60