In [140]:
using LinearAlgebra, CSV, Random, Tables

In [141]:
# Load the data matrix from CSV.
data = CSV.File("../NC-Data.csv") |> Tables.matrix;
# Matrix sqrt: via diagonalization and sqrt eigenvalues
# raw_data is the transpose of the matrix square root of data.
raw_data = sqrt(data)';
# n: number of rows of data; N: n squared; k: fixed at 7 for this notebook.
n = size(data, 1);
N = n * n;
k = 7;
# All close.
# Sanity check: largest absolute entry of raw_data * raw_data - data. Because
# raw_data is a transposed square root, a small value here also relies on data
# being symmetric.
maximum(abs.(raw_data * raw_data .- data))

4.884981308350689e-15

In [142]:
# Load the per-node matrices of the K7 trace. Both are indexed below as
# [:, node_id], i.e. one column per node.
# NOTE(review): entries of `nodes` are compared against 1, 0 and -1 in later
# cells — presumably selected / excluded / undecided; confirm against the
# code that produced the trace.
nodes = CSV.File("../NC-K7-Trace-Nodes.csv") |> Tables.matrix;
bounds = CSV.File("../NC-K7-Trace-Bounds.csv") |> Tables.matrix;

In [143]:
# Other nodes that were inspected:
# node_id = 50
# node_id = 300
node_id = 229
# y: the column of `nodes` belonging to the node under inspection.
y = nodes[:, node_id]
# Number of entries of y equal to 1.
count(==(1), y)

4

In [144]:
# Show the bound values stored for the selected node.
bounds[:, node_id]

3-element Array{Float64,1}:
 4.5129968872085975
 6.584373256021302
 7.0

In [145]:
# Indices of the entries equal to 1 in this node's column of `nodes`.
selected_data = findall(==(1), nodes[:, node_id])

4-element Array{Int64,1}:
 20
 84
 85
 86

In [146]:
# Factor the selected columns of raw_data once and reuse the factorization
# (the SVD was previously recomputed for each of s, u and v).
selected_svd = svd(raw_data[:, selected_data])
# s: largest singular value; u, v: first columns of U and V.
s = maximum(selected_svd.S)
u = selected_svd.U[:, 1];
v = selected_svd.V[:, 1];
# D: the columns of the n-by-n identity picked out by selected_data
# (sized from n instead of a hard-coded 101).
D = Matrix(1.0I, n, n)[:, selected_data];
s

1.9342690514902823

In [147]:
# Squared entries of raw_data * u, one per row of raw_data.
rank_one_term = abs2.(raw_data * u)
# Zero the entries whose value in this node's column of `nodes` is 0.
rank_one_term[iszero.(nodes[:, node_id])] .= 0
rank_one_term

101-element Array{Float64,1}:
 0.0011302640621757621
 0.02770066314539759
 0.10151772407545055
 0.0
 0.25610915747693164
 0.00013428537660249162
 0.09003700443881506
 0.05574370907683892
 0.05354760607565086
 0.053561867714760604
 ⋮
 0.1484103392065695
 0.016873719861435224
 0.0073537692505327885
 0.01652816329702802
 0.002684390237879137
 0.028361399876088253
 0.1262101742004988
 0.00255421792241594
 0.0

In [148]:
# Upper bound on the contribution to ||E||_F^2 from adding one variable.
# The bound comes from ||E||_F^2 <= ||(I - uu')MD||_F^2, together with the
# fact that every variable's column in normCommunities has norm 1.
# NOTE(review): normCommunities is not defined in this notebook — presumably
# it corresponds to raw_data; confirm.
# TODO: add another upper bound based on taking k columns from this row
# into ||E||_F.
residual_term = 1 .- rank_one_term

101-element Array{Float64,1}:
 0.9988697359378242
 0.9722993368546025
 0.8984822759245494
 1.0
 0.7438908425230684
 0.9998657146233975
 0.909962995561185
 0.9442562909231611
 0.9464523939243491
 0.9464381322852394
 ⋮
 0.8515896607934305
 0.9831262801385647
 0.9926462307494672
 0.983471836702972
 0.9973156097621209
 0.9716386001239118
 0.8737898257995012
 0.997445782077584
 1.0

In [149]:
"""
    frobenius_rows_k(M, y)

Return, for every row of `M`, a sum of squared entries of that row.

`M` is squared elementwise first. For each row `i` with `y[i] != 0` the result is

- the sum of the squared entries in the columns where `y == 1`, plus
- the squared diagonal entry `M[i, i]^2` when `y[i] == -1` (this uses up one of
  the remaining slots), plus
- the largest remaining squared entries of the row, as many as are still needed
  to reach `k` columns in total.

Rows with `y[i] == 0` are skipped and keep the value `0.0`.

The target count `k` is read from the global variable `k`. The number of rows is
taken from `M` itself, and the number of extra entries is clamped to
`0:length(row)` so an over-full or nearly complete `y` cannot cause a
`BoundsError`.

NOTE(review): columns with `y == 0` are not zeroed before the largest remaining
entries are picked, so they can contribute to the sum — confirm this is intended.
"""
function frobenius_rows_k(M, y)
    squared = M .^ 2
    nrows = size(squared, 1)
    selected = y .== 1
    stillneed = k - count(selected)
    row_quantities = zeros(nrows)
    for row_index in 1:nrows
        y[row_index] == 0 && continue
        # Indexing a row already yields a fresh array, so it is safe to mutate.
        row = squared[row_index, :]
        row_sum = sum(row[selected])
        row[selected] .= 0
        row_stillneed = stillneed
        if y[row_index] == -1
            # The row's own variable takes one of the remaining slots.
            row_sum += row[row_index]
            row[row_index] = 0
            row_stillneed -= 1
        end
        ntake = clamp(row_stillneed, 0, length(row))
        row_sum += sum(sort(row, rev=true)[1:ntake])
        row_quantities[row_index] = row_sum
    end
    return row_quantities
end

frobenius_rows_k (generic function with 1 method)

In [150]:
# frobenius_rows_k(data - raw_data * u * u' * raw_data, y)

In [151]:
# Count the variables with y == -1 whose row value from frobenius_rows_k
# (applied to data - raw_data * u * u' * raw_data) is strictly below their
# residual_term.
sum((y .== -1) .& (frobenius_rows_k(data - raw_data * u * u' * raw_data, y) .< residual_term))

9

In [152]:
# Number of variables still to be chosen to reach k.
stillneed = k - sum(y .== 1)
# Variables with y == -1 whose rank_one_term is at least the stillneed-th
# largest value among the y == -1 entries (ties at the threshold can select
# more than stillneed of them). The mask is built from the threshold computed
# here; the previous version overwrote it with `lb_proj_nodes`, which is only
# defined further down and therefore failed (or used a stale value) on a
# fresh run.
lb_proj_nodes_extra = if stillneed > 0
    (rank_one_term .>=
     sort(rank_one_term .* Array{Float64}(y .== -1))[end-stillneed+1]) .& (y .== -1)
else
    falses(length(y))  # nothing left to add
end
# Chosen set: the extra variables together with those already at y == 1.
lb_proj_nodes = lb_proj_nodes_extra .| (y .== 1)
# Sum of rank_one_term over the chosen set.
lb_proj = sum(rank_one_term[lb_proj_nodes])
# Squared largest singular value of the chosen columns of raw_data.
lb_diag = maximum(svd(raw_data[:, lb_proj_nodes]).S) ^ 2

5.568112215978401

In [153]:
# Show the stored bounds for this node again, for comparison with lb_diag.
bounds[:, node_id]

3-element Array{Float64,1}:
 4.5129968872085975
 6.584373256021302
 7.0

In [162]:
# lambda_1: the second stored bound for this node.
lambda_1 = bounds[2, node_id]
# lambda_2: the third stored bound minus lb_diag.
lambda_2 = bounds[3, node_id] - lb_diag
# lambda_2_lb: the third stored bound minus lambda_1 (not used in the cells
# shown here).
lambda_2_lb = bounds[3, node_id] - lambda_1
lambda_2

1.4318877840215993

In [160]:
# lambda_2 = svd(raw_data[:, selected_data]).S[2] + sum(sort(residual_term, rev=true)[1:stillneed])

In [163]:
# Multipliers for the linearization: mult_parallel is 1 / lambda_1 and
# mult_perp is lambda_2 / (lb_proj - lambda_2)^2; their sum weights
# residual_term in the per-variable coefficients below.
mult_parallel = 1. / lambda_1
mult_perp = lambda_2 / (lb_proj - lambda_2)^2
mult_linearize = mult_parallel + mult_perp
# Per-variable coefficient: rank_one_term plus the weighted residual_term.
linear_program = rank_one_term + mult_linearize * residual_term

101-element Array{Float64,1}:
 0.34254326433823606
 0.3600319189825617
 0.40861835840416716
 0.34179932376819144
 0.5103705444086666
 0.3418877104938792
 0.4010617409757059
 0.37848987077823604
 0.3770443942977793
 0.3770537813182856
 ⋮
 0.4394831093937476
 0.35290561759153416
 0.3466395796617448
 0.352678172027165
 0.34356619123803356
 0.3604668163455135
 0.4248709457742941
 0.34348051173196903
 0.34179932376819144

In [164]:
# Linear program solution: the coefficients of all variables with y == 1 plus
# the stillneed largest coefficients among the variables with y == -1.
sum(linear_program[y .== 1]) + sum(sort(linear_program[y .== -1], rev=true)[1:stillneed])

6.524788207252006

In [165]:
# First entry: number of variables with y == -1 whose linear_program
# coefficient is strictly below the k-th largest rank_one_term.
# Second entry: total number of variables with y == -1.
[sum((sort(rank_one_term, rev=true)[k] .> linear_program) .& (y .== -1)) sum(y .== -1)]

1×2 Array{Int64,2}:
 71  75