In [87]:
using LinearAlgebra, CSV, Random, Tables, JLD, Base.Iterators, Printf, COSMO

In [4]:
include("../Algorithm/utilities.jl")

refreshFile (generic function with 1 method)

In [5]:
data = load("../Data/communities.jld")["normCommunities"]
Sigma2 = data^2
# Matrix sqrt: via diagonalization and sqrt eigenvalues
raw_data = sqrt(data)';
n = size(data, 1);
N = n * n;
norms = data[1:n+1:N]
k = 7;
# All close.
maximum(abs.(raw_data * raw_data .- data))

6.661338147750939e-15

In [6]:
nodes = CSV.File("../Co-K5-Trace-Nodes.csv", header=false) |> Tables.matrix;
bounds = CSV.File("../Co-K5-Trace-Bounds.csv", header=false) |> Tables.matrix;
# nodes = CSV.File("../NC-K7-Trace-Nodes.csv") |> Tables.matrix;
# bounds = CSV.File("../NC-K7-Trace-Bounds.csv") |> Tables.matrix;

In [7]:
# node_id = 50
# node_id = 300
node_id = 25
y = nodes[:, node_id]
selected_data = findall(y .== 1)
i = sum(nodes[:, node_id] .== 1)
stillneed = k - i

4

In [8]:
s = maximum(svd(raw_data[:, selected_data]).S)
u = svd(raw_data[:, selected_data]).U[:, 1];
v = svd(raw_data[:, selected_data]).V[:, 1];
D = Matrix(1.0I, 101, 101)[:, selected_data];
s^2

2.556130035491392

In [9]:
variance = mapslices(norm, raw_data, dims=2)[:, 1] .^ 2
rank_one_term = (raw_data * u) .^ 2
rank_one_term[nodes[:, node_id] .== 0] .= 0
rank_one_term

101-element Vector{Float64}:
 0.0007729673812410654
 0.019286993029416652
 0.14001498734661805
 0.062413143981748825
 0.24497712730742385
 0.003112796601762257
 0.0995369970378409
 0.05273121668867637
 0.05229228709256075
 0.04736071875256587
 0.00019779963324352337
 0.18315070769677022
 0.0
 ⋮
 0.002879719537412127
 0.0001201647501583793
 0.16980192495279062
 0.14000549371287777
 0.015619690193852296
 0.008916094669294456
 0.017112186583702844
 0.002468834952860791
 0.03202895696331845
 0.11577049445416097
 0.0031406834671852905
 0.11376918610794398

In [10]:
function row_sums_k(M, y)
    M = abs.(M)
    row_quantities = zeros(n)
    stillneed = k - sum(y .== 1)
    for row_index in 1:n
        if y[row_index] == 0
            continue
        end
        row = copy(M[row_index, :])
        row_sum = sum(row[y .== 1])
        row[y .== 1] .= 0
        if y[row_index] == -1
            row_sum += row[row_index]
            row[row_index] = 0
            row_stillneed = stillneed -1
        else
            row_stillneed = stillneed
        end
        row_sum += sum(sort(row, rev=true)[1:row_stillneed])
        row_quantities[row_index] = row_sum
    end
    row_quantities
end

function frobenius_rows_k(M, y)
    row_sums_k(M .^ 2, y)
end

frobenius_rows_k (generic function with 1 method)

In [13]:
lb_proj_nodes_extra = rank_one_term .>= sort(rank_one_term .* Array{Float64}(y .== -1))[end-stillneed+1]
lb_proj_nodes_extra = lb_proj_nodes_extra .& (y .== -1)
lb_proj_nodes = lb_proj_nodes_extra .| (y .== 1)
lb_proj = sum(rank_one_term[lb_proj_nodes])
# Too slow to compute!
lb_diag = maximum(svd(raw_data[:, lb_proj_nodes]).S) ^ 2
lb_proj, lb_diag

(5.874086160768671, 6.119885886490068)

In [14]:
# How many variables can we exclude? Entries of frobenius_rows_k
# as an upper bound:
# The entry is a sum of k entries, including variance^2. When we
# update D' Sigma D from D: n x k-1 to D: n x k, then we add
# these squared entries to the sq Frob norm, add again (symmetric),
# then subtract variance^2 which appears once (inclusion-exclusion).
# This upper-bounds the differenc in squared UIN going from k-1 to
# k when adding this variable.
# RHS lower bound: Go from (lb_proj - lb_contribution)^2 to lb_proj.
# When adding each variable to the system, we must increase the UIN
# ||D' Sigma D|| by at least rank_one_contribution_lb, or else we
# have a contradiction.
rank_one_contribution_lb = minimum(selectsorted(variance[y .== -1], stillneed))
retain_var = 2*frobenius_rows_k(data, y) .- variance.^2 .>= lb_proj^2 - (lb_proj - rank_one_contribution_lb).^2
retain_var = retain_var .| (y .== 1)
sum((retain_var .== 0) .& (y .== -1))

87

In [15]:
# retain_var = y .!= 0
numer = frobenius_rows_k(Sigma2 .* retain_var .* retain_var', y)
denom = frobenius_rows_k((raw_data * u .* retain_var) * (raw_data * u .* retain_var)', y)

101-element Vector{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [16]:
selectsorted((numer ./ denom)[numer .!= 0], k)

LoadError: BoundsError: attempt to access 4-element Vector{Float64} at index [1:7]

In [17]:
join([selected_data; sortperm((numer ./ denom) .* (numer .!= 0) .* (y .== -1), rev=true)[1:stillneed]], ";")

"78;85;86;84;78;85;86"

In [18]:
# UB: Need linear programming. This is a greedy attempt to solve
sqrt(sum(numer[[78;85;86;84;78;85;86]]) / sum(denom[[78;85;86;84;78;85;86]]))

20.808293987088003

In [19]:
# LB
norm(data * svd(raw_data[:, lb_proj_nodes]).U[:, 1])

21.575057897666632

In [69]:
struct FlatMajorize{T} <: COSMO.AbstractConvexCone{T}
    dim::Int64
    k::Int64
end

function majorize_linalg(x, k, i)
    x = copy(x)
    n = size(x)[1]
    C = zeros(0, n)
    for index in 1:(i-1)
        # Printf.@printf("index %d\n", index)
        C = [C; zeros(1, index-1) 1 -1 zeros(1, n - index - 1)]
    end
    C = [C; (ones(1, i) * (k-i)/i) -ones(1, n-i)]
    M = [I C'; C zeros(size(C,1), size(C,1))]
    res = M \ [x; zeros(size(C,1))]
    res[1:n]
end

function COSMO.project!(
    x::AbstractVector{T}, C::FlatMajorize{T}) where {T <: AbstractFloat}
    x0 = x
    n = size(x, 1)
    x_order = sortperm(x, rev=true)
    x = x[x_order]

    # If the largest i entries account for more than i/k of the
    # sum of entries, then we might update the vector so that
    # the first i entries are identical to one another, and they
    # now account for precisely i/k of the sum of entries.
    n = size(x)[1]
    x_sum = cumsum(x)
    x_sum ./= x_sum[end]
    if x_sum[1] <= 1.0 / k
        return x
    end
    max_break = 1
    k = C.k
    while max_break < k && x_sum[max_break+1] >= Float64(max_break+1)/k
        max_break += 1
    end

    function optimize_for_break(i)
        result = majorize_linalg(x, C.k, i)
        if any(diff(result) .> 0)
            return Inf
        end
        norm(result - x)
    end
    ~, break_point = findmin(optimize_for_break, 1:max_break)
    x = majorize_linalg(x, C.k, break_point)
    # x_perm = zeros(n)
    # x_perm[x_order] = x
    # x_perm
    x0[x_order] = x
end

In [70]:
# Greedy program, testing the majorize cone
model = COSMO.Model()

assemble!(
    model,
    zeros(5, 5),
    [-1 -2 -3 -4 -5],
    [
        # Majorize constraint (support of the solution)
        COSMO.Constraint(Matrix(1.0I, 5, 5), zeros(5), FlatMajorize{Float64}(5, 3), 5),
        # Domain of the values
        COSMO.Constraint(Matrix(1.0I, 5, 5), zeros(5), COSMO.Box(zeros(5), ones(5))),
        # L1 constraint
        COSMO.Constraint(-ones(1, 5), [1.], COSMO.Nonnegatives),
    ],
)
result = COSMO.optimize!(model)

LoadError: UndefVarError: k not defined

In [61]:
result.x

5-element Vector{Float64}:
 -1.7433086309195142e-17
 -8.15970555012591e-17
  2.3761374812192314e-16
 -8.205242041370298e-16
  1.0

In [71]:
# Greedy program, testing the spread condition.
model = COSMO.Model()

assemble!(
    model,
    zeros(5, 5),
    [-1 -2 -3 -4 -5],
    [
        # Domain of the values
        COSMO.Constraint(Matrix(1.0I, 5, 5), zeros(5), COSMO.Box(zeros(5), ones(5))),
        # L1 constraint
        COSMO.Constraint(-ones(1, 5), [1.], COSMO.Nonnegatives),
        # Meaningful support of the solution at least k = 3
        COSMO.Constraint(
            # L1 norm * 1/k minus the entry (identity) is nonnegative
            ones(5, 5) * 1.0/3
            - 1.0I,
            zeros(5),
            COSMO.Nonnegatives),
    ],
)
result = COSMO.optimize!(model)

>>> COSMO - Results
Status: 

[32mSolved[39m
Iterations: 159 (incl. 9 safeguarding iterations)
Optimal Objective: -4
Runtime: 1.5ms
Setup Time: 0.08ms

Avg Iter Time: 0.01ms

In [73]:
result.x

5-element Vector{Float64}:
  2.7790854124507226e-17
 -2.017447160069223e-17
  0.3333333333333328
  0.3333333333333324
  0.33333333333333276

In [84]:
model = COSMO.Model()

numer = frobenius_rows_k(Sigma2 .* retain_var .* retain_var', y)
denom = frobenius_rows_k((raw_data * u .* retain_var) * (raw_data * u .* retain_var)', y)
# numer = frobenius_rows_k(Sigma2, y)
# denom = frobenius_rows_k((raw_data * u) * (raw_data * u)', y)

assemble!(
    model,
    zeros(n, n),
    -numer,
    [
        # Domain of the values
        COSMO.Constraint(Matrix(1.0I, n, n), zeros(n), COSMO.Nonnegatives),
        # We may scale up the problem until the denom is 1.
        COSMO.Constraint(denom', [-1.], COSMO.ZeroSet),
        # However, at least k entries should have large magnitudes
        COSMO.Constraint(
            ones(n, n) * 1.0/k
            - 1.0I,
            zeros(n),
            COSMO.Nonnegatives,
        ),
    ]
)
result = COSMO.optimize!(model)

>>> COSMO - Results
Status: 

[32mSolved[39m
Iterations: 411 (incl. 11 safeguarding iterations)
Optimal Objective: -476.2
Runtime: 43.2ms
Setup Time: 2.87ms

Avg Iter Time: 0.1ms

In [85]:
sqrt(-result.obj_val)

21.8229012382063