In [1]:
using LinearAlgebra, CSV, Random, Tables, JLD, Base.Iterators, Printf

In [2]:
include("../Algorithm/utilities.jl")

refreshFile (generic function with 1 method)

In [3]:
data = load("../Data/communities.jld")["communities"]
# Matrix sqrt: via diagonalization and sqrt eigenvalues
raw_data = sqrt(data)';
n = size(data, 1);
N = n * n;
norms = data[1:n+1:N]
k = 5;
# All close.
maximum(abs.(raw_data * raw_data .- data))

6.140921104957897e-16

In [4]:
nodes = CSV.File("../Co-K5-Trace-Nodes.csv", header=false) |> Tables.matrix;
bounds = CSV.File("../Co-K5-Trace-Bounds.csv", header=false) |> Tables.matrix;

In [38]:
# node_id = 50
# node_id = 300
node_id = 40
y = nodes[:, node_id]
selected_data = findall(y .== 1)
i = numpositive = sum(nodes[:, node_id] .== 1)
stillneed = k - i

2

In [6]:
bounds[:, node_id]

3-element Array{Float64,1}:
 0.19840666121229678
 0.3629025164807017
 0.4316969652057027

In [7]:
sum(norms[y .== 1]) + sum(selectsorted(norms .* (y .== -1), stillneed))

0.4316969652057027

In [8]:
s = maximum(svd(raw_data[:, selected_data]).S)
u = svd(raw_data[:, selected_data]).U[:, 1];
v = svd(raw_data[:, selected_data]).V[:, 1];
D = Matrix(1.0I, 101, 101)[:, selected_data];
s^2

0.14185918642419382

In [9]:
rank_one_term = (raw_data * u) .^ 2
rank_one_term[nodes[:, node_id] .== 0] .= 0
rank_one_term

101-element Array{Float64,1}:
 7.926078166037157e-6
 0.00042107519671213234
 0.010091876290498288
 0.0
 0.011482437882389994
 0.00013178020948110015
 0.0023632781250717723
 0.0009055802857870314
 0.0012990605554894848
 0.0016750788941365209
 ⋮
 0.0063246338140331565
 0.00029115092252889475
 0.0005201627269077583
 0.0008469155188951991
 3.40177036420949e-5
 0.0016327144857154394
 0.005953773016861563
 0.00018478862022754878
 0.0066479897694745995

In [10]:
# Upper bound on the contribution from adding one variable into ||E||^2.
# The upper bound comes from ||E||_F^2
# upper bounded by ||(I-uu')MD||_F^2. Also, we know that normCommunities
# has column norm of 1 for every variable.
# We should add another upper bound based on taking k columns from this
# row into ||E||_F.
residual_term = norms .- rank_one_term
residual_term[y .== 0] .= 0
residual_term

101-element Array{Float64,1}:
 0.016097164406534425
 0.02638209318953039
 0.054141001469726655
 0.0
 0.03214735697131307
 0.05392057454670922
 0.02172243664593466
 0.01970490065528001
 0.02642470524769024
 0.030432363176178794
 ⋮
 0.03542551701477036
 0.032597381433193566
 0.03968867072276389
 0.038444722591622994
 0.011947288809978047
 0.03961357916191083
 0.04651232744206663
 0.05757289255173794
 0.04763397466371458

In [11]:
function frobenius_rows_k(M, y)
    M = M .^ 2
    row_quantities = zeros(n)
    stillneed = k - sum(y .== 1)
    for row_index in 1:n
        if y[row_index] == 0
            continue
        end
        row = copy(M[row_index, :])
        row_sum = sum(row[y .== 1])
        row[y .== 1] .= 0
        if y[row_index] == -1
            row_sum += row[row_index]
            row[row_index] = 0
            row_stillneed = stillneed -1
        else
            row_stillneed = stillneed
        end
        row_sum += sum(sort(row, rev=true)[1:row_stillneed])
        row_quantities[row_index] = row_sum
    end
    row_quantities
end

frobenius_rows_k (generic function with 1 method)

In [16]:
# frobenius_rows_k(data - raw_data * u * u' * raw_data, y)

In [12]:
sum((y .== -1) .& (frobenius_rows_k(data - raw_data * u * u' * raw_data, y) .< residual_term))

81

In [13]:
lb_proj_nodes_extra = rank_one_term .>= sort(rank_one_term .* Array{Float64}(y .== -1))[end-stillneed+1]
lb_proj_nodes_extra = lb_proj_nodes_extra .& (y .== -1)
lb_proj_nodes = lb_proj_nodes_extra .| (y .== 1)
lb_proj = sum(rank_one_term[lb_proj_nodes])
# Too slow to compute!
lb_diag = maximum(svd(raw_data[:, lb_proj_nodes]).S) ^ 2
lb_proj, lb_diag

(0.2202679971348654, 0.2732082054995253)

In [14]:
bounds[:, node_id]

3-element Array{Float64,1}:
 0.19840666121229678
 0.3629025164807017
 0.4316969652057027

In [15]:
lambda_1 = bounds[2, node_id]
lambda_2 = bounds[3, node_id] - lb_diag
lambda_2_lb = bounds[3, node_id] - lambda_1
lambda_2_lb, lambda_2

(0.06879444872500101, 0.15848875970617743)

In [17]:
# Contribution to ||E||^2 from the row sums (squared Frobenius norm)
E = data - (raw_data * u * u' * raw_data)
e_squared_program = mapslices(
    (row) -> sum(row[y .== 1].^2) + sum(selectsorted(row.^2 .* (y .== -1), stillneed)),
    E,
    dims=2)[:, 1]
maximum(e_squared_program)

0.026386179743713507

In [19]:
# Upper bound on ||E||_F
lambda_2 = sqrt(sum(e_squared_program[y .== 1]) + sum(selectsorted(e_squared_program .* (y .== -1), stillneed)))

0.18117842668099743

In [20]:
# Upper bound performed poorly
lambda_2 = bounds[3, node_id] - lb_diag

0.15848875970617743

In [21]:
sum(residual_term[y .== 1]) + sum(selectsorted(residual_term[y .== -1], stillneed))

0.24795709056878384

In [22]:
# Multiplier for 
mult_parallel = 1. / lambda_1
mult_perp = lambda_2 / (lb_proj - lambda_2)^2
mult_linearize = mult_parallel + mult_perp
linear_program = rank_one_term + mult_linearize * e_squared_program

101-element Array{Float64,1}:
 0.02460167690471971
 0.057995320390979814
 0.18262281400917196
 0.1460602976499621
 0.08803754640128536
 0.2392564293228971
 0.04348631750007592
 0.03823110015959426
 0.05480160623763157
 0.07411834186327422
 ⋮
 0.10085579242512224
 0.08710367055087934
 0.10924901820611159
 0.10349046799096251
 0.012162377026588872
 0.10449036260751611
 0.1293754007046165
 0.18853743075691745
 0.15193791934247894

In [23]:
# Linear program solution
sum(linear_program[y .== 1]) + sum(selectsorted(linear_program[y .== -1], stillneed))

1.6338898195663925

In [24]:
# Conservative program in case we cannot bound sin theta_perp.
linear_program_lam_2 = rank_one_term + mult_parallel .* e_squared_program
lambda_2 + sum(linear_program_lam_2[y .== 1]) + sum(selectsorted(linear_program_lam_2[y .== -1], stillneed))

0.4546649113544162

In [78]:
using JuMP, COSMO

In [80]:
# ||E|| <= ||E D^T M u|| / (||Sigma|| ||D^T M u||)
model = Model(COSMO.Optimizer)
@variable(model, 0 <= x[1:n] <= 1)
@constraint(model, linear_program' * x >= lb_diag)
@constraint(model, lambda_2 + linear_program_lam_2' * x >= lb_diag)
@constraint(model, Array{Float64}(y .== 1)' * x == numpositive)
@objective(model, Min, rank_one_term' * x)
optimize!(model)
solution_summary(model)

------------------------------------------------------------------
          COSMO v0.8.6 - A Quadratic Objective Conic Solver
                         Michael Garstka
                University of Oxford, 2017 - 2022
------------------------------------------------------------------

Problem:  x ∈ R^{101},
          constraints: A ∈ R^{205x101} (407 nnz),
          matrix size to factor: 306x306,
          Floating-point precision: Float64
Sets:     Nonnegative of dim: 204
          ZeroSe of dim: 1
Settings: ϵ_abs = 1.0e-05, ϵ_rel = 1.0e-05,
          ϵ_prim_inf = 1.0e-04, ϵ_dual_inf = 1.0e-04,
          ρ = 0.1, σ = 1e-06, α = 1.6,
          max_iter = 5000,
          scaling iter = 10 (on),
          check termination every 25 iter,
          check infeasibility every 40 iter,
          KKT system solver: QDLDL
Acc:      Anderson Type2{QRDecomp},
          Memory size = 15, RestartedMemory,	
          Safeguarded: true, tol: 2.0
Setup Time: 0.21ms

Iter:	Objective:	Primal Res:	Dual

4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4150	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4175	 1.4192e-01	3.0287e-04	1.6508e-05	2.3669e-03
4200	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4225	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4250	 1.4192e-01	3.0287e-04	1.6508e-05	2.3669e-03
4275	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4300	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4325	 1.4192e-01	3.0287e-04	1.6508e-05	2.3669e-03
4350	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4375	 1.4192e-01	1.4022e-03	1.6509e-05	2.3669e-03
4400	 1.4191e-01	3.0287e-04	1.6508e-05	2.3669e-03
4425	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4450	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4475	 1.4191e-01	3.0287e-04	1.6508e-05	2.3669e-03
4500	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4525	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4550	 1.4191e-01	3.0287e-04	1.6508e-05	2.3669e-03
4575	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4600	 1.4191e-01	1.4022e-03	1.6509e-05	2.3669e-03
4625	 1.

* Solver : COSMO

* Status
  Termination status : ITERATION_LIMIT
  Primal status      : NEARLY_FEASIBLE_POINT
  Dual status        : NEARLY_FEASIBLE_POINT
  Message from the solver:
  "Max_iter_reached"

* Candidate solution
  Objective value      : 0.14191307727940491

* Work counters
  Solve time (sec)   : 0.12579


In [54]:
objective_value(model)

0.14186760554017683

In [64]:
e_squared_program = mapslices(
    (row) -> max(
        sum(row[y .== 1]) + sum(selectsorted(row .* (y .== -1), stillneed)),
        sum(-row[y .== 1]) + sum(selectsorted(-row .* (y .== -1), stillneed))
    ) ^ 2,
    E .* (raw_data * u)',
    dims=2)[:, 1] / objective_value(model)
maximum(e_squared_program)

0.0075701981369719175

In [65]:
# Multiplier for 
mult_parallel = 1. / lambda_1
mult_perp = lambda_2 / (lb_proj - lambda_2)^2
mult_linearize = mult_parallel + mult_perp
linear_program = rank_one_term + mult_linearize * e_squared_program

101-element Array{Float64,1}:
 0.0035697640972841655
 0.001829931839392928
 0.029131912400482168
 0.015478315587490023
 0.020984156999147033
 0.013682225379852088
 0.0042156349083965665
 0.0021311564632373724
 0.0025955789508253146
 0.00711653605568502
 ⋮
 0.01220808169183182
 0.0022716109777939568
 0.003493519917667502
 0.004702924989550664
 0.0005228444197042201
 0.01577395859727945
 0.023439471618106975
 0.015451197235423842
 0.02110452132574555

In [66]:
# Linear program solution
sum(linear_program[y .== 1]) + sum(selectsorted(linear_program[y .== -1], stillneed)), lb_diag

(0.5696705588806852, 0.2732082054995253)

In [67]:
# Conservative program in case we cannot bound sin theta_perp.
linear_program_lam_2 = rank_one_term + mult_parallel .* e_squared_program
lambda_2 + sum(linear_program_lam_2[y .== 1]) + sum(selectsorted(linear_program_lam_2[y .== -1], stillneed))

0.39996830001094297

In [74]:
# ||E|| <= ||E D^T M u|| / (||Sigma|| ||D^T M u||)
model = Model(COSMO.Optimizer)
MathOptInterface.Utilities.reset_optimizer(model)
@variable(model, 0 <= x[1:n] <= 1)
@constraint(model, linear_program' * x >= lb_proj)
@constraint(model, lambda_2 + linear_program_lam_2' * x >= lb_diag)
@constraint(model, Array{Float64}(y .== 1)' * x == numpositive)
@objective(model, Min, rank_one_term' * x)
optimize!(model)
solution_summary(model)

UndefVarError: UndefVarError: MathOptInterface not defined