In [25]:
import numpy as np

In [26]:
data = np.random.normal(size=[10, 20])
cov = data.T @ data

In [27]:
u = np.linalg.svd(data[:, 0:3])[0][:, 0:1]
v = data.T @ u
v.shape

(20, 1)

In [28]:
# Greedy: Assume that all variables were chosen. If we round (take top k),
# then we can probably find a different subset that performs better.
greedy_selection = v ** 2 + np.abs(data.T @ (np.eye(10) - u @ u.T) @ data @ v) / np.abs(v)

In [29]:
selected = np.argsort(-np.squeeze(greedy_selection))[0:3]
selected

array([11, 19,  9])

In [30]:
def ipsen_bound_update(selected):
    return (
        u.T @ data[:, selected] @ data[:, selected].T @ u
        + np.linalg.norm(data[:, selected].T @ (np.eye(10) - u @ u.T) @ data[:, selected] @ v[selected, :])
            / np.linalg.norm(v[selected, :]))

In [31]:
ipsen_bound_update(selected)

array([[21.1473]])

In [32]:
import numpy as np
np.set_printoptions(linewidth = 200, precision = 4, suppress = True)

In [33]:
data = np.loadtxt('../NC-Data.csv', delimiter=',', dtype=str)
data = data[1:].astype(float)
k = 7
data

array([[ 1.    , -0.0461,  0.2312, ...,  0.2704,  0.4664,  0.3672],
       [-0.0461,  1.    , -0.0671, ..., -0.0515, -0.0944, -0.0349],
       [ 0.2312, -0.0671,  1.    , ...,  0.147 ,  0.2608,  0.6313],
       ...,
       [ 0.2704, -0.0515,  0.147 , ...,  1.    ,  0.2066,  0.1538],
       [ 0.4664, -0.0944,  0.2608, ...,  0.2066,  1.    ,  0.3486],
       [ 0.3672, -0.0349,  0.6313, ...,  0.1538,  0.3486,  1.    ]])

In [34]:
nodes = np.loadtxt('../NC-K7-Trace-Nodes.csv', delimiter=',', dtype=str)
nodes = nodes[1:].astype(float)
bounds = np.loadtxt('../NC-K7-Trace-Bounds.csv', delimiter=',', dtype=str)
bounds = bounds[1:].astype(float)

In [35]:
# One lower bound impl, and two upper bound impls
i = 3
selection = np.where((nodes == 1).sum(axis=0) == i)[0][0]
bounds[:, selection]

array([4.1355, 5.8707, 7.    ])

In [36]:
raw_data = np.linalg.cholesky(data).T
np.allclose(raw_data.T @ raw_data, data)

True

In [37]:
U = np.linalg.svd(raw_data[:, nodes[:, selection] == 1])[0][:, 0:1]
rank_one_vector = raw_data.T @ U
residual_data = raw_data - U @ (U.T @ raw_data)

In [38]:
np.linalg.matrix_rank(raw_data), np.linalg.matrix_rank(residual_data)

(101, 100)

In [39]:
# Brute-Force Upper Bound. Exponential time complexity.
import itertools
import scipy.special
selected_data = np.where(nodes[:, selection] == 1)[0]
unselected_data = np.where(nodes[:, selection] == -1)[0]
brute_force_data = np.zeros(
    [
        int(scipy.special.binom(len(unselected_data), k - i)),
        k, k,
    ],
    np.float32)
brute_force_data[:, 0:i, 0:i] = data[selected_data, :][:, selected_data]
brute_force_inds = np.asarray(
    list(itertools.combinations(unselected_data, k - i)),
    np.int32)
brute_force_data[:, i:, 0:i] = data[brute_force_inds][:, :, selected_data]
brute_force_data[:, 0:i, i:] = np.transpose(
    brute_force_data[:, i:, 0:i],
    axes=[0, 2, 1])
for index in range(k - i, k):
    brute_force_data[:, index, i:] = (
        data[brute_force_inds[:, index - i:index - i + 1],
             brute_force_inds]
    )
brute_force_data.shape, brute_force_inds.shape

((1502501, 7, 7), (1502501, 4))

In [40]:
np.allclose(
    brute_force_data[42][i:, i:],
    data[:, brute_force_inds[42, :]][brute_force_inds[42, :], :])

False

In [41]:
brute_force_data[42]

array([[ 1.    ,  0.1267,  0.1259, -0.3008, -0.2359, -0.7944, -0.4545],
       [ 0.1267,  1.    ,  0.9886, -0.0098,  0.168 , -0.3048,  0.1556],
       [ 0.1259,  0.9886,  1.    , -0.0247,  0.1813, -0.2793,  0.123 ],
       [-0.3008, -0.0098, -0.0247,  0.    ,  0.    ,  0.    ,  0.    ],
       [-0.2359,  0.168 ,  0.1813, -0.0461,  1.    , -0.0671,  0.4161],
       [-0.7944, -0.3048, -0.2793,  0.2312, -0.0671,  1.    , -0.0467],
       [-0.4545,  0.1556,  0.123 ,  0.1816,  0.4161, -0.0467,  1.    ]], dtype=float32)

In [42]:
brute_force_inds[42, :]

array([ 0,  1,  2, 61], dtype=int32)

In [43]:
brute_force_eigs = np.linalg.eigvalsh(brute_force_data)
opt_inds_loc = np.argmax(brute_force_eigs.max(axis=1))
opt_inds = brute_force_inds[opt_inds_loc, :]
opt_total_inds = np.r_[selected_data, opt_inds]
selected_data, opt_inds, brute_force_eigs[opt_inds_loc, :].max()

(array([ 3, 83, 85]), array([79, 80, 81, 84], dtype=int32), 5.5118036)

In [44]:
# Gersh upper bound on the opt_inds eigvals.
np.abs(data[:, opt_total_inds][opt_total_inds, :]).sum(axis=1).max()

5.804850445790672

In [64]:
# Rank-one greedy lower bound. This out-performed the
# Yuan stochastic approach!
(rank_one_vector[opt_total_inds] ** 2).sum()

5.267480676016402

In [46]:
# Ipsen upper bound on the opt_inds eigvals.
((rank_one_vector[opt_total_inds] ** 2).sum()
    + np.linalg.norm(
        (residual_data[:, opt_total_inds].T @ residual_data[:, opt_total_inds]
        @ rank_one_vector[opt_total_inds]))
        / np.linalg.norm(rank_one_vector[opt_total_inds]))

5.807602843343165

In [47]:
np.c_[np.tile(selected_data[None, :], [len(brute_force_inds), 1]), brute_force_inds].shape

(1502501, 7)

In [68]:
np.transpose(residual_data[:, brute_force_total_inds], [1, 2, 0]).shape

(1502501, 7, 101)

In [70]:
rank_one_vector[brute_force_total_inds, :].shape

(1502501, 7, 1)

In [71]:
# Ipsen upper bound (brute force).
brute_force_ipsen = (
    (rank_one_vector[selected_data] ** 2).sum()
    + (rank_one_vector[brute_force_inds] ** 2).sum(axis=1))
brute_force_total_inds = np.c_[
    np.tile(selected_data[None, :], [len(brute_force_inds), 1]),
    brute_force_inds]
brute_force_ipsen_res = np.matmul(
    np.transpose(residual_data[:, brute_force_total_inds], [1, 0, 2]),
    rank_one_vector[brute_force_total_inds, :])
brute_force_ipsen_res = np.matmul(
    np.transpose(residual_data[:, brute_force_total_inds], [1, 2, 0]),
    brute_force_ipsen_res)
brute_force_ipsen += (
    np.linalg.norm(brute_force_ipsen_res, axis=1)
    / np.linalg.norm(rank_one_vector[brute_force_total_inds, :], axis=1))
brute_force_ipsen.max()

5.807602843343166

In [67]:
# Ipsen upper bound (linearize). Absolute value and L1 norm.
brute_force_ipsen = (
    (rank_one_vector[selected_data] ** 2).sum()
    + (rank_one_vector[brute_force_inds] ** 2).sum(axis=1))
brute_force_total_inds = np.c_[
    np.tile(selected_data[None, :], [len(brute_force_inds), 1]),
    brute_force_inds]
residual_cov_abs = np.abs(residual_data.T @ residual_data)
x, y = np.meshgrid(range(k), range(k))
brute_force_ipsen_res = np.matmul(
    residual_cov_abs[
        brute_force_total_inds[:, x],
        brute_force_total_inds[:, y],
    ],
    np.abs(rank_one_vector)[brute_force_total_inds, :])
brute_force_ipsen_res /= np.linalg.norm(
    np.abs(rank_one_vector)[brute_force_total_inds, :],
    axis=1,
    keepdims=True
)
brute_force_ipsen += np.linalg.norm(brute_force_ipsen_res, axis=1, ord=1)
brute_force_ipsen.max()

7.018428247573138

In [63]:
residual_cov_abs[
        brute_force_total_inds[:, x],
        brute_force_total_inds[:, y],
    ].shape

(1502501, 7, 7)

In [51]:
brute_force_ipsen_res.shape

(101, 1)