In [1]:
import numpy as np
np.set_printoptions(linewidth = 150, precision = 4, suppress = True)

In [2]:
data = np.loadtxt('../NC-Data.csv', delimiter=',', dtype=str)
data = data[1:].astype(float)
# Synthesize columns holding observations, reproducing the cov matrix.
raw_data = np.linalg.cholesky(data).T
np.testing.assert_allclose(data, raw_data.T @ raw_data)
data

array([[ 1.    , -0.0461,  0.2312, ...,  0.2704,  0.4664,  0.3672],
       [-0.0461,  1.    , -0.0671, ..., -0.0515, -0.0944, -0.0349],
       [ 0.2312, -0.0671,  1.    , ...,  0.147 ,  0.2608,  0.6313],
       ...,
       [ 0.2704, -0.0515,  0.147 , ...,  1.    ,  0.2066,  0.1538],
       [ 0.4664, -0.0944,  0.2608, ...,  0.2066,  1.    ,  0.3486],
       [ 0.3672, -0.0349,  0.6313, ...,  0.1538,  0.3486,  1.    ]])

In [3]:
nodes = np.loadtxt('../NC-K7-Trace-Nodes.csv', delimiter=',', dtype=str)
nodes = nodes[1:].astype(float)
bounds = np.loadtxt('../NC-K7-Trace-Bounds.csv', delimiter=',', dtype=str)
bounds = bounds[1:].astype(float)

In [4]:
np.argmax((nodes == 1).sum(axis=0) == 3)

42

In [5]:
bounds[:, 42]

array([4.1355, 5.8707, 7.    ])

In [6]:
selected = np.where(nodes[:, 42] == 1)[0]
selected

array([ 3, 83, 85])

In [7]:
import scipy.special
def ncr(n,k):
    return int(scipy.special.binom(n,k))
ncr(10,3)

120

In [8]:
def combination(n, p, x):
    result = [None for i in range(p)]
    r = 0
    k = 0
    for i in range(p-1):
        result[i] = result[i-1] if i>0 else 0
        result[i] += 1
        r = ncr(n-result[i], p-(i+1))
        k += r
        while k < x:
            result[i] += 1
            r = ncr(n-result[i], p-(i+1))
            k += r
        k -= r
    result[p-1] = result[p-2] + x - k
    return result

In [9]:
import tensorflow as tf

In [10]:
def pascal(n, max_k=None):
    max_k = n if max_k is None else max_k
    result = np.zeros([n,n], np.int64)
    result[:, 0] = 1
    for i in range(1, n):
        for j in range(1, min(i+1, max_k)):
            result[i, j] = result[i-1, j-1] + result[i-1, j]
    return result

In [11]:
ncr = tf.constant(pascal(101, 10))

In [12]:
ncr.numpy().max()

1902231808400

In [13]:
@tf.function(jit_compile=True)
def combination(n, p, x):
    result = tf.TensorArray(ncr.dtype, size=p)
    r = tf.constant(0, ncr.dtype)
    k = tf.constant(0, ncr.dtype)
    next_value = tf.constant(0, ncr.dtype)
    for i in tf.range(p-1):
        next_value += 1
        index = tf.cast(i, tf.int64)
        r = ncr[n-next_value, p-(index+1)]
        k += r
        while k < x:
            next_value += 1
            r = ncr[n-next_value, p-(index+1)]
            k += r
        k -= r
        result = result.write(i, next_value)
    result = result.write(p-1, next_value + x - k)
    return result.stack()

In [14]:
@tf.function(jit_compile=True)
def combinations_batch(n, k, start, limit):
    return tf.map_fn(
        lambda i: combination(n, k, i),
        tf.range(start, limit),
        dtype=tf.int64)

In [15]:
combination(tf.constant(10, tf.int64), 3, tf.constant(15, tf.int64))

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([ 1,  3, 10])>

In [16]:
scipy.special.binom(10, 8)

45.0

In [20]:
combinations_batch(tf.constant(10,tf.int64), 8, tf.constant(0,tf.int64), tf.constant(45,tf.int64)).shape

TensorShape([45, 8])

In [18]:
scipy.special.binom(20, 8)

125970.0

In [21]:
scipy.special.binom(101, 4)

4082925.0

In [19]:
%timeit combinations_batch(tf.constant(20,tf.int64), 8, tf.constant(0,tf.int64), tf.constant(125970,tf.int64))

15.3 ms ± 667 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
data = tf.constant(data, tf.float32)
raw_data = tf.constant(raw_data, tf.float32)

In [30]:
%%timeit
# Gathering every possible covariance matrix: Test throughput.
k = 7
selected = np.where(nodes[:, 42] == 1)[0]
free = np.where(nodes[:, 42] == -1)[0]
choices = combinations_batch(free.shape[0], k - selected.shape[0], 1, 1+ncr[free.shape[0], k - selected.shape[0]]) - 1
choices = tf.gather(free, choices)
choices = tf.concat(
    [
        # Broadcast selected (repeat along the outer axis, N times)
        selected[None, :] + 0 * choices[:, 0:1],
        choices,
    ],
    axis=1,
)
choices = choices[:, None, :]
choices_lookup = tf.concat(
    [
        (choices + 0 * tf.transpose(choices, [0, 2, 1]))[:, :, :, None],
        (tf.transpose(choices, [0, 2, 1]) + 0 * choices)[:, :, :, None],
    ],
    axis=3
)
tf.gather_nd(data, choices_lookup)

1.15 s ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
# Selected: indicator is 1.
# Free: indicator is 0.
def brute_force_eigvalsh(k, selected, free):
    choices = combinations_batch(free.shape[0], k - selected.shape[0], 1, 1+ncr[free.shape[0], k - selected.shape[0]]) - 1
    choices = tf.gather(free, choices)
    choices = tf.concat(
        [
            # Broadcast selected (repeat along the outer axis, N times)
            selected[None, :] + 0 * choices[:, 0:1],
            choices,
        ],
        axis=1,
    )
    eigvals = tf.linalg.eigvalsh(_simple_select_hermitian(data, choices))
    which_max = tf.argmax(tf.reduce_max(eigvals, axis=-1))
    return tf.reduce_max(eigvals[which_max, :]), choices[which_max]

@tf.function(jit_compile=True)
def _simple_select_hermitian(mat, choices):
    choices = choices[:, None, :]
    choices_lookup = tf.concat(
        [
            (choices + 0 * tf.transpose(choices, [0, 2, 1]))[:, :, :, None],
            (tf.transpose(choices, [0, 2, 1]) + 0 * choices)[:, :, :, None],
        ],
        axis=3
    )
    return tf.gather_nd(mat, choices_lookup)

In [16]:
brute_force_eigvalsh(7, np.where(nodes[:, 42] == 1)[0], np.where(nodes[:, 42] == -1)[0])

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Instructions for updating:
Use fn_output_signature instead


(<tf.Tensor: shape=(), dtype=float64, numpy=5.658232661283705>,
 <tf.Tensor: shape=(7,), dtype=int64, numpy=array([ 3, 83, 85, 79, 80, 81, 84])>)

In [17]:
%timeit brute_force_eigvalsh(7, np.where(nodes[:, 42] == 1)[0], np.where(nodes[:, 42] == -1)[0])

1.06 s ± 68.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# Selected: indicator is 1.
# Free: indicator is 0.
@tf.function(jit_compile=True)
def upper_bound_sparse_pca_metric(choices, XU, sigma_residual):
    linear_term = tf.reduce_sum(
        tf.gather(tf.square(XU), choices),
        axis=1,
    )

    sigma_indexing = tf.concat(
        [
            (choices[:, :, None] + 0 * choices[:, None, :])[:, :, :, None],
            (choices[:, None, :] + 0 * choices[:, :, None])[:, :, :, None],
        ],
        axis=3,
    )
    sigma_residual = tf.gather_nd(sigma_residual, sigma_indexing)
    v = tf.gather(tf.squeeze(XU, axis=1), choices)[:, :, None]
    nonlinear_term = tf.linalg.norm(
        tf.squeeze(tf.linalg.matmul(sigma_residual, v), axis=2),
        axis=1,
    ) / tf.linalg.norm(tf.squeeze(v, axis=2), axis=1)

    return tf.squeeze(linear_term, axis=1) + nonlinear_term

def upper_bound_sparse_pca(k, selected, free):
    selected_raw_data = tf.transpose(
        tf.gather(
            tf.transpose(raw_data),
            selected))
    U = tf.linalg.svd(selected_raw_data)[1][:, 0]
    XU = tf.linalg.matmul(raw_data, U[:, None], adjoint_a=True)
    raw_data_residual = raw_data - U[:, None] * tf.transpose(XU)
    sigma_residual = tf.linalg.matmul(raw_data_residual, raw_data_residual, adjoint_a=True)

    choices = combinations_batch(free.shape[0], k - selected.shape[0], 1, 1+ncr[free.shape[0], k - selected.shape[0]]) - 1
    choices = tf.gather(free, choices)
    choices = tf.concat(
        [
            # Broadcast selected (repeat along the outer axis, N times)
            selected[None, :] + 0 * choices[:, 0:1],
            choices,
        ],
        axis=1,
    )

    ub = upper_bound_sparse_pca_metric(choices, XU, raw_data_residual)
    which_max = tf.argmax(ub)
    return ub[which_max], choices[which_max]

In [19]:
upper_bound_sparse_pca(7, np.where(nodes[:, 42] == 1)[0], np.where(nodes[:, 42] == -1)[0])

(<tf.Tensor: shape=(), dtype=float64, numpy=5.7777543534984295>,
 <tf.Tensor: shape=(7,), dtype=int64, numpy=array([ 3, 83, 85, 79, 80, 82, 84])>)

In [20]:
%timeit upper_bound_sparse_pca(7, np.where(nodes[:, 42] == 1)[0], np.where(nodes[:, 42] == -1)[0])

502 ms ± 30.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
