In [2]:
import numpy as np
import pandas as pd
import random
import time

from matplotlib import pyplot as plt
import seaborn as sns

from selectinf.nbd_lasso import nbd_lasso
from selectinf.Utils.discrete_family import discrete_family
from selectinf.Tests.instance import GGM_instance

from selectinf.Tests.nbd_naive_and_ds import *

In [7]:
n = 800#np_pair[0]
p = 30#np_pair[1]
weights_const = 0.5
ridge_const = 1.
randomizer_scale = 1.
ncoarse = 200
logic=1

In [8]:
def approx_inference_sim(X, prec, weights_const=1., ridge_const=0., randomizer_scale=1.,
                         parallel=False, ncores=4, logic = 'OR', ncoarse=200,
                         solve_only=False, continued=False, nbd_instance_cont=None):
    # Precision matrix is in its original order, not scaled by root n
    # X is also in its original order
    n,p = X.shape

    if not continued:
        nbd_instance = nbd_lasso.gaussian(X, n_scaled=False, weights_const=weights_const,
                                          ridge_terms=ridge_const, randomizer_scale=randomizer_scale)
        active_signs_random = nbd_instance.fit(logic=logic)
        nonzero = nbd_instance.nonzero

    # If we only need to solve the Lasso
    if solve_only:
        return nonzero, nbd_instance

    # If we continue a previous run with a nontrivial selection
    if continued:
        nbd_instance = nbd_instance_cont
        nonzero = nbd_instance.nonzero
        assert nonzero.sum() > 0

    # Construct intervals
    if nonzero.sum() > 0:
        # Intervals returned is in its original (unscaled) order
        intervals = nbd_instance.inference(parallel=parallel, ncoarse=ncoarse, ncores=ncores)
        # coverage is upper-triangular
        coverage = get_coverage(nonzero, intervals, prec, n, p, scale=False)
        interval_len = 0
        nonzero_count = 0  # nonzero_count is essentially upper-triangular
        for i in range(p):
            for j in range(i+1,p):
                if nonzero[i,j]:
                    interval = intervals[i,j,:]
                    interval_len = interval_len + (interval[1] - interval[0])
                    nonzero_count = nonzero_count + 1
        avg_len = interval_len / nonzero_count
        cov_rate = coverage.sum() / nonzero_count
        return nonzero, intervals, cov_rate, avg_len
    return None, None, None, None

In [50]:
def GGM_instance(n=100, p=100, max_edges=10, signal=1.):
    from scipy.stats import norm
    def generate_vertices(p):
        vertices = np.random.uniform(size=(p,2))
        return vertices
    def connecting_prob(v1,v2,p):
        # Euclidean distance of v1, v2
        d = np.linalg.norm(v1-v2)
        # calculating connecting probability
        prob = norm.pdf(d/np.sqrt(p))
        return prob
    def remove_edges(p, adj, max_edges):
        idx = list(range(p))
        np.random.shuffle(idx)

        for i in idx:
            if np.all(np.sum(adj, axis=1) <= (max_edges+1)):
                break
            # Indices of nodes connected to v_i
            nonzero_i = list(np.nonzero(adj[i])[0])
            n_edges = len(nonzero_i)

            # Delete some edges if there are redundancies
            if n_edges > (max_edges+1):
                nonzero_i.remove(i)
                removed_idx_i = random.sample(nonzero_i,n_edges-max_edges)
                # Remove other edges
                adj[i,removed_idx_i] = 0
                adj[removed_idx_i,i] = 0

        return adj

    vertices = generate_vertices(p)

    adj_mat = np.eye(p)

    for i in range(p):
        for j in range(i+1,p):
            v_i = vertices[i]
            v_j = vertices[j]
            adj_mat[i,j] = np.random.binomial(n=1,
                                              p=connecting_prob(v1=v_i,
                                                                v2=v_j,
                                                                p=p))

    # symmetrize
    adj_mat = adj_mat + adj_mat.T - np.eye(p)

    # remove redundant edges
    adj_mat = remove_edges(p, adj_mat, max_edges)

    # maximal off-diag value to guarantee diagonal dominance
    max_off_diag = 1/max_edges

    # generate a PD precision
    precision = np.random.uniform(size=(p,p), low=0.75,
                                  high=0.85)

    # precision = max_off_diag * (np.random.binomial(n=1,p=0.5,size=(p, p)) * 2 - 1)
    # symmetrize precision
    precision = np.tril(precision)
    precision = precision + precision.T
    # sparsify precision based on adjacency matrix
    precision = precision * adj_mat
    np.fill_diagonal(precision, 1)
    cov = np.linalg.inv(precision)

    # standardize the covariance
    cov = cov / np.outer(np.sqrt(np.diag(cov)), np.sqrt(np.diag(cov)))
    precision = np.linalg.inv(cov)

    X = np.random.multivariate_normal(mean=np.zeros(p),
                                      cov=cov, size=n)

    return precision, cov, X

In [51]:
prec,cov,X = GGM_instance(n=n, p=p, max_edges=1, signal=1)

In [52]:
print("|E^*|:", (prec != 0).sum())

|E^*|: 32


In [53]:
prec[prec>1]

array([3.52017075, 2.97849481, 2.97849481, 3.52017075])

In [54]:
start = time.time()
# Approximate inference
nbd_instance = nbd_lasso.gaussian(X, n_scaled=False, weights_const=weights_const,
                                          ridge_terms=ridge_const, randomizer_scale=randomizer_scale)
active_signs_random = nbd_instance.fit(logic=logic)
nonzero = nbd_instance.nonzero
intervals = nbd_instance.inference(parallel=False, ncoarse=200, ncores=1)


print("Approx |E|:", nonzero.sum())
end = time.time()
print("time used:", end-start)
print("avg time:", (end-start) / nonzero.sum())

Inference for 0 , 2
Inference for 4 , 20


  self._partition *= np.exp(_largest)


Inference for 4 , 21
Inference for 5 , 7
Inference for 9 , 19
Inference for 18 , 19
Inference for 19 , 28
Inference for 21 , 24
Inference for 22 , 29
Approx |E|: 18
time used: 15.44553804397583
avg time: 0.8580854468875461


In [56]:
intervals[4,20]

array([2.95457443, 3.54074396])

In [13]:
#Approx |E|: 98
#time used: 492.61628794670105
#avg time: 5.026696815782664

In [57]:
np.finfo(np.float64).max

1.7976931348623157e+308

In [59]:
np.finfo(float).max

1.7976931348623157e+308