In [1]:
import numpy as np
import pandas as pd
import random
import time

from matplotlib import pyplot as plt
import seaborn as sns

from selectinf.nbd_lasso import nbd_lasso
from selectinf.Utils.discrete_family import discrete_family
from selectinf.Tests.instance import GGM_instance

from selectinf.Tests.nbd_naive_and_ds import *

In [14]:
# Simulation settings shared by the cells below.
n = 400                  # sample size handed to GGM_instance
p = 20                   # dimension handed to GGM_instance
weights_const = 0.2      # forwarded to nbd_lasso.gaussian(weights_const=...)
ridge_const = 1.0        # forwarded to nbd_lasso.gaussian(ridge_terms=...)
randomizer_scale = 1.0   # forwarded to nbd_lasso.gaussian(randomizer_scale=...)
ncoarse = 200            # same value as approx_inference_sim's ncoarse default
# NOTE(review): approx_inference_sim defaults to logic='OR'; confirm that the
# integer 1 is a value nbd_lasso.fit(logic=...) accepts.
logic = 1

In [15]:
def approx_inference_sim(X, prec, weights_const=1., ridge_const=0., randomizer_scale=1.,
                         parallel=False, ncores=4, logic = 'OR', ncoarse=200,
                         solve_only=False, continued=False, nbd_instance_cont=None):
    """Run randomized neighborhood-lasso selection and approximate inference.

    Parameters
    ----------
    X : array with ``X.shape == (n, p)``
        Data matrix in its original (unscaled) order.
    prec : array
        Precision matrix in its original order, not scaled by root n.
        Only used to evaluate coverage of the intervals.
    weights_const, ridge_const, randomizer_scale :
        Forwarded to ``nbd_lasso.gaussian`` (``ridge_const`` as ``ridge_terms``).
    parallel, ncores, ncoarse :
        Forwarded to ``nbd_instance.inference``.
    logic :
        Forwarded to ``nbd_instance.fit``.
    solve_only : bool
        If True, stop after selection and return ``(nonzero, nbd_instance)``.
    continued : bool
        If True, skip fitting and reuse ``nbd_instance_cont`` instead.
    nbd_instance_cont :
        Previously fitted instance; required when ``continued`` is True.

    Returns
    -------
    ``(nonzero, nbd_instance)`` when ``solve_only`` is True; otherwise
    ``(nonzero, intervals, cov_rate, avg_len)``, or ``(None, None, None, None)``
    when no edge is selected in the upper triangle of ``nonzero``.

    Raises
    ------
    ValueError
        If ``continued`` is True but ``nbd_instance_cont`` is None.
    AssertionError
        If inference is continued from an instance with an empty selection.
    """
    n, p = X.shape

    # Resolve the instance first so that every later branch (including
    # solve_only together with continued) sees nonzero and nbd_instance bound.
    if continued:
        if nbd_instance_cont is None:
            raise ValueError("continued=True requires nbd_instance_cont")
        nbd_instance = nbd_instance_cont
    else:
        nbd_instance = nbd_lasso.gaussian(X, n_scaled=False, weights_const=weights_const,
                                          ridge_terms=ridge_const, randomizer_scale=randomizer_scale)
        nbd_instance.fit(logic=logic)
    nonzero = nbd_instance.nonzero

    # If we only need to solve the Lasso
    if solve_only:
        return nonzero, nbd_instance

    # A continued run is expected to carry a nontrivial selection
    if continued:
        assert nonzero.sum() > 0, "continued run has an empty selection"

    if nonzero.sum() <= 0:
        return None, None, None, None

    # Intervals returned are in the original (unscaled) order
    intervals = nbd_instance.inference(parallel=parallel, ncoarse=ncoarse, ncores=ncores)
    # coverage is upper-triangular
    coverage = get_coverage(nonzero, intervals, prec, n, p, scale=False)

    # Accumulate interval lengths over selected upper-triangular pairs only,
    # so that each edge (i, j) with i < j is counted once.
    interval_len = 0
    nonzero_count = 0
    for i in range(p):
        for j in range(i + 1, p):
            if nonzero[i, j]:
                lower, upper = intervals[i, j, 0], intervals[i, j, 1]
                interval_len = interval_len + (upper - lower)
                nonzero_count = nonzero_count + 1

    # Guard the averages: nonzero.sum() can be positive while the upper
    # triangle is empty (e.g. only diagonal or lower-triangular entries set).
    if nonzero_count == 0:
        return None, None, None, None

    avg_len = interval_len / nonzero_count
    cov_rate = coverage.sum() / nonzero_count
    return nonzero, intervals, cov_rate, avg_len

In [18]:
# Simulate one problem instance; the three returned objects are unpacked as
# prec, cov and X (presumably precision matrix, covariance and data — confirm
# against GGM_instance).
prec, cov, X = GGM_instance(
    n=n,
    p=p,
    max_edges=2,
    signal=0.7,
)

In [19]:
# Count of nonzero entries in prec.
# NOTE(review): this counts every nonzero entry of the matrix (both triangles
# and any nonzero diagonal), not unique edges — confirm this is the intended
# meaning of |E^*|.
true_nonzero_count = (prec != 0).sum()
print("|E^*|:", true_nonzero_count)

|E^*|: 40


In [20]:
# Time one full run of approximate inference on the simulated instance.
# perf_counter is monotonic, so the elapsed time cannot be distorted by
# system clock adjustments during a long run.
start = time.perf_counter()
nonzero_approx, intervals_approx, cov_rate_approx, avg_len_approx = approx_inference_sim(
    X, prec,
    weights_const=weights_const,
    ridge_const=ridge_const,
    randomizer_scale=randomizer_scale,
    parallel=False, ncores=1,
    logic=logic, solve_only=False, continued=False,
)
end = time.perf_counter()

# approx_inference_sim returns None for every output when nothing is selected,
# so guard both the count and the per-edge average.
n_selected = 0 if nonzero_approx is None else nonzero_approx.sum()
print("Approx |E|:", n_selected)
print("time used:", end - start)
if n_selected > 0:
    print("avg time:", (end - start) / n_selected)
else:
    print("avg time: n/a (no edges selected)")

Inference for 0 , 1
Inference for 0 , 2
Inference for 0 , 3
Inference for 0 , 5


KeyboardInterrupt: 

In [13]:
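# Recorded output from an earlier completed run of the timing cell
# (times in seconds; the settings used for that run are not recorded here):
#Approx |E|: 98
#time used: 492.61628794670105
#avg time: 5.026696815782664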