In [1]:
import numpy as np
import matplotlib.pyplot as plt
from ebc.sequential_time_tests.non_iterative import SensitivityBasedIS
from ebc.sequential_time_tests.iterative_with_convexification import SensitivityBasedFW
from ebc.sequential_time_tests.iterative_no_convexification import SparseVI, GIGA, IHT
from ebc.gaussian import fisher_norm_under_true_gaussian_posterior, gaussian_multivariate_log_likelihood, gaussian_KL

## Multivariate Gaussian's Mean

In [2]:
def log_likelihood(params, X, y, weights):
    """Return the per-observation Gaussian log-likelihood as a column vector.

    ``params`` packs the model as ``[mean (d entries), covariance diagonal
    (d entries)]``. The dimension ``d`` is derived from ``params`` itself, so
    the function no longer relies on a notebook-level ``d`` having been
    assigned by a later cell before the first call.

    Parameters
    ----------
    params : np.ndarray
        Parameter vector with ``2 * d`` entries (flat, or a ``(2 * d, 1)``
        column); first half is the mean, second half the covariance diagonal.
    X : np.ndarray
        Observations; passed transposed to
        ``gaussian_multivariate_log_likelihood``.
        Assumes shape ``(N, d)`` as generated in this notebook -- TODO confirm
        for other callers.
    y, weights
        Not used here; presumably kept so the signature matches the callback
        interface expected by the ``ebc`` samplers -- verify against caller.

    Returns
    -------
    np.ndarray
        The diagonal of the helper's result, reshaped to ``(-1, 1)``.
    """
    flat = np.ravel(params)
    d = flat.size // 2  # first half: mean, second half: covariance diagonal

    mu = flat[:d].reshape(-1, 1)
    sigma = np.diag(flat[d:])

    # NOTE(review): only the diagonal of the helper's output is kept; this
    # presumably means the helper returns an N x N matrix whose diagonal holds
    # the per-observation values -- confirm against ebc.gaussian.
    pairwise = gaussian_multivariate_log_likelihood(X.T, mu, sigma)
    return np.diag(pairwise).reshape(-1, 1)

# https://stats.stackexchange.com/questions/90134/gradient-of-multivariate-gaussian-log-likelihood
def grad_log_likelihood(params, X, y, weights):
    """Return ``-inv(Sigma) @ (X.T - mu)`` reshaped to rows of length ``X.shape[1]``.

    ``params`` packs the model as ``[mean (d entries), covariance diagonal
    (d entries)]``. The dimension ``d`` is derived from ``params`` itself, so
    the function no longer relies on a notebook-level ``d`` having been
    assigned by a later cell before the first call.

    Parameters
    ----------
    params : np.ndarray
        Parameter vector with ``2 * d`` entries (flat, or a ``(2 * d, 1)``
        column); first half is the mean, second half the covariance diagonal.
    X : np.ndarray
        2-D array of observations, one per row (``X.shape[1]`` is used as the
        output row length).
    y, weights
        Not used here; presumably kept so the signature matches the callback
        interface expected by the ``ebc`` samplers -- verify against caller.

    Returns
    -------
    np.ndarray
        Array of shape ``(-1, X.shape[1])``.
    """
    flat = np.ravel(params)
    d = flat.size // 2  # first half: mean, second half: covariance diagonal

    mu = flat[:d].reshape(-1, 1)
    sigma = np.diag(flat[d:])

    # NOTE(review): the leading minus sign matches the gradient with respect to
    # the observation (the gradient with respect to the mean has the opposite
    # sign) -- confirm which one the caller expects.
    # NOTE(review): the (d, N) product is *reshaped*, not transposed, into rows
    # of length X.shape[1]; the two coincide for a single observation but not
    # for N > 1 -- confirm the layout the caller expects before changing it.
    return (-np.linalg.inv(sigma) @ (X.T - mu)).reshape(-1, X.shape[1])

def log_posterior(params, X, y, weights):
    """Weighted sum of the per-observation log-likelihoods.

    Computes ``weights.T @ log_likelihood(params, X, y, weights)``. No prior
    term is added in this function.

    Parameters
    ----------
    params, X, y
        Forwarded unchanged to ``log_likelihood``.
    weights : np.ndarray
        Weights whose transpose is multiplied with the log-likelihood column.

    Returns
    -------
    np.ndarray
        Result of the matrix product.
    """
    weights_row = weights.T
    per_observation = log_likelihood(params, X, y, weights)
    return weights_row @ per_observation

In [3]:
# --- Experiment configuration -------------------------------------------------
seeds = [123, 1234, 12345, 123456, 1234567]
coreset_sizes = [50, 500, 2000]
N_STEPS = 4  # every run() call below returns four timings (s1..s4)

# Data dimensions
d = 20
N = 5000

# One entry per algorithm, in the column order of the timing array:
# (class, extra keyword arguments passed to run() besides k / norm_attributes).
ALGORITHMS = [
    (SensitivityBasedIS, {"likelihood_gram_matrix": None, "norm": "2"}),
    (SensitivityBasedFW, {"likelihood_gram_matrix": None, "norm": "2"}),
    (GIGA, {"likelihood_vectors": None, "norm": "2"}),
    (SparseVI, {"likelihood_gram_matrix": None, "T": 100,
                "gamma_func": lambda x: 1e-2 / x}),
    (IHT, {"likelihood_vectors": None, "norm": "2"}),
]

# One (coreset size, step, algorithm) timing array per seed.
times = []

for seed in seeds:
    print(f"Seed is {seed}")

    # Generate Data
    np.random.seed(seed)

    # Theta as in sparse_vi
    theta = np.random.multivariate_normal(mean = np.zeros(d), cov = np.identity(d))

    # x as in sparse_vi
    x = np.random.multivariate_normal(mean = theta, cov = np.identity(d), size = N)

    # Parameters
    sigma_0 = np.identity(d)
    sigma = np.identity(d)
    mu_0 = np.zeros(d).reshape(-1, 1)

    # Full Gaussian posterior
    sigma_full = np.linalg.inv(np.linalg.inv(sigma_0) + N * np.linalg.inv(sigma))
    mu_full = sigma_full @ (np.linalg.inv(sigma_0) @ mu_0
                            + np.linalg.inv(sigma) @ np.sum(x, axis = 0).reshape(-1, 1))

    # Rebuilt for every seed so each seed starts from fresh start-value arrays.
    na = {"log_likelihood": log_likelihood,
          "log_likelihood_start_value": np.ones(2 * d),
          "S": 500,
          "log_likelihood_gradient": grad_log_likelihood,
          "approx": "MCMC",
          "MCMC_subs_size": 500,
          "log_posterior": log_posterior,
          "log_posterior_start_value": np.ones(2 * d)}

    # coreset size - step - algorithm
    time_i = np.zeros((len(coreset_sizes), N_STEPS, len(ALGORITHMS)))

    for size_idx, coreset_size in enumerate(coreset_sizes):
        print(f"Coreset size is {coreset_size}")

        # Instantiate every algorithm before running any of them, in the same
        # order as the run loop below.
        solvers = [algorithm_cls(x) for algorithm_cls, _ in ALGORITHMS]

        for algo_idx, (solver, (_, run_kwargs)) in enumerate(zip(solvers, ALGORITHMS)):
            # Unpack explicitly so an unexpected number of timings fails loudly.
            s1, s2, s3, s4 = solver.run(k = coreset_size, norm_attributes = na, **run_kwargs)
            time_i[size_idx, :, algo_idx] = (s1, s2, s3, s4)

    times.append(time_i)

Seed is 123
Coreset size is 50
Coreset size is 500
Coreset size is 2000
Seed is 1234
Coreset size is 50
Coreset size is 500
Coreset size is 2000
Seed is 12345
Coreset size is 50
Coreset size is 500
Coreset size is 2000
Seed is 123456
Coreset size is 50
Coreset size is 500
Coreset size is 2000
Seed is 1234567
Coreset size is 50
Coreset size is 500
Coreset size is 2000


In [4]:
# Raw timings: a list with one (coreset size, step, algorithm) array per seed.
times

[array([[[5.63131530e+01, 5.49513693e+01, 5.30992758e+01, 3.55777493e+02,
          5.66980970e+01],
         [5.60283661e-05, 5.23442030e-01, 1.08740687e+00, 2.19498355e+01,
          3.17065716e+00],
         [0.00000000e+00, 2.04634666e-03, 2.78704166e-02, 1.66670799e-01,
          1.43430185e+00],
         [1.59525871e-03, 1.01220584e+00, 2.28013277e-01, 1.96888447e-01,
          3.39125562e+00]],
 
        [[5.43843729e+01, 5.31409609e+01, 5.27043719e+01, 3.47682024e+02,
          5.27780433e+01],
         [3.62396240e-05, 4.96742344e+00, 1.05225897e+01, 2.06177587e+02,
          2.94220972e+00],
         [0.00000000e+00, 2.53505707e-02, 3.12123775e-01, 1.57265234e+00,
          1.53032136e+00],
         [9.67741013e-04, 1.01067722e+01, 2.21788716e+00, 1.90139079e+00,
          3.10035205e+00]],
 
        [[5.30684459e+01, 5.35252960e+01, 5.26694860e+01, 3.41586241e+02,
          5.27171440e+01],
         [3.19480896e-05, 1.97985756e+01, 4.21681218e+01, 7.97186043e+02,
          8

In [5]:
# Combine the per-seed arrays into a single array; axis 0 indexes the seed.
full_time = np.array(times)

In [12]:
# Aggregate over seeds (axis 0): per-cell median and standard deviation.
# np.std is called with its default ddof (0), i.e. the population formula.
meds = np.median(full_time, axis = 0)
stds = np.std(full_time, axis = 0)

In [14]:
# Sanity check: the seed axis has been reduced away.
meds.shape # axes: coreset size - step - algorithm

(3, 4, 5)

In [24]:
# Rows: the 4 timed steps. Columns: SensitivityBasedIS, SensitivityBasedFW, GIGA, SparseVI, IHT.
meds[0] # median timings across seeds, coreset size 50

array([[5.30020196e+01, 5.30598869e+01, 5.28064508e+01, 3.45020769e+02,
        5.26906600e+01],
       [3.00407410e-05, 4.91610289e-01, 1.07560301e+00, 2.02203243e+01,
        3.14774656e+00],
       [0.00000000e+00, 2.21061707e-03, 2.98202038e-02, 1.56371593e-01,
        1.35622573e+00],
       [9.57250595e-04, 9.75538015e-01, 2.28013277e-01, 1.99921370e-01,
        3.39125562e+00]])

In [16]:
# Rows: the 4 timed steps. Columns: SensitivityBasedIS, SensitivityBasedFW, GIGA, SparseVI, IHT.
meds[1] # median timings across seeds, coreset size 500

array([[5.28606219e+01, 5.29885361e+01, 5.27043719e+01, 3.46116689e+02,
        5.27780433e+01],
       [3.07559967e-05, 4.94825816e+00, 1.05225897e+01, 1.97411781e+02,
        2.91796923e+00],
       [0.00000000e+00, 2.33876705e-02, 3.12123775e-01, 1.57265234e+00,
        1.52761412e+00],
       [9.71078873e-04, 9.99344730e+00, 2.22850847e+00, 1.90139079e+00,
        3.07102799e+00]])

In [17]:
# Rows: the 4 timed steps. Columns: SensitivityBasedIS, SensitivityBasedFW, GIGA, SparseVI, IHT.
meds[2] # median timings across seeds, coreset size 2000

array([[5.30684459e+01, 5.30124490e+01, 5.26694860e+01, 3.45681974e+02,
        5.25127280e+01],
       [3.29017639e-05, 1.97985756e+01, 4.18619561e+01, 7.96064130e+02,
        2.88484192e+00],
       [0.00000000e+00, 1.00743294e-01, 1.21957779e+00, 6.28121829e+00,
        1.97575903e+00],
       [1.09291077e-03, 4.02775736e+01, 8.87001085e+00, 8.51622748e+00,
        2.93936324e+00]])

In [19]:
# Standard deviation across seeds for coreset size 50 (rows: steps, columns: algorithms).
stds[0]

array([[1.36191705e+00, 8.96788899e-01, 2.57697732e-01, 6.81380759e+00,
        1.85276177e+00],
       [1.03118172e-05, 1.52655547e-02, 1.46465624e-02, 7.41571232e-01,
        6.10860280e-01],
       [0.00000000e+00, 1.14610328e-04, 2.57734908e-03, 7.59892896e-03,
        3.14101832e-02],
       [2.62352858e-04, 1.52566925e-02, 3.70322900e-03, 1.19006166e-02,
        1.06743545e+00]])

In [20]:
# Standard deviation across seeds for coreset size 500 (rows: steps, columns: algorithms).
stds[1]

array([[2.05499469e+00, 1.51001707e+00, 7.98564894e-01, 3.13432487e+00,
        3.92626349e-01],
       [6.65081009e-06, 5.35564809e-02, 1.93642190e-01, 3.77097062e+00,
        7.98512003e-01],
       [0.00000000e+00, 1.05092990e-03, 6.85272289e-02, 6.78759605e-02,
        4.40322413e-01],
       [1.27746214e-04, 1.01111441e-01, 5.68941572e-02, 9.18808931e-02,
        8.53321183e-01]])

In [21]:
# Standard deviation across seeds for coreset size 2000 (rows: steps, columns: algorithms).
stds[2]

array([[4.08308823e-01, 6.91094092e-01, 4.30187177e-01, 1.90889212e+00,
        1.14268044e+00],
       [3.79856992e-06, 4.73132387e-01, 4.29299752e-01, 2.23282761e+00,
        1.59726399e+00],
       [0.00000000e+00, 5.86497460e-03, 8.70759493e-02, 1.75448769e-01,
        7.73206126e-01],
       [6.80329828e-05, 8.11967477e-01, 1.08747010e-01, 5.34968013e-01,
        2.29573262e+00]])