In [16]:
import numpy as np

from jax import jit
import jax.numpy as jnp

import klsurprise as kls

# Setting up a mock case for debuging

In [17]:
# Set the seed for reproducibility
np.random.seed(42)

# Constants and dimensions
theta_dim = 2
D_dim = 7

# Gera um vetor de parametros \theta_1 e theta_2
theta_fid_1 = np.array([0.4, 0.1])
theta_fid_2 = np.array([1.3, 0.2])


# generate F(theta) related quantities
F0 = np.random.rand(D_dim)  # 7x1 vector
M = np.random.rand(D_dim, theta_dim)  # 7x2 matrix
# generate covariance matrix for likelihood L2
C = np.random.rand(D_dim, D_dim)  # 7x7 matrix
# generate covariance matrix for posterior 1
Sigma_1 = np.random.rand(theta_dim, theta_dim)  # 2x2 matrix

# Ensure C and Sigma are symmetric and positive-definite
C = np.dot(C, C.T)
Sigma_1 = np.dot(Sigma_1, Sigma_1.T)
invS1 = np.linalg.inv(Sigma_1)

# covariance matrix of posterior 2
# Assuming Gaussianity and a flat prior, we can also derive the equations for Sigma_2. 
# See https://arxiv.org/abs/1402.3593 eq. A17.
invC  = np.linalg.inv(C)
invS2 = np.dot(M.T, np.dot(invC, M))
Sigma_2 = np.linalg.inv(invS2)

# Define the linear function F(theta)
def F(theta):
    return F0 + np.dot(M, theta)

# fiducial data vectors
################# hey there, maybe you should think some more here about putting noise to this vector!
D1_fid = F(theta_fid_1) 
D2_fid = F(theta_fid_2)

print("Fiducial parameters 1 = ", theta_fid_1)
print("Fiducial parameters 2 = ", theta_fid_2)

Fiducial parameters 1 =  [0.4 0.1]
Fiducial parameters 2 =  [1.3 0.2]


In [18]:
def multivariate_gaussian_pdf(theta, mean, cov):
    """
    Calculate the PDF of a multivariate Gaussian distribution.

    Parameters:
    mean : array-like, shape (n,)
        The mean vector of the Gaussian distribution.
    cov : array-like, shape (n, n)
        The covariance matrix of the Gaussian distribution.
    theta : array-like, shape (n,)
        The parameter vector at which to evaluate the PDF.

    Returns:
    pdf_value : float
        The PDF value of the multivariate Gaussian at the data point.
    """
    k = mean.shape[0]
    diff = theta - mean
    inv_cov = jnp.linalg.inv(cov)
    logL = -0.5 * jnp.dot(diff, jnp.dot(inv_cov, diff))
    norm_factor = jnp.log(jnp.sqrt((2 * jnp.pi) ** k * jnp.linalg.det(cov)))
    logpdf_value = logL - norm_factor
    return logpdf_value

**Define both loglikelihoods that will be used in the main function**

In [19]:
# Define the distributions as callable functions of theta and D
@jit
def logL1(theta):
    return multivariate_gaussian_pdf(theta, theta_fid_1, Sigma_1)

@jit
def logL2(theta):
    return multivariate_gaussian_pdf(theta, theta_fid_2, Sigma_2)

domain = np.array([[5,-5], [5,-5]])

# Debuging part

## old

**Class debug and nested sampling run:**

In [18]:
logL1(np.array([0.4,2]))

Array(-4.2779007, dtype=float32)

In [19]:
sup = kls.surprise_statistics(logL1, F, covariance_matrix_2=C, domain=domain, data_2=D2_fid)

In [20]:
res_1 = sup.run_nested_sampling(logL1, print_progress=True)
res_2 = sup.run_nested_sampling(logL2, print_progress=True)

0it [00:00, ?it/s]

  cur_live_logl[not_finite] = _LOWL_VAL
17228it [00:16, 899.33it/s, batch: 18 | bound: 0 | nc: 5 | ncall: 88341 | eff(%): 19.502 | loglstar: -7.965 < -6.258 < -1.843 | logz: -4.554 +/-  0.038 | stop:  1.364]         Traceback (most recent call last):
  File "/home/prm/anaconda3/envs/surprise/lib/python3.10/site-packages/dynesty/dynesty.py", line 910, in __call__
    return self.func(np.asarray(x).copy(), *self.args, **self.kwargs)
KeyboardInterrupt
17319it [00:16, 1019.54it/s, batch: 18 | bound: 0 | nc: 1 | ncall: 88734 | eff(%): 19.351 | loglstar: -7.965 < -5.506 < -1.843 | logz: -4.554 +/-  0.038 | stop:  1.364]


Exception while calling loglikelihood function:
  params: [-1.42533337 -3.56723243]
  args: []
  kwargs: {}
  exception:


KeyboardInterrupt: 

**KLD value**

In [8]:
KLD_value = sup.KLD_numerical(res_2, logL2, res_1, logL1)
print("KLD(p2|p1) = {:.2f} nats".format(KLD_value))

NameError: name 'res_2' is not defined

## Main routine

In [20]:
sup = kls.surprise_statistics(logL1, F, covariance_matrix_2=C,
                               domain=domain, data_2=D2_fid,
                               data_1_name = "NS_res_1.pkl",
                               data_2_name = "NS_res_2.pkl",
                               init_NS = True, Nppd = 4)
sup.__initialize_NS__()
sup.__initialize_PPD__(1)

----------------------------------------------------------------------
Loading posterior data
----------------------------------------------------------------------
Data loaded sucessfully!
----------------------------------------------------------------------
Loading posterior data
----------------------------------------------------------------------
Data loaded sucessfully!
Evaluating theory from sample distribution p1


Generating theory vectors:   0%|          | 0/1 [00:00<?, ?it/s]

Sampling the Posterior Predictive Distribution...


Sampling PPD:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
sample = jnp.array(sup.PPD_chain[0])
# sup.kld_worker(sample)


theta_sample = jnp.array(sup.res_1.samples_equal()[131])
theta_sample = sup.res_1.samples_equal()[131]
print(sample)
print(theta_sample)

sup.data_2_model_fun(theta_sample)
theta_sample.shape
sup.logL2(theta_sample, sup.data_2)

[0.59060407 1.9428009  1.4251133  1.800309   2.31291    0.58532536
 1.222898  ]
[-0.95229404  0.96461367]


Array(-53.519775, dtype=float32)

## Debuging kld workr

In [29]:
def kld_worker(sample, logL_mock = None, mock1_NS_result= None, logP_1=None, ndim=None, 
            prior_transform='flat', n_effective = 20000, clip_range = [-1e16, 50000]):

    if (ndim is None):
        ndim = sup.ndim
    if logL_mock is None:
        logL_mock = sup.logL2
    if mock1_NS_result is None:
        mock1_NS_result = sup.res_1 

    # create mock data and run nested sampling
    # @jit
    def logpMock_2(theta):
        return logL_mock(theta, sample) # create full posterior distribution 2
    results2 = sup.run_nested_sampling(logpMock_2, ndim=ndim, prior_transform=prior_transform, 
                                    domain=sup.domain, n_effective=n_effective, print_progress=True) # functions arguments are the best for SNIa chain.
    if logP_1 is None:
        print("Please make sure the log-posterior of data-1 is a valid function for MCMC mode!")
    
    kld_return = sup.KLD_numerical(results2, logpMock_2, mock1_NS_result, logQ=sup.logL1, domain = sup.domain, prior_transform=prior_transform,
                                clip_range = clip_range, clip_values = True, progress=False, batch_size = 1000)
        
    return kld_return, sample


In [32]:
ndim = sup.ndim
logL_mock = sup.logL2
mock1_NS_result = sup.res_1 

# create mock data and run nested sampling
# @jit
def logpMock_2(theta):
    return logL_mock(theta, sample) # create full posterior distribution 2
results2 = sup.run_nested_sampling(logpMock_2, ndim=ndim, prior_transform="flat", 
                                domain=sup.domain, n_effective=5000, print_progress=True) # functions arguments are the best for SNIa chain.

kld_return = sup.KLD_numerical(results2, logpMock_2, mock1_NS_result, logQ=sup.logL1, domain = sup.domain,
                                prior_transform="flat", progress=False, batch_size = 1000)

8539it [00:20, 409.55it/s, batch: 3 | bound: 3 | nc: 1 | ncall: 49663 | eff(%): 15.459 | loglstar: -6.587 < -3.935 < -4.136 | logz: -8.150 +/-  0.067 | stop:  0.929]             


TracerArrayConversionError: The numpy.ndarray conversion method __array__() was called on traced array with shape float32[2].
The error occurred while tracing the function logP_norm at /home/prm/klsurprise/klsurprise.py:353 for jit. This concrete value was not available in Python because it depends on the value of the argument x.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.TracerArrayConversionError

In [36]:
@jit
def logpMock_2(theta):
    return logL_mock(theta, sample) # create full posterior distribution 2
kld_return = sup.KLD_numerical(results2, logpMock_2, mock1_NS_result, logQ=sup.logL1, domain = sup.domain,
                                prior_transform="flat", progress=False, batch_size = 1000)

TracerArrayConversionError: The numpy.ndarray conversion method __array__() was called on traced array with shape float32[2].
The error occurred while tracing the function logpMock_2 at /tmp/ipykernel_25518/3978497871.py:1 for jit. This concrete value was not available in Python because it depends on the value of the argument theta.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.TracerArrayConversionError

In [None]:
res = sup.surprise_function_call(2, n_jobs = 1)

Handling dataset 1...
______________________________________________________________________
Done!

Handling posterior predictive distribution PPD(D2|D1) ...
______________________________________________________________________
Will sample KLD the same size as PPD.
Nkld =  1
Handling KLD distribution...
______________________________________________________________________


Iterating over the PPD:   0%|          | 0/1 [00:00<?, ?it/s]

UnboundLocalError: local variable 'domain_pass' referenced before assignment

In [45]:
sup.logL2(theta_sample, jnp.array(sup.data_2))

TypeError: Cannot interpret value of type <class 'klsurprise.surprise_statistics'> as an abstract array; it does not have a dtype attribute

In [29]:
sup.logL2(theta_sample, sample) 

TypeError: Cannot interpret value of type <class 'klsurprise.surprise_statistics'> as an abstract array; it does not have a dtype attribute

In [28]:
# create mock data and run nested sampling
@jit
def logpMock_2(theta):
    return sup.logL2(theta, sample) # create full posterior distribution 2

logpMock_2(theta_sample)

TypeError: Cannot interpret value of type <class 'klsurprise.surprise_statistics'> as an abstract array; it does not have a dtype attribute

In [15]:
# create mock data and run nested sampling
def logpMock_2(theta):
    return sup.logL2(theta, sample) # create full posterior distribution 2
results2 = sup.run_nested_sampling(logpMock_2, ndim=sup.ndim, prior_transform="flat", 
                                domain=sup.domain, n_effective=10000) # functions arguments are the best for SNIa chain.
if logP_1 is None:
    print("Please make sure the log-posterior of data-1 is a valid function for MCMC mode!")

kld_return = self.KLD_numerical(results2, logpMock_2, sup.res_1, logP_1=sup.logL1, domain = sup.domain, prior_transform="flat",progress=False, batch_size = 1000)

  cur_live_logl[not_finite] = _LOWL_VAL


Exception while calling loglikelihood function:
  params: [-0.52300278 -3.70938157]
  args: []
  kwargs: {}
  exception:


Traceback (most recent call last):
  File "/home/prm/anaconda3/envs/surprise/lib/python3.10/site-packages/dynesty/dynesty.py", line 910, in __call__
    return self.func(np.asarray(x).copy(), *self.args, **self.kwargs)
  File "/tmp/ipykernel_25518/2946902955.py", line 3, in logpMock_2
    return sup.logL2(theta, sample) # create full posterior distribution 2
  File "/home/prm/klsurprise/klsurprise.py", line 110, in logL2
    log_likelihood = -0.5 * (n * jnp.log(2 * jnp.pi) + log_det_cov + jnp.dot(solve, solve))
KeyboardInterrupt


KeyboardInterrupt: 

In [8]:
res = sup.surprise_function_call(2, n_jobs = 1)

Handling dataset 1...
______________________________________________________________________
Done!

Handling posterior predictive distribution PPD(D2|D1) ...
______________________________________________________________________
Will sample KLD the same size as PPD.
Nkld =  1
Handling KLD distribution...
______________________________________________________________________


Iterating over the PPD:   0%|          | 0/1 [00:00<?, ?it/s]

UnboundLocalError: local variable 'domain_pass' referenced before assignment

In [12]:
sup.data_2_model_fun(np.array([0.3,0.1]))

array([0.69450446, 1.16519453, 1.10621116, 0.68054271, 0.24146422,
       0.35661595, 0.20663764])

In [None]:
import PPD
PPD.create_ppd_chain()