In [1]:
import ast

import h5py
import numpy as np
from scipy import stats
from scipy.integrate import nquad
from scipy.special import digamma, gamma, rel_entr

from utils.misc_utils import get_logger

In [2]:
from utils.knn_evaluators import Evaluator_KNN

# NOTE(review): `eval` shadows Python's built-in eval(). The name is kept
# because every later cell refers to the evaluator through it.
eval = Evaluator_KNN()

# Paths and logger handed to the evaluator.
eval.data_path = "data.hdf5"
eval.out_path = "results/knn.hdf5"
eval.logger = get_logger("results/knn_mi.log")

eval.quantity = "KLD"

# Settings stored on the evaluator; how they are consumed is defined in
# utils.knn_evaluators.
eval.hyper_params = [1, 3, 5, 15, 50]
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
eval.seeds = range(1, 3)

# Echo every instance attribute so the configuration is visible in the output.
for attr_name, attr_value in vars(eval).items():
    print(f"{attr_name} - {attr_value}")

eval.create_database()
eval.create_group()

data_path - data.hdf5
out_path - results/knn.hdf5
logger - <RootLogger root (DEBUG)>
quantity - KLD
hyper_params - [1, 3, 5, 15, 50]
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 3)
results - None


In [3]:
# # # # # UNIFORM # # # # #

experiment = "uniform"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

# Log of the ratio of the second hyper-parameter of q to that of p.
# NOTE(review): this equals KL(p || q) for uniforms only if that entry is the
# width and p's support lies inside q's -- confirm against the data generator.
scale_p = dist1_params[0][1]
scale_q = dist2_params[0][1]
true_kld = np.log(scale_q / scale_p)

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "uniform||uniform", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:26:12 - Creating converter from 3 to 5
  kld = (d / n) * np.sum(np.log(nu / rho)) + np.log(m / (n - 1))
2023-11-14 11:26:12 - (UNIFORM, 1, 10000, 1) - Time: 0.01931 s - Est.: -inf nats
2023-11-14 11:26:16 - (UNIFORM, 1, 10000, 2) - Time: 0.01414 s - Est.: -inf nats


True KLD: 0.288 nats


In [4]:
# # # # # NORMAL # # # # #

experiment = "normal"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

# First entry of each parameter list; index 0 is used as the mean and
# index 1 as the standard deviation in the closed form below.
mu_p, sigma_p = dist1_params[0][0], dist1_params[0][1]
mu_q, sigma_q = dist2_params[0][0], dist2_params[0][1]

# Closed-form KL(p || q) between two univariate normals (reference value).
true_kld = 0.5 * (
    (sigma_p / sigma_q) ** 2
    + (mu_q - mu_p) ** 2 / (sigma_q ** 2)
    - 1
    + np.log((sigma_q ** 2) / (sigma_p ** 2))
)

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "normal||normal", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:27:54 - (NORMAL, 1, 10000, 1) - Time: 0.01492 s - Est.: -inf nats
2023-11-14 11:27:57 - (NORMAL, 1, 10000, 2) - Time: 0.01496 s - Est.: 0.384 nats


True KLD: 0.361 nats


In [5]:
# # # # # NORMAL-MIXTURE # # # # #

experiment = "normal-mixture"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

def pdf_normal(x, params):
    """Density of a weighted mixture of univariate normals evaluated at ``x``.

    Args:
        x: Point (or array of points) at which to evaluate the density.
        params: Iterable of ``(loc, scale, weight)`` triples, one per component.

    Returns:
        The sum over components of ``weight * N(x; loc, scale)``
        (``0.0`` when ``params`` is empty).
    """
    density = 0.0
    for loc, scale, weight in params:
        # Call the pdf directly: constructing a frozen ``stats.norm`` object on
        # every integrand evaluation is wasted work inside ``nquad``.
        density += stats.norm.pdf(x, loc=loc, scale=scale) * weight
    return density


def kld_normals(x, params1, params2):
    """Integrand of KL(p || q) for two normal mixtures: ``p(x) * log(p(x) / q(x))``.

    Args:
        x: Integration variable.
        params1: Mixture parameters of ``p`` (see ``pdf_normal``).
        params2: Mixture parameters of ``q`` (see ``pdf_normal``).

    Returns:
        The pointwise relative entropy. Where ``p(x)`` underflows to zero the
        result is ``0`` (the ``0 * log 0 = 0`` convention) instead of the NaN
        that ``p * np.log(p / q)`` yields, so tail evaluations cannot poison
        the numerical integral.
    """
    p = pdf_normal(x, params1)
    q = pdf_normal(x, params2)
    return rel_entr(p, q)

# Finite integration range used for the 1-D integrand.
norm_lims = [[-15, 25]]

# Numerical Integration Solution: nquad returns (value, error estimate); keep the value.
quad_result = nquad(kld_normals, norm_lims, args=(dist1_params, dist2_params))
true_kld = quad_result[0]

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "normal-mixture||normal", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:28:17 - (NORMAL-MIXTURE, 1, 10000, 1) - Time: 0.01398 s - Est.: 0.184 nats
2023-11-14 11:28:20 - (NORMAL-MIXTURE, 1, 10000, 2) - Time: 0.01460 s - Est.: -inf nats
2023-11-14 11:28:23 - Creating converter from 5 to 3


True KLD: 0.179 nats


In [6]:
# # # # # EXPONENTIAL # # # # #

experiment = "exponential"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

# Second entry of the first parameter tuple; the formula below treats it as
# the scale (1 / rate) of each exponential -- TODO confirm against the generator.
scale_p = dist1_params[0][1]
scale_q = dist2_params[0][1]

# Closed-form KL(p || q) between two exponentials (reference value).
true_kld = np.log(1 / scale_p) - np.log(1 / scale_q) + scale_p / scale_q - 1

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "exp||exp", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:28:29 - (EXPONENTIAL, 1, 10000, 1) - Time: 0.02581 s - Est.: -inf nats
2023-11-14 11:28:32 - (EXPONENTIAL, 1, 10000, 2) - Time: 0.01725 s - Est.: 0.670 nats


True KLD: 0.636 nats


In [9]:
# # # # # BIVARIATE NORMAL # # # # #

experiment = "bivariate-normal"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

# Each first entry is a 3-tuple; the third element is not needed here.
m1, s1, _ = dist1_params[0]
m2, s2, _ = dist2_params[0]
m1, s1, m2, s2 = (np.array(value) for value in (m1, s1, m2, s2))

# Closed-form KL(p || q) between two multivariate normals, term by term.
log_det_ratio = np.log(np.linalg.det(s2) / np.linalg.det(s1))
trace_term = np.trace(np.linalg.inv(s2) @ s1)
mahalanobis_term = (m2 - m1).T @ np.linalg.inv(s2) @ (m2 - m1)
true_kld = 0.5 * (log_det_ratio + trace_term + mahalanobis_term - len(m2))

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "bivariate-normal||bivariate-normal", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:30:34 - (BIVARIATE-NORMAL, 1, 10000, 1) - Time: 0.01830 s - Est.: -inf nats
2023-11-14 11:30:39 - (BIVARIATE-NORMAL, 1, 10000, 2) - Time: 0.01884 s - Est.: 0.937 nats


True KLD: 0.949 nats


In [8]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

def pdf_mnorm(x, y, params):
    """Density of a weighted mixture of bivariate normals evaluated at ``(x, y)``.

    Args:
        x: First coordinate (scalar or array).
        y: Second coordinate (scalar or array).
        params: Iterable of ``(mean, cov, weight)`` triples, one per component.

    Returns:
        The sum over components of ``weight * N((x, y); mean, cov)``
        (``0.0`` when ``params`` is empty).
    """
    # The stacked evaluation point does not depend on the component,
    # so build it once instead of once per loop iteration.
    point = np.dstack((x, y))
    density = 0.0
    for mean, cov, weight in params:
        density += stats.multivariate_normal.pdf(point, mean=mean, cov=cov) * weight
    return density


def kld_mnorms(x, y, params1, params2):
    """Integrand of KL(p || q) for two bivariate normal mixtures.

    Args:
        x: First integration variable.
        y: Second integration variable.
        params1: Mixture parameters of ``p`` (see ``pdf_mnorm``).
        params2: Mixture parameters of ``q`` (see ``pdf_mnorm``).

    Returns:
        ``p * log(p / q)`` at ``(x, y)``. Where ``p`` underflows to zero the
        result is ``0`` (the ``0 * log 0 = 0`` convention) instead of the NaN
        produced by ``p * np.log(p / q)``.
    """
    p = pdf_mnorm(x, y, params1)
    q = pdf_mnorm(x, y, params2)
    return rel_entr(p, q)

# Finite integration range for each of the two coordinates.
mnorm_lims = [[-7, 7], [-7, 7]]

# nquad returns (value, error estimate); keep the value.
quad_result = nquad(kld_mnorms, mnorm_lims, args=(dist1_params, dist2_params))
true_kld = quad_result[0]

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "bivariate-normal-mixture||bivariate-normal", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:29:10 - (BIVARIATE-NORMAL-MIXTURE, 1, 10000, 1) - Time: 0.01987 s - Est.: -inf nats
2023-11-14 11:29:16 - (BIVARIATE-NORMAL-MIXTURE, 1, 10000, 2) - Time: 0.02592 s - Est.: 0.329 nats


True KLD: 0.312 nats


In [11]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

def pdf_gamma_exponential(x, y, params):
    """Weighted sum of gamma-exponential densities evaluated at ``(x, y)``.

    Each component contributes ``weight * x**theta * exp(-x - x*y) / Gamma(theta)``.

    Args:
        x: First coordinate.
        y: Second coordinate.
        params: Iterable of ``(theta, weight)`` pairs, one per component.

    Returns:
        The summed density (``0.0`` when ``params`` is empty).
    """
    density = 0.0
    for theta, weight in params:
        density += (1 / gamma(theta)) * (x**theta) * np.exp(-x - x * y) * weight
    return density


def kld_gamma_exponentials(x, y, params1, params2):
    """Integrand of KL(p || q) for two gamma-exponential densities.

    Args:
        x: First integration variable.
        y: Second integration variable.
        params1: Parameters of ``p`` (see ``pdf_gamma_exponential``).
        params2: Parameters of ``q`` (see ``pdf_gamma_exponential``).

    Returns:
        ``p * log(p / q)`` at ``(x, y)``. Both densities vanish at ``x = 0``,
        where ``p * np.log(p / q)`` evaluates to NaN; ``rel_entr`` returns ``0``
        there (the ``0 * log 0 = 0`` convention).
    """
    p = pdf_gamma_exponential(x, y, params1)
    q = pdf_gamma_exponential(x, y, params2)
    return rel_entr(p, q)

# Finite integration range for x and y respectively.
gexp_lims = [[0, 15], [0, 12]]

# nquad returns (value, error estimate); keep the value.
quad_result = nquad(kld_gamma_exponentials, gexp_lims, args=(dist1_params, dist2_params))
true_kld = quad_result[0]

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "gexp||gexp", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:31:02 - (GEXP, 1, 10000, 1) - Time: 0.04296 s - Est.: 0.211 nats
2023-11-14 11:31:07 - (GEXP, 1, 10000, 2) - Time: 0.01801 s - Est.: -inf nats


True KLD: 0.175 nats


In [12]:
# # # # # 4D-GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

def kld_scipy_mnorm(d1, d2):
    """Closed-form KL(d1 || d2) between two multivariate normal distributions.

    Args:
        d1: Object exposing ``mean`` and ``cov`` attributes (e.g. a frozen
            ``scipy.stats.multivariate_normal``); plays the role of ``p``.
        d2: Same kind of object; plays the role of ``q``.

    Returns:
        The divergence in nats:
        ``0.5 * (log(det S2 / det S1) + tr(S2^-1 S1) + d^T S2^-1 d - n)``
        with ``d = mean1 - mean2`` and ``n = len(mean1)``.
    """
    cov1 = np.asarray(d1.cov)
    cov2 = np.asarray(d2.cov)
    diff = np.asarray(d1.mean) - np.asarray(d2.mean)

    # Log-determinant ratio via slogdet: raw determinants can underflow to 0 or
    # overflow for high dimensions / extreme scales, turning the log into NaN/inf.
    a = np.linalg.slogdet(cov2)[1] - np.linalg.slogdet(cov1)[1]
    # Solve linear systems instead of forming inv(cov2) explicitly (twice).
    b = np.trace(np.linalg.solve(cov2, cov1))
    c = diff @ np.linalg.solve(cov2, diff.T)
    n = len(d1.mean)

    kld = 0.5 * (a + b) + 0.5 * (c - n)
    return kld

# Build frozen multivariate normals from the first parameter entry:
# index 0 is passed as the mean, index 1 as the covariance.
mean_p, cov_p = dist1_params[0][0], dist1_params[0][1]
mean_q, cov_q = dist2_params[0][0], dist2_params[0][1]
dist1 = stats.multivariate_normal(mean=mean_p, cov=cov_p)
dist2 = stats.multivariate_normal(mean=mean_q, cov=cov_q)

true_kld = kld_scipy_mnorm(dist1, dist2)

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "4dgauss||4dgauss", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:31:30 - (4D-GAUSSIAN, 1, 10000, 1) - Time: 0.04398 s - Est.: 0.852 nats
2023-11-14 11:31:46 - (4D-GAUSSIAN, 1, 10000, 2) - Time: 0.06512 s - Est.: 0.814 nats


True KLD: 0.901 nats


In [13]:
# Replaces the sample sizes set in the configuration cell; this list omits 100_000.
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000]

# # # # # 10D-GAUSSIAN # # # # #

experiment = "10d-gaussian"

# Ground truth: parse the hyper-parameters stored as attributes of "p" and "q".
with h5py.File(eval.data_path, "r") as data_file:
    group = data_file[experiment]
    dist1_params = ast.literal_eval(group["p"].attrs["hyper_params"])
    dist2_params = ast.literal_eval(group["q"].attrs["hyper_params"])

def kld_scipy_mnorm(d1, d2):
    """Closed-form KL(d1 || d2) between two multivariate normal distributions.

    Args:
        d1: Object exposing ``mean`` and ``cov`` attributes (e.g. a frozen
            ``scipy.stats.multivariate_normal``); plays the role of ``p``.
        d2: Same kind of object; plays the role of ``q``.

    Returns:
        The divergence in nats:
        ``0.5 * (log(det S2 / det S1) + tr(S2^-1 S1) + d^T S2^-1 d - n)``
        with ``d = mean1 - mean2`` and ``n = len(mean1)``.
    """
    cov1 = np.asarray(d1.cov)
    cov2 = np.asarray(d2.cov)
    diff = np.asarray(d1.mean) - np.asarray(d2.mean)

    # Log-determinant ratio via slogdet: raw determinants can underflow to 0 or
    # overflow for high dimensions / extreme scales, turning the log into NaN/inf.
    a = np.linalg.slogdet(cov2)[1] - np.linalg.slogdet(cov1)[1]
    # Solve linear systems instead of forming inv(cov2) explicitly (twice).
    b = np.trace(np.linalg.solve(cov2, cov1))
    c = diff @ np.linalg.solve(cov2, diff.T)
    n = len(d1.mean)

    kld = 0.5 * (a + b) + 0.5 * (c - n)
    return kld

# Build frozen multivariate normals from the first parameter entry:
# index 0 is passed as the mean, index 1 as the covariance.
mean_p, cov_p = dist1_params[0][0], dist1_params[0][1]
mean_q, cov_q = dist2_params[0][0], dist2_params[0][1]
dist1 = stats.multivariate_normal(mean=mean_p, cov=cov_p)
dist2 = stats.multivariate_normal(mean=mean_q, cov=cov_q)

true_kld = kld_scipy_mnorm(dist1, dist2)

eval.evaluate_kld(experiment, 1)

# Store the reference value and report it.
eval.write_double_to_hdf5(experiment, "10dgauss||10dgauss", true_kld)
print(f"True KLD: {true_kld:.3f} nats")

2023-11-14 11:32:18 - (10D-GAUSSIAN, 1, 10000, 1) - Time: 1.33175 s - Est.: 7.395 nats
2023-11-14 11:35:58 - (10D-GAUSSIAN, 1, 10000, 2) - Time: 1.32266 s - Est.: 7.186 nats


True KLD: 6.999 nats
