In [1]:
import ast

import h5py
import numpy as np
from scipy import stats
from scipy.integrate import nquad
from scipy.special import digamma, gamma, gammaln, xlogy
from unite_toolbox.knn_estimators import calc_knn_entropy

from utils.misc_utils import get_logger

In [2]:
from utils.knn_evaluators import Evaluator_KNN

# Evaluator object shared by every experiment cell below.
# NOTE(review): the name `eval` shadows the Python builtin; it is kept
# because all later cells refer to the evaluator by this name.
eval = Evaluator_KNN()

# Input data, output file and log file.
eval.data_path = "data.hdf5"
eval.out_path = "results/knn.hdf5"
eval.logger = get_logger("results/knn_entropy.log")

# Label of the estimated quantity.
eval.quantity = "H"

# Values swept by the evaluator (their exact use is defined inside
# Evaluator_KNN, which is not visible in this notebook).
eval.hyper_params = [1, 3, 5, 15, 50]
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
eval.seeds = range(1, 3)

# Echo the resulting configuration, one "name - value" line per attribute.
for attr_name, attr_value in vars(eval).items():
    print(f"{attr_name} - {attr_value}")

eval.create_database()
eval.create_group()

data_path - data.hdf5
out_path - results/knn.hdf5
logger - <RootLogger root (DEBUG)>
quantity - H
hyper_params - [1, 3, 5, 15, 50]
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 3)
results - None


In [3]:
# # # # # UNIFORM # # # # #

experiment = "uniform"

# Calculate Truth
# The hyper-parameters are read from the attributes of the experiment's
# "p" dataset and parsed with ast.literal_eval.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): log of the second entry of the first parameter
# tuple -- presumably the width of the uniform support; TODO confirm against
# the data generator.
first_component = dist_params[0]
true_h = np.log(first_component[1])

# Evaluate calc_knn_entropy on this experiment (the meaning of the third
# argument is defined by Evaluator_KNN.evaluate, not visible here).
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:57:56 - Creating converter from 3 to 5
2023-11-14 10:57:56 - (UNIFORM, 1, 10000, 1) - Time: 0.00710 s - Est.: 0.437 nats
2023-11-14 10:57:58 - (UNIFORM, 1, 10000, 2) - Time: 0.00700 s - Est.: 0.424 nats


True entropy: 0.405 nats


In [4]:
# # # # # NORMAL # # # # #

experiment = "normal"

# Calculate Truth
# Hyper-parameters come from the attributes of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): 0.5 * log(2 * pi * sigma^2) + 0.5, where sigma is
# the second entry of the first parameter tuple (assumed to be the standard
# deviation -- confirm against the data generator).
sigma = dist_params[0][1]
true_h = 0.5 * np.log(2 * np.pi * (sigma**2)) + 0.5

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:57:59 - (NORMAL, 1, 10000, 1) - Time: 0.00887 s - Est.: 2.332 nats
2023-11-14 10:58:01 - (NORMAL, 1, 10000, 2) - Time: 0.00663 s - Est.: 2.329 nats
2023-11-14 10:58:02 - Creating converter from 5 to 3


True entropy: 2.335 nats


In [5]:
# # # # # NORMAL-MIXTURE # # # # #

experiment = "normal-mixture"

# Calculate Truth
# Hyper-parameters are read from the attributes of the experiment's "p"
# dataset and parsed with ast.literal_eval.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

def pdf_normal(x, params):
    """Evaluate a weighted sum of univariate normal densities at ``x``.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the density is evaluated.
    params : iterable of (loc, scale, weight)
        One triple per mixture component.

    Returns
    -------
    float or ndarray
        Sum over components of ``weight * norm.pdf(x, loc, scale)``.
    """
    density = 0.0
    for loc, scale, weight in params:
        # Call the distribution's pdf directly instead of building a frozen
        # distribution object on every integrand evaluation.
        density += stats.norm.pdf(x, loc=loc, scale=scale) * weight
    return density


def h_normal(x, params):
    """Entropy integrand ``-p(x) * log(p(x))`` of the normal mixture.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the integrand is evaluated.
    params : iterable of (loc, scale, weight)
        Mixture components, forwarded to ``pdf_normal``.

    Returns
    -------
    float or ndarray
        The integrand value; 0 wherever the density is exactly 0.
    """
    p = pdf_normal(x, params)
    # xlogy(p, p) equals p * log(p) but returns 0 when p == 0, whereas the
    # plain product evaluates 0 * -inf = NaN once the density underflows,
    # which would turn the whole numerical integral into NaN.
    return -xlogy(p, p)
# Integration interval for the entropy integrand (hard-coded bounds).
norm_lims = [[-15, 25]]

# Numerical Integration Result: nquad returns the integral first,
# followed by an error estimate; only the integral is kept.
quad_result = nquad(h_normal, norm_lims, args=(dist_params,))
true_h = quad_result[0]

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:58:23 - (NORMAL-MIXTURE, 1, 10000, 1) - Time: 0.00697 s - Est.: 2.364 nats
2023-11-14 10:58:25 - (NORMAL-MIXTURE, 1, 10000, 2) - Time: 0.00729 s - Est.: 2.358 nats


True entropy: 2.385 nats


In [6]:
# # # # # EXPONENTIAL # # # # #

experiment = "exponential"

# Calculate Truth
# Hyper-parameters come from the attributes of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): 1 - log(1 / scale), where scale is the second
# entry of the first parameter tuple (assumed to be the scale, i.e. the
# inverse rate -- confirm against the data generator).
scale = dist_params[0][1]
true_h = 1 - np.log(1 / scale)

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:59:03 - (EXPONENTIAL, 1, 10000, 1) - Time: 0.01345 s - Est.: 0.301 nats
2023-11-14 10:59:04 - (EXPONENTIAL, 1, 10000, 2) - Time: 0.00657 s - Est.: 0.272 nats


True entropy: 0.307 nats


In [7]:
# # # # # BIVARIATE-NORMAL # # # # #

experiment = "bivariate-normal"

# Calculate Truth
# Hyper-parameters come from the attributes of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): 0.5 * log((2 * pi * e)^d * det(cov)), with cov the
# second entry of the first parameter tuple (used here as the covariance
# matrix) and d its number of rows.
cov = dist_params[0][1]
d = len(cov)
two_pi_e = 2 * np.pi * np.exp(1)
true_h = 0.5 * np.log(two_pi_e ** d * np.linalg.det(cov))

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:59:13 - (BIVARIATE-NORMAL, 1, 10000, 1) - Time: 0.00887 s - Est.: 2.724 nats
2023-11-14 10:59:16 - (BIVARIATE-NORMAL, 1, 10000, 2) - Time: 0.01114 s - Est.: 2.698 nats


True entropy: 2.694 nats


In [8]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Calculate Truth
# Hyper-parameters are read from the attributes of the experiment's "p"
# dataset and parsed with ast.literal_eval.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

def pdf_mnorm(x, y, params):
    """Evaluate a weighted sum of bivariate normal densities at ``(x, y)``.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s).
    params : iterable of (mean, cov, weight)
        One triple per mixture component.

    Returns
    -------
    float or ndarray
        Sum over components of ``weight * multivariate_normal.pdf(...)``.
    """
    # The stacked point does not depend on the component, so build it once.
    point = np.dstack((x, y))
    density = 0.0
    for mean, cov, weight in params:
        density += stats.multivariate_normal.pdf(point, mean=mean, cov=cov) * weight
    return density


def h_mnorm(x, y, params1):
    """Entropy integrand ``-p(x, y) * log(p(x, y))`` of the bivariate mixture.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s).
    params1 : iterable of (mean, cov, weight)
        Mixture components, forwarded to ``pdf_mnorm``.

    Returns
    -------
    float or ndarray
        The integrand value; 0 wherever the density is exactly 0.
    """
    p = pdf_mnorm(x, y, params1)
    # xlogy(p, p) equals p * log(p) but returns 0 when p == 0, whereas the
    # plain product evaluates 0 * -inf = NaN once the density underflows,
    # which would turn the whole numerical integral into NaN.
    return -xlogy(p, p)

# Integration rectangle for the entropy integrand (hard-coded bounds,
# one [lower, upper] pair per integration variable).
binorm_lims = [[-7, 7], [-7, 7]]

# Numerical Integration Result: nquad returns the integral first,
# followed by an error estimate; only the integral is kept.
quad_result = nquad(h_mnorm, binorm_lims, args=(dist_params,))
true_h = quad_result[0]

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:59:30 - (BIVARIATE-NORMAL-MIXTURE, 1, 10000, 1) - Time: 0.01088 s - Est.: 3.336 nats
2023-11-14 10:59:33 - (BIVARIATE-NORMAL-MIXTURE, 1, 10000, 2) - Time: 0.01014 s - Est.: 3.322 nats


True entropy: 3.330 nats


In [9]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Calculate Truth
# Hyper-parameters are read from the attributes of the experiment's "p"
# dataset and parsed with ast.literal_eval.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# First entry of the first parameter tuple, used as theta in the formula.
theta = dist_params[0][0]

# Reference entropy (nats):
#   1 + theta - theta * digamma(theta) + log(Gamma(theta)) - log(1.0)
# gammaln evaluates log(Gamma(theta)) directly, so it stays finite where
# gamma(theta) itself would overflow to inf (theta above roughly 171).
# The trailing "- np.log(1.0)" is 0; it is kept so the expression retains
# the original form (presumably a unit-rate term -- TODO confirm).
true_h = 1 + theta - theta * digamma(theta) + gammaln(theta) - np.log(1.0)

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:59:40 - (GEXP, 1, 10000, 1) - Time: 0.01468 s - Est.: 1.937 nats
2023-11-14 10:59:43 - (GEXP, 1, 10000, 2) - Time: 0.01015 s - Est.: 1.921 nats


True entropy: 1.925 nats


In [10]:
# # # # # 4D-GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Calculate Truth
# Hyper-parameters come from the attributes of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): 0.5 * log((2 * pi * e)^d * det(cov)), with cov the
# second entry of the first parameter tuple (used here as the covariance
# matrix) and d its number of rows.
cov = dist_params[0][1]
d = len(cov)
two_pi_e = 2 * np.pi * np.exp(1)
true_h = 0.5 * np.log(two_pi_e ** d * np.linalg.det(cov))

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 10:59:55 - (4D-GAUSSIAN, 1, 10000, 1) - Time: 0.02360 s - Est.: 5.080 nats
2023-11-14 11:00:02 - (4D-GAUSSIAN, 1, 10000, 2) - Time: 0.01982 s - Est.: 5.097 nats


True entropy: 5.094 nats


In [11]:
# # # # # 10D-GAUSSIAN # # # # #

experiment = "10d-gaussian"

# Calculate Truth
# Hyper-parameters come from the attributes of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    stored_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(stored_params)

# Reference entropy (nats): 0.5 * log((2 * pi * e)^d * det(cov)), with cov the
# second entry of the first parameter tuple (used here as the covariance
# matrix) and d its number of rows.
cov = dist_params[0][1]
d = len(cov)
two_pi_e = 2 * np.pi * np.exp(1)
true_h = 0.5 * np.log(two_pi_e ** d * np.linalg.det(cov))

# Evaluate calc_knn_entropy on this experiment.
eval.evaluate(experiment, calc_knn_entropy, 1)

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-14 11:00:17 - (10D-GAUSSIAN, 1, 10000, 1) - Time: 0.25864 s - Est.: 4.976 nats
2023-11-14 11:03:47 - (10D-GAUSSIAN, 1, 10000, 2) - Time: 0.32724 s - Est.: 4.981 nats


True entropy: 4.932 nats
