In [1]:
import ast

import h5py
import numpy as np
from scipy import stats
from scipy.integrate import nquad
from scipy.special import digamma, entr, gamma, gammaln

from utils.misc_utils import get_logger

In [2]:
from utils.bin_evaluators import EvaluatorBIN

# Evaluator instance shared by every experiment cell in this notebook.
# NOTE(review): the name shadows the builtin `eval`; kept because all later
# cells refer to it.
eval = EvaluatorBIN()

# Run configuration. The entries are applied in this exact order so that the
# attribute listing printed below keeps its sequence.
run_config = {
    "data_path": "data.hdf5",
    "out_path": "results/bin.hdf5",
    "logger": get_logger("results/bin_entropy.log"),
    "quantity": "h",
    "hyper_params": ["scott", "fd", "sturges"],
    "sample_sizes": [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000],
    "seeds": range(1, 6),
}
for attr_name, attr_value in run_config.items():
    setattr(eval, attr_name, attr_value)

# Echo every attribute currently set on the evaluator.
for attr_name, attr_value in vars(eval).items():
    print(f"{attr_name} - {attr_value}")

# Called once here, ahead of the experiment cells.
eval.create_database()
eval.create_group()

# Names of the experiments handled by the cells below.
experiments = [
    "uniform",
    "normal",
    "normal-mixture",
    "exponential",
    "bivariate-normal",
    "bivariate-normal-mixture",
    "gexp",
    "4d-gaussian",
]

data_path - data.hdf5
out_path - results/bin.hdf5
logger - <RootLogger root (DEBUG)>
quantity - h
hyper_params - ['scott', 'fd', 'sturges']
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 6)
results - None


In [3]:
# "qs" is added to the hyper-parameter list from this cell on (a later cell
# resets the list before the bivariate experiments). The membership guard
# keeps the list unchanged when this cell is re-run, instead of appending
# "qs" a second time.
if "qs" not in eval.hyper_params:
    eval.hyper_params.append("qs")

# # # # # UNIFORM # # # # #

experiment = "uniform"
# Calculate Truth: read the `hyper_params` attribute stored on
# `<experiment>/p` and parse it from its Python-literal text form.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Closed form for a uniform density of width w: h = ln(w).
# NOTE(review): dist_params[0][1] is used as that width — confirm it is the
# interval length (scale) and not the upper bound.
true_h = np.log(dist_params[0][1]) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:05:10 - Creating converter from 3 to 5
2023-11-09 19:05:10 - (UNIFORM, scott, 10000, 1) - Time: 0.00049 s - Est.: 0.404 nats
2023-11-09 19:05:10 - (UNIFORM, qs, 10000, 1) - Time: 0.15019 s - Est.: 0.399 nats
2023-11-09 19:05:12 - (UNIFORM, scott, 10000, 2) - Time: 0.00046 s - Est.: 0.404 nats
2023-11-09 19:05:13 - (UNIFORM, qs, 10000, 2) - Time: 0.15224 s - Est.: 0.399 nats
2023-11-09 19:05:15 - (UNIFORM, scott, 10000, 3) - Time: 0.00044 s - Est.: 0.404 nats
2023-11-09 19:05:15 - (UNIFORM, qs, 10000, 3) - Time: 0.15996 s - Est.: 0.400 nats
2023-11-09 19:05:18 - (UNIFORM, scott, 10000, 4) - Time: 0.00044 s - Est.: 0.404 nats
2023-11-09 19:05:18 - (UNIFORM, qs, 10000, 4) - Time: 0.15008 s - Est.: 0.399 nats
2023-11-09 19:05:21 - (UNIFORM, scott, 10000, 5) - Time: 0.00085 s - Est.: 0.404 nats
2023-11-09 19:05:22 - (UNIFORM, qs, 10000, 5) - Time: 0.20184 s - Est.: 0.400 nats


True entropy: 0.405 nats


In [5]:
# # # # # NORMAL # # # # #

experiment = "normal"

# Ground truth: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

# Closed form for a Gaussian: h = 0.5 * ln(2 * pi * sigma^2) + 0.5.
# NOTE(review): dist_params[0][1] is squared here, i.e. treated as the
# standard deviation — confirm it is not already a variance.
sigma = dist_params[0][1]
true_h = 0.5 * np.log(2 * np.pi * (sigma**2)) + 0.5

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:05:52 - (NORMAL, scott, 10000, 1) - Time: 0.00043 s - Est.: 2.318 nats
2023-11-09 19:05:52 - (NORMAL, qs, 10000, 1) - Time: 0.15174 s - Est.: 2.304 nats
2023-11-09 19:05:55 - (NORMAL, scott, 10000, 2) - Time: 0.00059 s - Est.: 2.330 nats
2023-11-09 19:05:55 - (NORMAL, qs, 10000, 2) - Time: 0.14843 s - Est.: 2.332 nats
2023-11-09 19:05:59 - (NORMAL, scott, 10000, 3) - Time: 0.00107 s - Est.: 2.333 nats
2023-11-09 19:05:59 - (NORMAL, qs, 10000, 3) - Time: 0.31118 s - Est.: 2.331 nats
2023-11-09 19:06:02 - (NORMAL, scott, 10000, 4) - Time: 0.00062 s - Est.: 2.339 nats
2023-11-09 19:06:03 - (NORMAL, qs, 10000, 4) - Time: 0.19612 s - Est.: 2.329 nats
2023-11-09 19:06:06 - (NORMAL, scott, 10000, 5) - Time: 0.00056 s - Est.: 2.329 nats
2023-11-09 19:06:06 - (NORMAL, qs, 10000, 5) - Time: 0.19066 s - Est.: 2.320 nats


True entropy: 2.335 nats


In [6]:
# # # # # NORMAL-MIXTURE # # # # #

experiment = "normal-mixture"

# Ground truth input: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

def pdf_normal(x, params):
    """Evaluate a weighted mixture of 1-D normal densities at ``x``.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the density is evaluated.
    params : iterable of (loc, scale, weight)
        One triple per mixture component.

    Returns
    -------
    float or ndarray
        Sum over components of ``weight * N(x; loc, scale)``; ``0.0`` when
        ``params`` is empty.
    """
    # stats.norm.pdf is called directly so that no frozen distribution object
    # is constructed on every integrand evaluation.
    return sum(
        (
            stats.norm.pdf(x, loc=loc, scale=scale) * weight
            for loc, scale, weight in params
        ),
        0.0,
    )

def h_normal(x, params):
    """Entropy integrand ``-p(x) * ln(p(x))`` for the normal mixture.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the integrand is evaluated.
    params : iterable of (loc, scale, weight)
        Mixture components, forwarded to ``pdf_normal``.

    Returns
    -------
    float or ndarray
        The integrand value, defined as 0 wherever the density is 0.
    """
    # entr(p) equals -p * ln(p) for p > 0 and is exactly 0 at p == 0. The
    # plain product gives 0 * -inf = nan once the density underflows in the
    # tails, which would turn the whole numerical integral into nan.
    return entr(pdf_normal(x, params))

# Integration window for the 1-D mixture density.
norm_lims = [[-15, 25]]

# Numerical Integration Result: nquad returns the integral together with an
# absolute-error estimate; only the integral is used as the reference.
integral, abs_err = nquad(h_normal, norm_lims, args=(dist_params,))
true_h = integral

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:06:49 - (NORMAL-MIXTURE, scott, 10000, 1) - Time: 0.00042 s - Est.: 2.377 nats
2023-11-09 19:06:49 - (NORMAL-MIXTURE, qs, 10000, 1) - Time: 0.14604 s - Est.: 2.389 nats
2023-11-09 19:06:52 - (NORMAL-MIXTURE, scott, 10000, 2) - Time: 0.00046 s - Est.: 2.383 nats
2023-11-09 19:06:52 - (NORMAL-MIXTURE, qs, 10000, 2) - Time: 0.16292 s - Est.: 2.381 nats
2023-11-09 19:06:54 - (NORMAL-MIXTURE, scott, 10000, 3) - Time: 0.00046 s - Est.: 2.386 nats
2023-11-09 19:06:54 - (NORMAL-MIXTURE, qs, 10000, 3) - Time: 0.16636 s - Est.: 2.365 nats
2023-11-09 19:06:57 - (NORMAL-MIXTURE, scott, 10000, 4) - Time: 0.00058 s - Est.: 2.393 nats
2023-11-09 19:06:57 - (NORMAL-MIXTURE, qs, 10000, 4) - Time: 0.20817 s - Est.: 2.391 nats
2023-11-09 19:07:00 - (NORMAL-MIXTURE, scott, 10000, 5) - Time: 0.00044 s - Est.: 2.398 nats
2023-11-09 19:07:00 - (NORMAL-MIXTURE, qs, 10000, 5) - Time: 0.15569 s - Est.: 2.384 nats


True entropy: 2.385 nats


In [7]:
# # # # # EXPONENTIAL # # # # #

experiment = "exponential"

# Ground truth: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

# Closed form for an exponential with rate lam: h = 1 - ln(lam).
# The rate is taken as 1 / dist_params[0][1].
# NOTE(review): this presumes dist_params[0][1] is the scale — confirm.
inverse_rate = dist_params[0][1]
true_h = 1 - np.log(1 / inverse_rate)

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:07:05 - (EXPONENTIAL, scott, 10000, 1) - Time: 0.00055 s - Est.: 0.302 nats
2023-11-09 19:07:05 - (EXPONENTIAL, qs, 10000, 1) - Time: 0.18787 s - Est.: 0.293 nats
2023-11-09 19:07:08 - (EXPONENTIAL, scott, 10000, 2) - Time: 0.00058 s - Est.: 0.302 nats
2023-11-09 19:07:08 - (EXPONENTIAL, qs, 10000, 2) - Time: 0.20676 s - Est.: 0.298 nats
2023-11-09 19:07:11 - (EXPONENTIAL, scott, 10000, 3) - Time: 0.00055 s - Est.: 0.320 nats
2023-11-09 19:07:11 - (EXPONENTIAL, qs, 10000, 3) - Time: 0.18835 s - Est.: 0.323 nats
2023-11-09 19:07:15 - (EXPONENTIAL, scott, 10000, 4) - Time: 0.00059 s - Est.: 0.319 nats
2023-11-09 19:07:15 - (EXPONENTIAL, qs, 10000, 4) - Time: 0.18998 s - Est.: 0.308 nats
2023-11-09 19:07:18 - (EXPONENTIAL, scott, 10000, 5) - Time: 0.00053 s - Est.: 0.313 nats
2023-11-09 19:07:18 - (EXPONENTIAL, qs, 10000, 5) - Time: 0.19143 s - Est.: 0.319 nats


True entropy: 0.307 nats


In [8]:
# From this cell on the hyper-parameter list is reset to these three entries
# (this drops the "qs" entry appended in an earlier cell).
eval.hyper_params = ["scott", "fd", "sturges"]

# # # # # BIVARIATE-NORMAL # # # # #

experiment = "bivariate-normal"

# Ground truth: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

# Closed form for a d-dimensional Gaussian:
#   h = 0.5 * ln((2 * pi * e)^d * det(Sigma)).
# dist_params[0][1] is passed to det(), i.e. used as the covariance matrix.
cov = dist_params[0][1]
d = len(cov)
two_pi_e = 2 * np.pi * np.exp(1)
true_h = 0.5 * np.log(two_pi_e ** d * np.linalg.det(cov))

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:07:32 - (BIVARIATE-NORMAL, scott, 10000, 1) - Time: 0.00099 s - Est.: 2.636 nats
2023-11-09 19:07:32 - (BIVARIATE-NORMAL, scott, 10000, 2) - Time: 0.00088 s - Est.: 2.619 nats
2023-11-09 19:07:32 - (BIVARIATE-NORMAL, scott, 10000, 3) - Time: 0.00093 s - Est.: 2.617 nats
2023-11-09 19:07:32 - (BIVARIATE-NORMAL, scott, 10000, 4) - Time: 0.00244 s - Est.: 2.626 nats
2023-11-09 19:07:32 - (BIVARIATE-NORMAL, scott, 10000, 5) - Time: 0.00089 s - Est.: 2.628 nats


True entropy: 2.694 nats


In [9]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Ground truth input: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

def pdf_mnorm(x, y, params):
    """Evaluate a weighted mixture of bivariate normal densities at ``(x, y)``.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s).
    params : iterable of (mean, cov, weight)
        One triple per mixture component.

    Returns
    -------
    float or ndarray
        Sum over components of ``weight * N((x, y); mean, cov)``; ``0.0`` when
        ``params`` is empty.
    """
    # Stack the coordinates along a trailing axis so each (x, y) pair forms
    # one 2-D point. This does not depend on the component, so it is built
    # once instead of once per loop iteration.
    point = np.dstack((x, y))
    density = 0.0
    for mean, cov, weight in params:
        density += stats.multivariate_normal(mean=mean, cov=cov).pdf(point) * weight
    return density

def h_mnorm(x, y, params1):
    """Entropy integrand ``-p(x, y) * ln(p(x, y))`` for the bivariate mixture.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s).
    params1 : iterable of (mean, cov, weight)
        Mixture components, forwarded to ``pdf_mnorm``.

    Returns
    -------
    float or ndarray
        The integrand value, defined as 0 wherever the density is 0.
    """
    # entr(p) equals -p * ln(p) for p > 0 and is exactly 0 at p == 0. The
    # plain product gives 0 * -inf = nan once the density underflows, which
    # would turn the whole numerical integral into nan.
    return entr(pdf_mnorm(x, y, params1))

# Integration window, one [low, high] pair per coordinate.
binorm_lims = [[-7, 7], [-7, 7]]

# Numerical Integration Result: nquad returns the integral together with an
# absolute-error estimate; only the integral is used as the reference.
integral, abs_err = nquad(h_mnorm, binorm_lims, args=(dist_params,))
true_h = integral

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:07:45 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 1) - Time: 0.00088 s - Est.: 3.278 nats
2023-11-09 19:07:45 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 2) - Time: 0.00088 s - Est.: 3.280 nats
2023-11-09 19:07:46 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 3) - Time: 0.00114 s - Est.: 3.284 nats
2023-11-09 19:07:46 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 4) - Time: 0.00082 s - Est.: 3.287 nats
2023-11-09 19:07:46 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 5) - Time: 0.00264 s - Est.: 3.278 nats


True entropy: 3.330 nats


In [10]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Calculate Truth: read the `hyper_params` attribute stored on
# `<experiment>/p` and parse it from its Python-literal text form.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

theta = dist_params[0][0]
# Reference expression: h = 1 + theta - theta * psi(theta) + ln Gamma(theta).
# gammaln evaluates ln Gamma(theta) directly, so a large theta cannot
# overflow gamma() to inf before the logarithm is taken. The former
# `- np.log(1.0)` term was identically zero and has been dropped.
# NOTE(review): gammaln returns ln|Gamma|; this assumes theta > 0.
true_h = 1 + theta - theta * digamma(theta) + gammaln(theta) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:07:53 - (GEXP, scott, 10000, 1) - Time: 0.00102 s - Est.: 1.928 nats
2023-11-09 19:07:53 - (GEXP, scott, 10000, 2) - Time: 0.00104 s - Est.: 1.885 nats
2023-11-09 19:07:53 - (GEXP, scott, 10000, 3) - Time: 0.00104 s - Est.: 1.898 nats
2023-11-09 19:07:54 - (GEXP, scott, 10000, 4) - Time: 0.00134 s - Est.: 1.891 nats
2023-11-09 19:07:54 - (GEXP, scott, 10000, 5) - Time: 0.00094 s - Est.: 1.889 nats


True entropy: 1.925 nats


In [11]:
# Only seed 1 is used from this cell on.
# NOTE(review): this overwrites the shared evaluator's seeds, so re-running
# an earlier cell afterwards will also use a single seed.
eval.seeds = range(1, 2)

# # # # # 4D-GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Ground truth: fetch the `hyper_params` attribute stored on
# `<experiment>/p`, then parse it from its Python-literal text form.
with h5py.File(eval.data_path, mode="r") as data_file:
    stored_params = data_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(stored_params)

# Closed form for a d-dimensional Gaussian:
#   h = 0.5 * ln((2 * pi * e)^d * det(Sigma)).
# dist_params[0][1] is passed to det(), i.e. used as the covariance matrix.
cov = dist_params[0][1]
d = len(cov)
two_pi_e = 2 * np.pi * np.exp(1)
true_h = 0.5 * np.log(two_pi_e ** d * np.linalg.det(cov))

# Run the estimator, then pass the reference value on for storage.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-09 19:08:02 - (4D-GAUSSIAN, scott, 10000, 1) - Time: 0.27325 s - Est.: 1.857 nats


True entropy: 5.094 nats
