In [1]:
import ast

import h5py
import numpy as np
from scipy import stats
from scipy.integrate import nquad
from scipy.special import digamma, gamma, gammaln

from utils.misc_utils import get_logger

In [2]:
from utils.bin_evaluators import Evaluator_BIN

# NOTE: the name `eval` shadows Python's built-in eval() for the rest of the
# notebook; every later cell reaches the evaluator through this name.
eval = Evaluator_BIN()

# data_path is the HDF5 file the later cells open to read each experiment's
# distribution parameters. out_path / logger are presumably where the
# evaluator writes results and log lines — confirm against Evaluator_BIN.
eval.data_path = "data.hdf5"
eval.out_path = "results/bin.hdf5"
eval.logger = get_logger("results/bin_entropy.log")

# Quantity to estimate; "h" presumably denotes entropy (the later cells call
# evaluate_entropy) — confirm against Evaluator_BIN.
eval.quantity = "h"

# Sweep configuration. The hyper-parameter names look like histogram binning
# rules — TODO confirm how Evaluator_BIN interprets them.
eval.hyper_params = ["scott", "fd", "sturges"]
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
eval.seeds = range(1, 4)

# Echo every instance attribute so the configuration is recorded in the
# cell output (printed in the attribute insertion order of vars()).
for k, v in vars(eval).items():
    print(f"{k} - {v}")

eval.create_database()
eval.create_group()

# Names of the experiments handled one per cell below. This list itself is
# not referenced again in the visible part of the notebook.
experiments = [
    "uniform",
    "normal",
    "normal-mixture",
    "exponential",
    "bivariate-normal",
    "bivariate-normal-mixture",
    "gexp",
    "4d-gaussian"
]

data_path - data.hdf5
out_path - results/bin.hdf5
logger - <RootLogger root (DEBUG)>
quantity - h
hyper_params - ['scott', 'fd', 'sturges']
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 4)
results - None


In [3]:
# The cells from here up to the bivariate experiments additionally evaluate the
# "qs" hyper-parameter. Guard the append so that re-running this cell does not
# add "qs" a second time (which would evaluate it twice per seed).
if "qs" not in eval.hyper_params:
    eval.hyper_params.append("qs")

# # # # # UNIFORM # # # # #

experiment = "uniform"
# Calculate Truth: read the parameters the samples were generated with.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Closed form log(width) of a uniform density; assumes dist_params[0][1] is the
# width (scale) of the support — TODO confirm against the data generator.
true_h = np.log(dist_params[0][1]) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:41:15 - Creating converter from 3 to 5
2023-11-13 12:41:15 - (UNIFORM, scott, 10000, 1) - Time: 0.00054 s - Est.: 0.404 nats
2023-11-13 12:41:15 - (UNIFORM, qs, 10000, 1) - Time: 0.15971 s - Est.: 0.398 nats
2023-11-13 12:41:18 - (UNIFORM, scott, 10000, 2) - Time: 0.00063 s - Est.: 0.404 nats
2023-11-13 12:41:18 - (UNIFORM, qs, 10000, 2) - Time: 0.15241 s - Est.: 0.398 nats
2023-11-13 12:41:20 - (UNIFORM, scott, 10000, 3) - Time: 0.00047 s - Est.: 0.404 nats
2023-11-13 12:41:20 - (UNIFORM, qs, 10000, 3) - Time: 0.18609 s - Est.: 0.400 nats
2023-11-13 12:41:24 - Creating converter from 5 to 3


True entropy: 0.405 nats


In [4]:
# # # # # NORMAL # # # # #

experiment = "normal"

# Ground truth: fetch the stored generating parameters, then parse them once
# the file handle has been released.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed form 0.5 * ln(2 * pi * sigma^2) + 0.5, with dist_params[0][1] used
# as sigma.
sigma = dist_params[0][1]
true_h = 0.5 * np.log(2 * np.pi * (sigma**2)) + 0.5

# Run the estimator, then pass the reference value to the evaluator's writer.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:41:24 - (NORMAL, scott, 10000, 1) - Time: 0.00057 s - Est.: 2.318 nats
2023-11-13 12:41:24 - (NORMAL, qs, 10000, 1) - Time: 0.24187 s - Est.: 2.316 nats
2023-11-13 12:41:27 - (NORMAL, scott, 10000, 2) - Time: 0.00059 s - Est.: 2.330 nats
2023-11-13 12:41:27 - (NORMAL, qs, 10000, 2) - Time: 0.19040 s - Est.: 2.325 nats
2023-11-13 12:41:31 - (NORMAL, scott, 10000, 3) - Time: 0.00060 s - Est.: 2.333 nats
2023-11-13 12:41:31 - (NORMAL, qs, 10000, 3) - Time: 0.18985 s - Est.: 2.340 nats


True entropy: 2.335 nats


In [5]:
# # # # # NORMAL-MIXTURE # # # # #

experiment = "normal-mixture"

# Ground truth: fetch the stored generating parameters, then parse them once
# the file handle has been released.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

def pdf_normal(x, params):
    """Evaluate a weighted sum of 1-D normal densities at ``x``.

    Args:
        x: Point (scalar or array-like) at which the density is evaluated.
        params: Iterable of ``(loc, scale, weight)`` triples, one per
            mixture component.

    Returns:
        The sum over components of ``weight * N(x; loc, scale)``;
        ``0.0`` when ``params`` is empty.
    """
    density = 0.0
    for loc, scale, weight in params:
        # Unfrozen call: same value as stats.norm(loc, scale).pdf(x) without
        # building a frozen distribution on every integrand evaluation.
        density += stats.norm.pdf(x, loc=loc, scale=scale) * weight
    return density


def h_normal(x, params):
    """Entropy integrand ``-p(x) * log(p(x))`` for the 1-D normal mixture.

    Where the density underflows to exactly zero the integrand is taken as 0
    (its limit as p -> 0) instead of evaluating ``0 * log(0)``, which yields
    nan and would turn the whole numerical integral into nan.

    Args:
        x: Point (scalar or array-like) at which to evaluate the integrand.
        params: Mixture description forwarded to ``pdf_normal``.

    Returns:
        ``-p * log(p)`` where ``p > 0`` and zero elsewhere.
    """
    p = np.asarray(pdf_normal(x, params), dtype=float)
    # Substitute 1 where p == 0 so that log() returns 0 there; the leading
    # factor p is already 0 at those points, so they contribute nothing.
    safe_p = np.where(p > 0.0, p, 1.0)
    return -1 * p * np.log(safe_p)

# Finite integration window for the single variable.
norm_lims = [[-15, 25]]

# Numerical integration result: nquad returns the integral first, followed by
# its error estimate; only the integral is kept as the reference entropy.
integration = nquad(h_normal, norm_lims, args=(dist_params,))
true_h = integration[0]

# Run the estimator, then pass the reference value to the evaluator's writer.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:41:34 - (NORMAL-MIXTURE, scott, 10000, 1) - Time: 0.00065 s - Est.: 2.377 nats
2023-11-13 12:41:35 - (NORMAL-MIXTURE, qs, 10000, 1) - Time: 0.21869 s - Est.: 2.376 nats
2023-11-13 12:41:38 - (NORMAL-MIXTURE, scott, 10000, 2) - Time: 0.00061 s - Est.: 2.383 nats
2023-11-13 12:41:38 - (NORMAL-MIXTURE, qs, 10000, 2) - Time: 0.20199 s - Est.: 2.373 nats
2023-11-13 12:41:41 - (NORMAL-MIXTURE, scott, 10000, 3) - Time: 0.00056 s - Est.: 2.386 nats
2023-11-13 12:41:42 - (NORMAL-MIXTURE, qs, 10000, 3) - Time: 0.19743 s - Est.: 2.377 nats


True entropy: 2.385 nats


In [6]:
# # # # # EXPONENTIAL # # # # #

experiment = "exponential"

# Ground truth: fetch the stored generating parameters, then parse them once
# the file handle has been released.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed form 1 - ln(rate), with the rate taken as the reciprocal of
# dist_params[0][1].
rate = 1/dist_params[0][1]
true_h = 1 - np.log(rate)

# Run the estimator, then pass the reference value to the evaluator's writer.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:41:45 - (EXPONENTIAL, scott, 10000, 1) - Time: 0.00065 s - Est.: 0.302 nats
2023-11-13 12:41:45 - (EXPONENTIAL, qs, 10000, 1) - Time: 0.27868 s - Est.: 0.313 nats
2023-11-13 12:41:49 - (EXPONENTIAL, scott, 10000, 2) - Time: 0.00067 s - Est.: 0.302 nats
2023-11-13 12:41:49 - (EXPONENTIAL, qs, 10000, 2) - Time: 0.19992 s - Est.: 0.306 nats
2023-11-13 12:41:52 - (EXPONENTIAL, scott, 10000, 3) - Time: 0.00051 s - Est.: 0.320 nats
2023-11-13 12:41:52 - (EXPONENTIAL, qs, 10000, 3) - Time: 0.18630 s - Est.: 0.325 nats


True entropy: 0.307 nats


In [7]:
# Reset the hyper-parameter list for the multivariate experiments (this drops
# the "qs" entry that was added for the earlier cells).
eval.hyper_params = ["scott", "fd", "sturges"]

# # # # # BIVARIATE-NORMAL # # # # #

experiment = "bivariate-normal"

# Calculate Truth: read the parameters the samples were generated with.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# dist_params[0][1] is used as the covariance matrix of the Gaussian.
cov = np.asarray(dist_params[0][1], dtype=float)
d = len(cov)

# Closed form 0.5 * (d * ln(2*pi*e) + ln det(cov)). slogdet yields ln|det|
# directly, so the product (2*pi*e)**d * det(cov) is never formed and cannot
# overflow or underflow before the logarithm is taken.
sign, logdet = np.linalg.slogdet(cov)
if sign <= 0:
    # A non-positive determinant would otherwise store nan/-inf as the "truth".
    raise ValueError(
        f"Covariance of '{experiment}' must have a positive determinant."
    )
true_h = 0.5 * (d * np.log(2 * np.pi * np.e) + logdet) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:41:55 - (BIVARIATE-NORMAL, scott, 10000, 1) - Time: 0.00138 s - Est.: 2.636 nats
2023-11-13 12:41:55 - (BIVARIATE-NORMAL, scott, 10000, 2) - Time: 0.00172 s - Est.: 2.619 nats
2023-11-13 12:41:55 - (BIVARIATE-NORMAL, scott, 10000, 3) - Time: 0.00131 s - Est.: 2.617 nats


True entropy: 2.694 nats


In [8]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Ground truth: fetch the stored generating parameters, then parse them once
# the file handle has been released.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

def pdf_mnorm(x, y, params):
    """Evaluate a weighted sum of bivariate normal densities at ``(x, y)``.

    Args:
        x: First coordinate(s) of the evaluation point.
        y: Second coordinate(s) of the evaluation point.
        params: Iterable of ``(mean, cov, weight)`` triples, one per
            mixture component.

    Returns:
        The sum over components of ``weight * N((x, y); mean, cov)``;
        ``0.0`` when ``params`` is empty.
    """
    # The stacked point does not depend on the component, so build it once.
    point = np.dstack((x, y))
    density = 0.0
    for mean, cov, weight in params:
        density += stats.multivariate_normal(mean=mean, cov=cov).pdf(point) * weight
    return density


def h_mnorm(x, y, params1):
    """Entropy integrand ``-p(x, y) * log(p(x, y))`` for the 2-D mixture.

    Where the density underflows to exactly zero the integrand is taken as 0
    (its limit as p -> 0) instead of evaluating ``0 * log(0)``, which yields
    nan and would turn the whole numerical integral into nan.

    Args:
        x: First coordinate(s) of the evaluation point.
        y: Second coordinate(s) of the evaluation point.
        params1: Mixture description forwarded to ``pdf_mnorm``.

    Returns:
        ``-p * log(p)`` where ``p > 0`` and zero elsewhere.
    """
    p = np.asarray(pdf_mnorm(x, y, params1), dtype=float)
    # Substitute 1 where p == 0 so that log() returns 0 there; the leading
    # factor p is already 0 at those points, so they contribute nothing.
    safe_p = np.where(p > 0.0, p, 1.0)
    return -1 * p * np.log(safe_p)

# Finite integration window, one [lower, upper] pair per variable.
binorm_lims = [[-7, 7], [-7, 7]]

# Numerical integration result: nquad returns the integral first, followed by
# its error estimate; only the integral is kept as the reference entropy.
integration = nquad(h_mnorm, binorm_lims, args=(dist_params,))
true_h = integration[0]

# Run the estimator, then pass the reference value to the evaluator's writer.
eval.evaluate_entropy(experiment, "scott")
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:42:01 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 1) - Time: 0.00112 s - Est.: 3.278 nats
2023-11-13 12:42:01 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 2) - Time: 0.00112 s - Est.: 3.280 nats
2023-11-13 12:42:01 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 3) - Time: 0.00121 s - Est.: 3.284 nats


True entropy: 3.330 nats


In [9]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Calculate Truth: read the parameters the samples were generated with.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

theta = dist_params[0][0]
# Closed form 1 + theta - theta * psi(theta) + ln Gamma(theta).
# gammaln computes ln Gamma(theta) directly, so large theta does not overflow
# the way log(gamma(theta)) does.
true_h = 1 + theta - theta * digamma(theta) + gammaln(theta) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:42:02 - (GEXP, scott, 10000, 1) - Time: 0.00119 s - Est.: 1.928 nats
2023-11-13 12:42:02 - (GEXP, scott, 10000, 2) - Time: 0.00128 s - Est.: 1.885 nats
2023-11-13 12:42:02 - (GEXP, scott, 10000, 3) - Time: 0.00181 s - Est.: 1.898 nats


True entropy: 1.925 nats


In [10]:
# Restrict this experiment to a single seed.
eval.seeds = range(1, 2)

# # # # # 4D-GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Calculate Truth: read the parameters the samples were generated with.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# dist_params[0][1] is used as the covariance matrix of the Gaussian.
cov = np.asarray(dist_params[0][1], dtype=float)
d = len(cov)

# Closed form 0.5 * (d * ln(2*pi*e) + ln det(cov)). slogdet yields ln|det|
# directly, so the product (2*pi*e)**d * det(cov) is never formed and cannot
# overflow or underflow before the logarithm is taken.
sign, logdet = np.linalg.slogdet(cov)
if sign <= 0:
    # A non-positive determinant would otherwise store nan/-inf as the "truth".
    raise ValueError(
        f"Covariance of '{experiment}' must have a positive determinant."
    )
true_h = 0.5 * (d * np.log(2 * np.pi * np.e) + logdet) # Reference

eval.evaluate_entropy(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

2023-11-13 12:42:03 - (4D-GAUSSIAN, scott, 10000, 1) - Time: 0.30739 s - Est.: 1.857 nats


True entropy: 5.094 nats
