In [1]:
import ast

import h5py
import numpy as np
from scipy import stats
from scipy.integrate import nquad
from scipy.special import digamma, entr, gamma, gammaln

from utils import get_logger

In [2]:
from evaluators.bin_evaluators import EvaluatorBIN

# Create the evaluator and wire up its input file, output file and logger.
eval = EvaluatorBIN()
eval.data_path = "data.hdf5"
eval.out_path = "results/bin.hdf5"
eval.logger = get_logger("results/bin_entropy.log")

# Quantity key handed to the evaluator (presumably "h" = entropy — TODO confirm).
eval.quantity = "h"

# Sweep definition: hyper-parameter labels, sample sizes and seeds 1..5.
eval.hyper_params = ["scott", "fd", "sturges"]
eval.sample_sizes = [
    100, 200, 500,
    1_000, 5_000,
    10_000, 50_000,
    100_000,
]
eval.seeds = range(1, 6)

# Echo every instance attribute so the configuration is visible in the output.
for attr_name, attr_value in vars(eval).items():
    print(f"{attr_name} - {attr_value}")

# Prepare the output store before any experiment cell runs.
eval.create_database()
eval.create_group()

# Names of the experiments handled by the cells below.
experiments = [
    "uniform", "normal", "normal-mixture", "exponential",
    "bivariate-normal", "bivariate-normal-mixture",
    "gexp", "4d-gaussian",
]

ModuleNotFoundError: No module named 'base_evaluator'

In [None]:
# Assign the full hyper-parameter list explicitly rather than appending "qs":
# an in-place append adds another "qs" every time this cell is re-run, so the
# cell would not be idempotent. The first three entries match the base list
# used elsewhere in this notebook.
eval.hyper_params = ["scott", "fd", "sturges", "qs"]

# # # # # UNIFORM # # # # #

experiment = "uniform"
# Calculate Truth
# The parameters are stored as a Python-literal string in the "hyper_params"
# attribute of the experiment's "p" entry.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Closed-form reference: log of dist_params[0][1]
# (presumably the width of the uniform support — TODO confirm).
true_h = np.log(dist_params[0][1]) # Reference

# NOTE(review): calc_bin_entropy is not imported or defined in the visible
# cells — confirm it is in scope before running.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Save
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# # # # # NORMAL # # # # #

experiment = "normal"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed-form reference 0.5 * ln(2 * pi * s^2) + 0.5 with s = dist_params[0][1]
# (presumably the standard deviation — TODO confirm).
scale = dist_params[0][1]
true_h = 0.5 * np.log(2 * np.pi * (scale**2)) + 0.5

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# # # # # NORMAL-MIXTURE # # # # #

experiment = "normal-mixture"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

def pdf_normal(x, params):
    """Evaluate a weighted sum of 1-D normal densities at ``x``.

    Args:
        x: Point (or array of points) at which to evaluate the density.
        params: Iterable of ``(loc, scale, weight)`` triples, one per
            mixture component.

    Returns:
        The sum over components of ``weight * N(x; loc, scale)``;
        ``0.0`` when ``params`` is empty.
    """
    return sum(
        (stats.norm.pdf(x, loc=loc, scale=scale) * weight
         for loc, scale, weight in params),
        0.0,
    )

def h_normal(x, params):
    """Entropy integrand ``-p(x) * ln p(x)`` for the 1-D normal mixture.

    Args:
        x: Point at which to evaluate the integrand.
        params: Mixture parameters, forwarded unchanged to ``pdf_normal``.

    Returns:
        ``-p * ln(p)`` where ``p = pdf_normal(x, params)``, and exactly
        ``0`` when ``p == 0``.
    """
    # entr(p) is -p*log(p) for p > 0 and 0 at p == 0. The naive expression
    # evaluates 0 * -inf = nan once the density underflows in the tails, and
    # a single nan sample makes the whole numerical integral nan.
    return entr(pdf_normal(x, params))

# Integration window: one [lower, upper] pair for the single dimension.
norm_lims = [[-15, 25]]

# Numerical Integration Result (nquad also returns an absolute-error estimate).
true_h, true_h_abserr = nquad(h_normal, norm_lims, args=(dist_params,))

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# # # # # EXPONENTIAL # # # # #

experiment = "exponential"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed-form reference 1 - ln(1 / s) with s = dist_params[0][1]
# (presumably the scale, i.e. 1 / rate — TODO confirm).
scale = dist_params[0][1]
true_h = 1 - np.log(1 / scale)

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# Reset the hyper-parameter list to the three base entries for this cell.
eval.hyper_params = ["scott", "fd", "sturges"]

# # # # # BIVARIATE-NORMAL # # # # #

experiment = "bivariate-normal"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed-form reference 0.5 * ln((2 * pi * e)^d * det(C)), with
# C = dist_params[0][1] (used as a square matrix) and d = len(C).
cov = dist_params[0][1]
d = len(cov)
true_h = 0.5 * np.log((2 * np.pi * np.exp(1)) ** d * np.linalg.det(cov))

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

def pdf_mnorm(x, y, params):
    """Evaluate a weighted sum of bivariate normal densities at ``(x, y)``.

    Args:
        x: First coordinate (scalar or array).
        y: Second coordinate (scalar or array).
        params: Iterable of ``(mean, cov, weight)`` triples, one per
            mixture component.

    Returns:
        The sum over components of ``weight * N((x, y); mean, cov)``;
        ``0.0`` when ``params`` is empty.
    """
    # Stack the coordinates along the last axis once; every component is
    # evaluated at the same point(s).
    point = np.dstack((x, y))
    return sum(
        (stats.multivariate_normal(mean=mean, cov=cov).pdf(point) * weight
         for mean, cov, weight in params),
        0.0,
    )

def h_mnorm(x, y, params1):
    """Entropy integrand ``-p(x, y) * ln p(x, y)`` for the bivariate mixture.

    Args:
        x: First coordinate of the evaluation point.
        y: Second coordinate of the evaluation point.
        params1: Mixture parameters, forwarded unchanged to ``pdf_mnorm``.

    Returns:
        ``-p * ln(p)`` where ``p = pdf_mnorm(x, y, params1)``, and exactly
        ``0`` when ``p == 0``.
    """
    # entr(p) is -p*log(p) for p > 0 and 0 at p == 0. The naive expression
    # evaluates 0 * -inf = nan once the density underflows, and a single nan
    # sample makes the whole numerical integral nan.
    return entr(pdf_mnorm(x, y, params1))

# Integration window: one [lower, upper] pair per dimension.
binorm_lims = [[-7, 7], [-7, 7]]

# Numerical Integration Result (nquad also returns an absolute-error estimate).
true_h, true_h_abserr = nquad(h_mnorm, binorm_lims, args=(dist_params,))

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Calculate Truth
# The parameters are stored as a Python-literal string in the "hyper_params"
# attribute of the experiment's "p" entry.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

tetha = dist_params[0][0]
# Closed-form reference 1 + t - t * psi(t) + ln Gamma(t).
# gammaln evaluates ln Gamma(t) directly, so it stays finite where
# np.log(gamma(t)) overflows to inf for large t. The former trailing
# "- np.log(1.0)" term was identically zero and is omitted.
true_h = 1 + tetha - tetha * digamma(tetha) + gammaln(tetha) # Reference

eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Save
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")

In [None]:
# Restrict this experiment to a single seed (seed 1).
eval.seeds = range(1, 2)

# # # # # 4D-GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Calculate Truth
# Read the stored parameter string first, then parse it once the file is closed.
with h5py.File(eval.data_path, "r") as h5_file:
    raw_params = h5_file[experiment]["p"].attrs["hyper_params"]
dist_params = ast.literal_eval(raw_params)

# Closed-form reference 0.5 * ln((2 * pi * e)^d * det(C)), with
# C = dist_params[0][1] (used as a square matrix) and d = len(C).
cov = dist_params[0][1]
d = len(cov)
true_h = 0.5 * np.log((2 * np.pi * np.exp(1)) ** d * np.linalg.det(cov))

# Run the estimator sweep for this experiment.
eval.evaluate_entropy(experiment, calc_bin_entropy, "scott")

# Persist the reference value and report it.
eval.write_to_hdf5(experiment, true_h)
print(f"True entropy: {true_h:.3f} nats")