In [2]:
import h5py
import ast
from utils.misc_utils import get_logger

import numpy as np
from scipy import stats
from scipy.special import gamma, digamma
from scipy.integrate import nquad

from unite_toolbox.bin_estimators import calc_bin_mutual_information

In [3]:
from utils.bin_evaluators import EvaluatorBIN

# Shared evaluator instance used by every experiment cell below.
# NOTE(review): the name `eval` shadows Python's builtin `eval()` for the whole
# kernel session; renaming it would require touching every cell that uses it.
eval = EvaluatorBIN()

# Input samples are read from `data_path`; `out_path` and the log file both
# live under "results/".
eval.data_path = "data.hdf5"
eval.out_path = "results/mi.hdf5"
eval.logger = get_logger("results/bin_mi.log")

# Quantity label for this notebook ("mi"); presumably selects/labels the
# mutual-information results inside the evaluator — confirm in utils.bin_evaluators.
eval.quantity = "mi"

# Presumably histogram binning rules (Scott, Freedman-Diaconis, Sturges) —
# confirm against unite_toolbox.bin_estimators.
eval.hyper_params = ["scott", "fd", "sturges"]
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
eval.seeds = range(1, 6)  # seeds 1..5 inclusive

# Dump the evaluator's instance attributes so the configuration is recorded in
# the cell output.
for k, v in vars(eval).items():
    print(f"{k} - {v}")


# Presumably initialise the output HDF5 file and its group at `out_path` —
# the implementation is not visible here; confirm in utils.bin_evaluators.
eval.create_database()
eval.create_group()

# Experiment names; each matches the `experiment` string set by one of the
# cells below. The list itself is not referenced by the cells visible here.
experiments = [
    "bivariate-normal",
    "bivariate-normal-mixture",
    "gexp",
    "4d-gaussian"
]

data_path - data.hdf5
out_path - results/mi.hdf5
logger - <RootLogger root (DEBUG)>
quantity - mi
hyper_params - ['scott', 'fd', 'sturges']
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 6)
results - None


In [6]:
# # # # # BIVARIATE NORMAL # # # # #

experiment = "bivariate-normal"

# Ground truth: the generating parameters are stored as a Python-literal string
# in the "hyper_params" attribute of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    raw_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(raw_params)

# Covariance matrix of the first (only) parameter set.
cov = np.array(dist_params[0][1])
d = len(cov)

# Closed form for a Gaussian pair: 0.5 * ln(var_first * var_last / det(cov)).
marginal_var_product = cov[0, 0] * cov[-1, -1]
true_mi = 0.5 * np.log(marginal_var_product / np.linalg.det(cov))  # Reference

# Run the estimator with the "scott" setting only.
eval.evaluate_mutual_information(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-09 19:12:25 - (BIVARIATE-NORMAL, scott, 10000, 1) - Time: 0.00509 s - Est.: 0.210 nats
2023-11-09 19:12:26 - (BIVARIATE-NORMAL, scott, 10000, 2) - Time: 0.00484 s - Est.: 0.217 nats
2023-11-09 19:12:26 - (BIVARIATE-NORMAL, scott, 10000, 3) - Time: 0.00547 s - Est.: 0.206 nats
2023-11-09 19:12:26 - (BIVARIATE-NORMAL, scott, 10000, 4) - Time: 0.00371 s - Est.: 0.208 nats
2023-11-09 19:12:26 - (BIVARIATE-NORMAL, scott, 10000, 5) - Time: 0.00357 s - Est.: 0.215 nats


True mutual information: 0.144 nats


In [7]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #

experiment = "bivariate-normal-mixture"

# Ground truth inputs: parse the mixture parameters stored as a Python-literal
# string in the "hyper_params" attribute of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    raw_params = f[experiment]["p"].attrs["hyper_params"]
    dist_params = ast.literal_eval(raw_params)

def pdf_normal(x, params):
    """Evaluate a weighted mixture of univariate normal densities at ``x``.

    Parameters
    ----------
    x : float or array_like
        Point(s) at which the density is evaluated.
    params : iterable of (loc, scale, weight)
        One triple per mixture component. ``scale`` is handed to
        ``scipy.stats.norm`` unchanged, i.e. it is a standard deviation.

    Returns
    -------
    float or numpy.ndarray
        Sum over components of ``weight * N(x; loc, scale)``; ``0.0`` when
        ``params`` is empty.
    """
    weighted_densities = (
        stats.norm.pdf(x, loc=mean, scale=std) * weight
        for mean, std, weight in params
    )
    # Start from 0.0 so an empty mixture still yields a float.
    return sum(weighted_densities, 0.0)

def pdf_mnorm(x, y, params):
    """Evaluate a weighted mixture of multivariate normal densities at ``(x, y)``.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s); they are stacked along a new
        last axis with ``np.dstack`` before being passed to the density.
    params : iterable of (mean, cov, weight)
        One triple per mixture component, forwarded to
        ``scipy.stats.multivariate_normal`` as ``mean`` and ``cov``.

    Returns
    -------
    float or numpy.ndarray
        Sum over components of ``weight * N((x, y); mean, cov)``; ``0.0`` when
        ``params`` is empty.
    """
    weighted_densities = (
        stats.multivariate_normal(mean=mean, cov=cov).pdf(np.dstack((x, y))) * weight
        for mean, cov, weight in params
    )
    # Start from 0.0 so an empty mixture still yields a float.
    return sum(weighted_densities, 0.0)

def mi_mnorm(x, y, params1):
    """Mutual-information integrand for a bivariate normal mixture.

    Returns ``p(x, y) * ln(p(x, y) / (p(x) * p(y)))``, where ``p(x, y)`` is the
    mixture density given by ``params1`` and ``p(x)``, ``p(y)`` are its
    marginals. Integrating this over the plane gives the mutual information
    in nats.

    Parameters
    ----------
    x, y : float or array_like
        Coordinates of the evaluation point(s).
    params1 : iterable of (mean, cov, weight)
        Mixture components: ``mean`` has two entries and ``cov`` is the 2x2
        covariance matrix.

    Returns
    -------
    float or numpy.ndarray
        Value of the integrand at ``(x, y)``.
    """
    params_x = []
    params_y = []
    for mean, cov, weight in params1:
        # The diagonal of ``cov`` holds marginal *variances*, but pdf_normal
        # forwards its second entry to scipy.stats.norm as ``scale`` (a
        # standard deviation) — hence the square root.
        params_x.append([mean[0], np.sqrt(cov[0][0]), weight])
        params_y.append([mean[1], np.sqrt(cov[1][1]), weight])

    pxy = pdf_mnorm(x, y, params1)
    px = pdf_normal(x, params_x)
    py = pdf_normal(y, params_y)

    return pxy * np.log(pxy / (px * py))


# Integration box for both coordinates: [-7, 7] x [-7, 7].
binorm_lims = [[-7, 7], [-7, 7]]

# nquad returns (integral, abs_error_estimate); only the integral is kept as
# the reference value.
integral, _abs_err = nquad(mi_mnorm, binorm_lims, args=(dist_params,))
true_mi = integral  # Numerical Integration Result

# Run the estimator with the "scott" setting only.
eval.evaluate_mutual_information(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-09 19:14:21 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 1) - Time: 0.00569 s - Est.: 0.191 nats
2023-11-09 19:14:21 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 2) - Time: 0.00556 s - Est.: 0.188 nats
2023-11-09 19:14:21 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 3) - Time: 0.00494 s - Est.: 0.181 nats
2023-11-09 19:14:21 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 4) - Time: 0.00401 s - Est.: 0.188 nats
2023-11-09 19:14:22 - (BIVARIATE-NORMAL-MIXTURE, scott, 10000, 5) - Time: 0.00605 s - Est.: 0.190 nats


True mutual information: 0.140 nats


In [9]:
# # # # # GAMMA-EXPONENTIAL # # # # #

experiment = "gexp"

# Calculate Truth
# The generating parameters are stored as a Python-literal string in the
# "hyper_params" attribute of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# First entry of the first parameter set.
# NOTE(review): `tetha` is presumably a misspelling of "theta" (likely the gamma
# shape parameter — confirm against the data generator). The name is left as is
# because it is a kernel-global that other cells could read.
tetha = dist_params[0][0]
# Closed-form reference: digamma(theta) - ln(theta) + 1/theta, in nats.
true_mi = digamma(tetha) - np.log(tetha) + (1 / tetha) # Reference

# Run the estimator with the "scott" setting only.
eval.evaluate_mutual_information(experiment, "scott")

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-09 19:15:02 - (GEXP, scott, 10000, 1) - Time: 0.00580 s - Est.: 0.191 nats


2023-11-09 19:15:03 - (GEXP, scott, 10000, 2) - Time: 0.00463 s - Est.: 0.202 nats
2023-11-09 19:15:03 - (GEXP, scott, 10000, 3) - Time: 0.00355 s - Est.: 0.187 nats
2023-11-09 19:15:03 - (GEXP, scott, 10000, 4) - Time: 0.00384 s - Est.: 0.183 nats
2023-11-09 19:15:03 - (GEXP, scott, 10000, 5) - Time: 0.00473 s - Est.: 0.183 nats


True mutual information: 0.158 nats


In [10]:
# # # # # 4D GAUSSIAN # # # # #

experiment = "4d-gaussian"

# Calculate Truth
# The generating parameters are stored as a Python-literal string in the
# "hyper_params" attribute of the experiment's "p" dataset.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

cov = np.array(dist_params[0][1])
d = len(cov)
# MI between the first d-1 variables and the last one for a joint Gaussian:
# 0.5 * ln(det(cov[:d-1, :d-1]) * cov[-1, -1] / det(cov)).
true_mi = 0.5 * np.log(np.linalg.det(cov[:d-1,:d-1]) * cov[-1, -1] / np.linalg.det(cov)) # Reference

# Use a single seed for this experiment only. The shared evaluator's seed list
# is restored afterwards so that re-running any other cell still uses the
# configured seeds instead of silently inheriting this override.
default_seeds = eval.seeds
eval.seeds = range(1, 2)
try:
    eval.evaluate_mutual_information(experiment, "scott")

    # Save
    eval.write_single_to_hdf5(experiment, true_mi)
finally:
    eval.seeds = default_seeds

print(f"True mutual information: {true_mi:.3f} nats")

2023-11-09 19:15:16 - (4D-GAUSSIAN, scott, 10000, 1) - Time: 4.54912 s - Est.: 2.645 nats


True mutual information: 0.235 nats
