In [1]:
import h5py
import ast
from utils.misc_utils import get_logger

import numpy as np
from scipy import stats
from scipy.special import gamma, digamma
from scipy.integrate import nquad

In [2]:
# Configure the single evaluator object that every experiment cell below reuses.
from utils.knn_evaluators import Evaluator_KNN

# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest of
# the notebook. It is kept because all later cells reference this name.
eval = Evaluator_KNN()

# HDF5 file the experiment cells open to read each distribution's hyper-parameters.
eval.data_path = "data.hdf5"
# Presumably the file written by write_single_to_hdf5 — confirm in Evaluator_KNN.
eval.out_path = "results/knn.hdf5"
eval.logger = get_logger("results/knn_mi.log")

eval.quantity = "MI"

# Presumably the candidate k values (number of neighbours) — confirm in Evaluator_KNN.
eval.hyper_params = [1, 3, 5, 15, 50]
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000, 100_000]
# range(1, 3) yields the two seeds 1 and 2.
eval.seeds = range(1, 3)

# Echo every instance attribute so the configuration is recorded in the cell output.
for k, v in vars(eval).items():
    print(f"{k} - {v}")

# Project helpers; presumably they initialise the output file and its group
# for `quantity` — TODO confirm against Evaluator_KNN.
eval.create_database()
eval.create_group()

data_path - data.hdf5
out_path - results/knn.hdf5
logger - <RootLogger root (DEBUG)>
quantity - MI
hyper_params - [1, 3, 5, 15, 50]
sample_sizes - [100, 200, 500, 1000, 5000, 10000, 50000, 100000]
seeds - range(1, 3)
results - None


In [3]:
# # # # # BIVARIATE-NORMAL # # # # #
# Compute the analytic mutual information of the bivariate normal, run the
# evaluator on the same experiment, and store the analytic value next to the results.

experiment = "bivariate-normal"

# Calculate Truth
# The hyper-parameters are stored as a Python-literal string in the HDF5 attribute.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Assumes dist_params[0] is laid out as (mean, covariance, ...) — inferred from
# the indexing here; confirm against the data generator.
cov = np.array(dist_params[0][1])
# NOTE(review): `d` is assigned but never used in this cell.
d = len(cov)
# Closed-form Gaussian MI between the first and last coordinate:
# 0.5 * log(var_first * var_last / det(cov)), in nats.
true_mi = 0.5 * np.log(cov[0, 0] * cov[-1, -1] / np.linalg.det(cov)) # Reference

# The second argument (15) is presumably the k used by the estimator; it is one
# of the values in eval.hyper_params — confirm in Evaluator_KNN.
eval.evaluate_mutual_information(experiment, 15)

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-14 11:13:22 - Creating converter from 3 to 5
2023-11-14 11:13:23 - (BIVARIATE-NORMAL, 15, 10000, 1) - Time: 0.11398 s - Est.: 0.142 nats
2023-11-14 11:13:36 - (BIVARIATE-NORMAL, 15, 10000, 2) - Time: 0.11948 s - Est.: 0.148 nats
2023-11-14 11:13:53 - Creating converter from 5 to 3


True mutual information: 0.144 nats


In [4]:
# # # # # BIVARIATE-NORMAL-MIXTURE # # # # #
# Load the mixture parameters; the helper functions defined below in this cell
# use them to obtain the reference MI by numerical integration.

experiment = "bivariate-normal-mixture"

# Calculate Truth
# The hyper-parameters are stored as a Python-literal string in the HDF5 attribute.
# The helpers below unpack each entry as (mean, covariance, weight).
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

def pdf_normal(x, params):
    """Evaluate the density of a univariate normal mixture at ``x``.

    Args:
        x: Point (or array of points) at which to evaluate the density.
        params: Iterable of ``(loc, scale, weight)`` triples, one per mixture
            component. ``scale`` is passed to ``scipy.stats.norm`` and is
            therefore a standard deviation.

    Returns:
        The weighted sum of the component densities; ``0.0`` when ``params``
        is empty.
    """
    return sum(
        (
            stats.norm(loc=mu, scale=sigma).pdf(x) * weight
            for mu, sigma, weight in params
        ),
        0.0,
    )

def pdf_mnorm(x, y, params):
    """Evaluate the density of a bivariate normal mixture at ``(x, y)``.

    Args:
        x: First coordinate (scalar or array).
        y: Second coordinate, same shape as ``x``.
        params: Iterable of ``(mean, cov, weight)`` triples, one per mixture
            component, passed to ``scipy.stats.multivariate_normal``.

    Returns:
        The weighted sum of the component densities; ``0.0`` when ``params``
        is empty.
    """
    density = 0.0
    for mean, cov, weight in params:
        component = stats.multivariate_normal(mean=mean, cov=cov)
        # dstack pairs x and y along the last axis, as multivariate_normal expects.
        points = np.dstack((x, y))
        density = density + component.pdf(points) * weight
    return density

def mi_mnorm(x, y, params1):
    """Integrand of the mutual information of a bivariate normal mixture.

    Evaluates ``p(x, y) * log(p(x, y) / (p(x) * p(y)))`` at one point, so that
    integrating it over the plane gives the mutual information in nats.

    Args:
        x: First coordinate.
        y: Second coordinate.
        params1: Iterable of ``(mean, cov, weight)`` triples describing the
            bivariate mixture components.

    Returns:
        The value of the integrand at ``(x, y)``.
    """
    # The marginal of each component is a univariate normal with the matching
    # mean entry and diagonal covariance entry. The diagonal entry is a
    # variance, whereas pdf_normal hands its second field to scipy.stats.norm
    # as `scale` (a standard deviation), so the square root is required.
    params_x = []
    params_y = []
    for dist in params1:
        params_x.append([dist[0][0], np.sqrt(dist[1][0][0]), dist[2]])
        params_y.append([dist[0][1], np.sqrt(dist[1][1][1]), dist[2]])

    pxy = pdf_mnorm(x, y, params1)
    px = pdf_normal(x, params_x)
    py = pdf_normal(y, params_y)

    return pxy * np.log(pxy / (px * py))


# Integration box for (x, y). The integral is truncated to [-7, 7] on both axes,
# which assumes the mixture has negligible mass outside it — TODO confirm
# against the stored means and covariances.
binorm_lims = [[-7, 7], [-7, 7]]

# nquad returns (value, abs_error_estimate); only the value is kept here.
true_mi = nquad(mi_mnorm, binorm_lims, args=(dist_params,))[0] # Numerical Integration Result

# The second argument (15) is presumably the k used by the estimator; it is one
# of the values in eval.hyper_params — confirm in Evaluator_KNN.
eval.evaluate_mutual_information(experiment, 15)

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-14 11:15:07 - (BIVARIATE-NORMAL-MIXTURE, 15, 10000, 1) - Time: 0.13439 s - Est.: 0.136 nats
2023-11-14 11:15:26 - (BIVARIATE-NORMAL-MIXTURE, 15, 10000, 2) - Time: 0.17010 s - Est.: 0.137 nats


True mutual information: 0.140 nats


In [5]:
# # # # # GAMMA-EXPONENTIAL # # # # #
# Compute the reference mutual information for the gamma-exponential experiment,
# run the evaluator on it, and store the reference value next to the results.

experiment = "gexp"

# Calculate Truth
# The hyper-parameters are stored as a Python-literal string in the HDF5 attribute.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# NOTE(review): `tetha` is a misspelling of "theta"; the name is left unchanged
# because it is a notebook-level variable.
# Assumes dist_params[0][0] is the distribution's single parameter — confirm
# against the data generator.
tetha = dist_params[0][0]
# Reference value in nats: digamma(theta) - log(theta) + 1/theta.
# NOTE(review): the notebook does not cite a source for this formula.
true_mi = digamma(tetha) - np.log(tetha) + (1 / tetha) # Reference

# The second argument (15) is presumably the k used by the estimator; it is one
# of the values in eval.hyper_params — confirm in Evaluator_KNN.
eval.evaluate_mutual_information(experiment, 15)

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-14 11:16:16 - (GEXP, 15, 10000, 1) - Time: 0.11102 s - Est.: 0.152 nats
2023-11-14 11:16:37 - (GEXP, 15, 10000, 2) - Time: 0.19146 s - Est.: 0.154 nats


True mutual information: 0.158 nats


In [6]:
# # # # # 4D GAUSSIAN # # # # #
# Compute the analytic mutual information of the 4-D Gaussian, run the evaluator
# on the same experiment, and store the analytic value next to the results.

experiment = "4d-gaussian"

# Calculate Truth
# The hyper-parameters are stored as a Python-literal string in the HDF5 attribute.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Assumes dist_params[0] is laid out as (mean, covariance, ...) — inferred from
# the indexing here; confirm against the data generator.
cov = np.array(dist_params[0][1])
d = len(cov)
# Closed-form Gaussian MI between the first d-1 coordinates (leading block of
# cov) and the last coordinate: 0.5 * log(det(cov_block) * var_last / det(cov)), in nats.
true_mi = 0.5 * np.log(np.linalg.det(cov[:d-1,:d-1]) * cov[-1, -1] / np.linalg.det(cov)) # Reference

# The second argument (15) is presumably the k used by the estimator; it is one
# of the values in eval.hyper_params — confirm in Evaluator_KNN.
eval.evaluate_mutual_information(experiment, 15)

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-14 11:17:21 - (4D-GAUSSIAN, 15, 10000, 1) - Time: 0.48243 s - Est.: 0.226 nats


True mutual information: 0.235 nats


In [8]:
# NOTE: this drops the 100_000 sample size from the shared evaluator. The change
# persists, so any cell (re-)run after this one also uses the shorter list.
eval.sample_sizes = [100, 200, 500, 1_000, 5_000, 10_000, 50_000]

# # # # # 10D-GAUSSIAN # # # # #
# Compute the analytic mutual information of the 10-D Gaussian, run the evaluator
# on the same experiment, and store the analytic value next to the results.

experiment = "10d-gaussian"

# Calculate Truth
# The hyper-parameters are stored as a Python-literal string in the HDF5 attribute.
with h5py.File(eval.data_path, "r") as f:
    dist_params = ast.literal_eval(f[experiment]["p"].attrs["hyper_params"])

# Assumes dist_params[0] is laid out as (mean, covariance, ...) — inferred from
# the indexing here; confirm against the data generator.
cov = np.array(dist_params[0][1])
d = len(cov)
# Closed-form Gaussian MI between the first d-1 coordinates (leading block of
# cov) and the last coordinate: 0.5 * log(det(cov_block) * var_last / det(cov)), in nats.
true_mi = 0.5 * np.log(np.linalg.det(cov[:d-1,:d-1]) * cov[-1, -1] / np.linalg.det(cov)) # Reference

# The second argument (15) is presumably the k used by the estimator; it is one
# of the values in eval.hyper_params — confirm in Evaluator_KNN.
eval.evaluate_mutual_information(experiment, 15)

# Save
eval.write_single_to_hdf5(experiment, true_mi)
print(f"True mutual information: {true_mi:.3f} nats")

2023-11-14 11:19:56 - (10D-GAUSSIAN, 15, 10000, 1) - Time: 2.68919 s - Est.: 1.007 nats


True mutual information: 1.099 nats
