## Mutual Information Estimation of Correlated Gaussians

In [1]:
import numpy as np
import tensorflow as tf

from estimators.binning_estimators import calc_bin_mi
from estimators.kde_estimators import calc_kde_gaussian_mi, calc_kde_mi
from estimators.knn_estimators import calc_ksg_mi_cc
from estimators.neural_estimators import calc_neural_mi

2024-07-01 11:52:36.046854: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9373] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-01 11:52:36.046928: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-01 11:52:36.048790: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1534] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-01 11:52:36.057460: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Generate Correlated Gaussians Dataset

In [2]:
def generate_data(rho, n, d, seed=None):
    """Sample two ``(n, d)`` arrays whose matching columns are correlated Gaussians.

    For every column index ``j`` a separate batch of ``n`` points is drawn from
    a zero-mean bivariate normal with unit variances and correlation ``rho``;
    the first coordinate is written to ``X[:, j]`` and the second to
    ``Y[:, j]``. Different column indices come from separate draws.

    Parameters
    ----------
    rho : float
        Correlation between ``X[:, j]`` and ``Y[:, j]``; must lie in [-1, 1].
    n : int
        Number of samples (rows).
    d : int
        Number of correlated pairs (columns).
    seed : int or None, optional
        If given, samples are drawn from ``np.random.default_rng(seed)`` so the
        call is reproducible on its own. If ``None`` (the default) the global
        ``np.random`` state is used, exactly as before this parameter existed.

    Returns
    -------
    X, Y : numpy.ndarray
        Two float arrays of shape ``(n, d)``.

    Raises
    ------
    ValueError
        If ``rho`` is outside [-1, 1] (the covariance matrix would not be
        positive semi-definite).
    """
    # `not (a <= rho <= b)` also rejects NaN, which a plain `rho > 1` check would miss.
    if not -1.0 <= rho <= 1.0:
        raise ValueError(f'rho must be in [-1, 1], got {rho!r}')

    mean = [0, 0]
    cov = np.array([[1, rho], [rho, 1]])

    # Keep the legacy global-state sampler as the default so existing callers
    # (and any np.random.seed(...) they rely on) behave as before.
    if seed is None:
        draw = np.random.multivariate_normal
    else:
        draw = np.random.default_rng(seed).multivariate_normal

    X = np.zeros((n, d))
    Y = np.zeros((n, d))
    for j in range(d):
        data = draw(mean=mean, cov=cov, size=n)
        X[:, j] = data[:, 0]
        Y[:, j] = data[:, 1]

    return X, Y

In [3]:
# Experiment configuration.
SEED = 0    # fixes NumPy's global RNG so a fresh Restart & Run All regenerates the same dataset
rho = 0.8   # correlation passed to generate_data
d = 2       # number of columns in X and in Y
n = 10000   # number of samples

# NOTE(review): the outputs recorded in this notebook were produced before the
# seed was added; re-run all cells to refresh them.
np.random.seed(SEED)
X,Y = generate_data(rho,n,d)

# Reference value the estimators below are compared against, computed with the
# natural logarithm (i.e. in nats).
print(f'True I(X;Y): {-d/2*np.log(1-rho**2):.3f}')

True I(X;Y): 1.022


### Binning Estimator

In [60]:
# Run the binning estimator once per bin-selection rule and print each estimate.
# NOTE(review): the rule names look like numpy.histogram_bin_edges presets —
# confirm against estimators.binning_estimators.calc_bin_mi.
bin_rules = ('fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt')
for method in bin_rules:
    mi = calc_bin_mi(X, Y, method)
    print(f'Method: {method}, I(X;Y) = {mi:.3f}')

Method: fd, I(X;Y) = 4.622
Method: doane, I(X;Y) = 1.161
Method: scott, I(X;Y) = 3.709
Method: rice, I(X;Y) = 3.375
Method: sturges, I(X;Y) = 1.161
Method: sqrt, I(X;Y) = 6.148


### KDE Estimator

In [61]:
# Single KDE estimate using the Gaussian-specific helper; the options are
# passed as keywords exactly as named here.
kde_gaussian_options = {
    'bandwidth': 'silverman',
    'mode': 'resubstitution',
}
mi = calc_kde_gaussian_mi(X, Y, **kde_gaussian_options)
print(f'I(X;Y) = {mi:.3f}')

I(X;Y) = 1.155


In [62]:
# Sweep every (kernel, bandwidth rule) combination, kernel-major, and print
# the KDE estimate for each.
# NOTE(review): the recorded run printed nan for the 'cosine' kernel with both
# bandwidth rules — worth investigating inside calc_kde_mi.
kernels = ['gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine']
bandwidths = ['scott', 'silverman']
kde_grid = [(kern, bw) for kern in kernels for bw in bandwidths]

for kernel, bandwidth in kde_grid:
    # calc_kde_mi takes the bandwidth before the kernel.
    mi = calc_kde_mi(X, Y, bandwidth, kernel)
    print(f'Kernel: {kernel}, Bandwidth: {bandwidth}, I(X;Y) = {mi:.3f}')

Kernel: gaussian, Bandwidth: scott, I(X;Y) = 0.977
Kernel: gaussian, Bandwidth: silverman, I(X;Y) = 0.999
Kernel: tophat, Bandwidth: scott, I(X;Y) = 1.246
Kernel: tophat, Bandwidth: silverman, I(X;Y) = 1.290
Kernel: epanechnikov, Bandwidth: scott, I(X;Y) = 1.571
Kernel: epanechnikov, Bandwidth: silverman, I(X;Y) = 1.655
Kernel: exponential, Bandwidth: scott, I(X;Y) = 0.559
Kernel: exponential, Bandwidth: silverman, I(X;Y) = 0.606
Kernel: linear, Bandwidth: scott, I(X;Y) = 1.773
Kernel: linear, Bandwidth: silverman, I(X;Y) = 1.877
Kernel: cosine, Bandwidth: scott, I(X;Y) = nan
Kernel: cosine, Bandwidth: silverman, I(X;Y) = nan


### KNN Estimator

In [7]:
# Evaluate the KSG estimator for k = 1, 2, ..., 20 and print each estimate.
k_max = 20
for k in range(1, k_max + 1):
    mi = calc_ksg_mi_cc(X, Y, k)
    print(f'k: {k}, I(X;Y) = {mi:.3f}')

k: 1, I(X;Y) = 1.031
k: 2, I(X;Y) = 1.045
k: 3, I(X;Y) = 1.049
k: 4, I(X;Y) = 1.046
k: 5, I(X;Y) = 1.040
k: 6, I(X;Y) = 1.038
k: 7, I(X;Y) = 1.036
k: 8, I(X;Y) = 1.033
k: 9, I(X;Y) = 1.032
k: 10, I(X;Y) = 1.031
k: 11, I(X;Y) = 1.029
k: 12, I(X;Y) = 1.027
k: 13, I(X;Y) = 1.027
k: 14, I(X;Y) = 1.026
k: 15, I(X;Y) = 1.025
k: 16, I(X;Y) = 1.024
k: 17, I(X;Y) = 1.021
k: 18, I(X;Y) = 1.020
k: 19, I(X;Y) = 1.019
k: 20, I(X;Y) = 1.017


### Neural Estimator

In [5]:
# Convert the NumPy samples to tensors once; every run below reuses them.
X_tensor = tf.convert_to_tensor(X)
Y_tensor = tf.convert_to_tensor(Y)

# Display name -> keyword arguments forwarded to calc_neural_mi
# (training objective and evaluation type).
estimators = {
    'nwj': {'train_obj': 'nwj', 'eval_type': 'nwj'},
    'dv': {'train_obj': 'dv', 'eval_type': 'dv'},
    'cpc': {'train_obj': 'cpc', 'eval_type': 'cpc'},
    'js': {'train_obj': 'js_fgan', 'eval_type': 'nwj'},
    'smile': {'train_obj': 'js_fgan', 'eval_type': 'smile'},
    'nwj_direct': {'train_obj': 'nwj', 'eval_type': 'direct'},
    'mine_direct': {'train_obj': 'dv', 'eval_type': 'direct'},
    'js_direct': {'train_obj': 'js_fgan', 'eval_type': 'direct'},
}
critics = ['separable', 'concat']

# NOTE(review): in the recorded run the 'dv', 'cpc' and 'mine_direct' rows came
# out negative for both critics, unlike the other estimators — investigate.
for estimator, settings in estimators.items():
    for critic in critics:
        # A fresh batched dataset is built for every (estimator, critic) run.
        dataset = tf.data.Dataset.from_tensor_slices((X_tensor, Y_tensor)).batch(512)
        mi = calc_neural_mi(
            dataset,
            n_epochs=100,
            critic=critic,
            train_obj=settings['train_obj'],
            eval_type=settings['eval_type'],
            print_mi=False,
        )
        print(f'Estimator: {estimator}, Critic: {critic}, I(X;Y) = {mi:.3f}')

Estimator: nwj, Critic: separable, I(X;Y) = 1.031
Estimator: nwj, Critic: concat, I(X;Y) = 1.047
Estimator: dv, Critic: separable, I(X;Y) = -8.773
Estimator: dv, Critic: concat, I(X;Y) = -0.002
Estimator: cpc, Critic: separable, I(X;Y) = -0.758
Estimator: cpc, Critic: concat, I(X;Y) = -0.968
Estimator: js, Critic: separable, I(X;Y) = 1.022
Estimator: js, Critic: concat, I(X;Y) = 1.028
Estimator: smile, Critic: separable, I(X;Y) = 1.024
Estimator: smile, Critic: concat, I(X;Y) = 1.034
Estimator: nwj_direct, Critic: separable, I(X;Y) = 1.043
Estimator: nwj_direct, Critic: concat, I(X;Y) = 1.048
Estimator: mine_direct, Critic: separable, I(X;Y) = -9.225
Estimator: mine_direct, Critic: concat, I(X;Y) = -0.257
Estimator: js_direct, Critic: separable, I(X;Y) = 1.026
Estimator: js_direct, Critic: concat, I(X;Y) = 1.035
