# In [None]:
import numpy as np
from scipy.stats import entropy, multivariate_normal

# Compare a 2-D point cloud against two Gaussian "goal" distributions by
# binning the points into a histogram and computing the KL divergence between
# the binned data distribution and each (discretised) Gaussian.

# Floor applied to the reference density, relative to its peak value, so that
# bins whose Gaussian density underflows to 0.0 cannot turn the KL divergence
# into inf. It is small enough that p / q stays finite in float64.
REFERENCE_FLOOR = 1e-300


def bin_center_grid(xedges, yedges):
    """Return the bin-centre coordinates of a ``np.histogram2d`` grid.

    Args:
        xedges: 1-D sequence of bin edges along the first (x) dimension.
        yedges: 1-D sequence of bin edges along the second (y) dimension.

    Returns:
        Array of shape ``(len(xedges) - 1, len(yedges) - 1, 2)`` in which
        ``grid[i, j]`` holds the ``(x, y)`` centre of histogram bin ``[i, j]``.
    """
    xedges = np.asarray(xedges, dtype=float)
    yedges = np.asarray(yedges, dtype=float)
    x_centers = 0.5 * (xedges[:-1] + xedges[1:])
    y_centers = 0.5 * (yedges[:-1] + yedges[1:])
    # np.histogram2d stores x along axis 0 and y along axis 1; meshgrid's
    # default "xy" indexing would produce the transposed layout, so the
    # flattened grid would no longer line up with the flattened histogram.
    grid_x, grid_y = np.meshgrid(x_centers, y_centers, indexing="ij")
    return np.stack((grid_x, grid_y), axis=-1)


def gaussian_reference(pos, mean, cov, floor=REFERENCE_FLOOR):
    """Discretise a 2-D Gaussian on a grid and return it as probabilities.

    Args:
        pos: Array of shape ``(nx, ny, 2)`` with the evaluation points, e.g.
            the output of :func:`bin_center_grid`.
        mean: Length-2 mean of the Gaussian.
        cov: 2x2 covariance matrix of the Gaussian.
        floor: Minimum density, relative to the largest density on the grid,
            assigned to any bin. Keeps every probability strictly positive.

    Returns:
        1-D array of length ``nx * ny`` (row-major flattening of the grid)
        whose entries are strictly positive and sum to 1.
    """
    log_density = multivariate_normal.logpdf(pos, mean=mean, cov=cov).ravel()
    # Shift by the maximum before exponentiating: the largest entry becomes
    # exactly 1, so the sum can never underflow to zero (which would give NaN).
    density = np.exp(log_density - log_density.max())
    density = np.maximum(density, floor)
    return density / density.sum()


if __name__ == "__main__":
    # 1. Load the npy file
    data = np.load("path_to_file.npy")
    if data.ndim != 2 or data.shape[1] < 2:
        raise ValueError(
            f"expected an array of shape (n_points, >=2), got {data.shape}"
        )

    # 2. Binning the Data
    # Create a histogram for 2D data; hist[i, j] counts points in x-bin i, y-bin j
    hist, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=[30, 30])  # Choose the bin size that fits your data best
    data_distribution = hist.flatten() + 1e-10  # Flatten and add a small constant to avoid zeros

    # Normalize the data distribution
    data_distribution /= data_distribution.sum()

    # Generate the grid of bin centres over which we evaluate the Gaussians
    pos = bin_center_grid(xedges, yedges)
    X, Y = pos[..., 0], pos[..., 1]

    # 3. Generate the First Reference Distribution
    goal_mean1 = [-0.7, 0.7]
    cov_matrix1 = [[0.1**2, 0], [0, 0.1**2]]
    ref_distribution1 = gaussian_reference(pos, goal_mean1, cov_matrix1)

    # 3b. Generate the Second Reference Distribution
    goal_mean2 = [0.7, -0.7]
    cov_matrix2 = [[0.1**2, 0], [0, 0.1**2]]
    ref_distribution2 = gaussian_reference(pos, goal_mean2, cov_matrix2)

    # 4. Calculate KL Divergence for both reference distributions
    kl_div1 = entropy(data_distribution, ref_distribution1)
    kl_div2 = entropy(data_distribution, ref_distribution2)

    print("KL Divergence for Goal 1:", kl_div1)
    print("KL Divergence for Goal 2:", kl_div2)