# Testing with grid lookup

In [76]:
import numpy as np
from sklearn.neighbors import KernelDensity

# Example 2D data: 100 standard-normal samples in two dimensions.
# A seeded Generator makes the sample, the fitted KDE and the grid below
# reproducible across kernel restarts.
rng = np.random.default_rng(0)
data = rng.normal(0, 1, size=(100, 2))

# Step 1: Compute KDE on a 2D grid
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(data)

# Creating a 2D grid to evaluate KDE: 10000 nodes per axis, spanning the data
# range padded by 1 unit on every side (1e8 nodes in total).
x_axis = np.linspace(data[:, 0].min() - 1, data[:, 0].max() + 1, 10000)
y_axis = np.linspace(data[:, 1].min() - 1, data[:, 1].max() + 1, 10000)
grid_x, grid_y = np.meshgrid(x_axis, y_axis)

# One (x, y) row per grid node. column_stack builds the (n, 2) array directly
# in C order instead of producing a transposed view of a (2, n) array, which
# downstream validation may otherwise have to copy (TODO confirm with a
# memory profile at this grid size).
grid_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])

# Evaluate KDE on the grid. score_samples returns log-density, so it is
# exponentiated before being stored with the same 2D layout as the grid.
grid_density = np.exp(kde.score_samples(grid_points)).reshape(grid_x.shape)

In [81]:
def vectorized_grid_lookup(new_points, grid_x, grid_y, grid_density):
    """Return the precomputed grid value of the node nearest to each point.

    Parameters
    ----------
    new_points : array-like, shape (n, 2) or (2,)
        Query coordinates as (x, y) pairs. Any array-like is accepted, and a
        single (x, y) pair is treated as one point.
    grid_x, grid_y : ndarray, 2-D
        Coordinate arrays of the grid. x is read along axis 1 of ``grid_x``
        and y along axis 0 of ``grid_y`` (the layout produced by
        ``np.meshgrid`` with its default indexing). The spacing is taken from
        the first two nodes on each axis, so the grid is assumed to be
        uniformly spaced.
    grid_density : ndarray, 2-D
        Values stored per grid node, indexed as ``[row (y), column (x)]``.

    Returns
    -------
    ndarray, shape (n,)
        ``grid_density`` at the nearest node of every query point. A single
        (x, y) pair yields an array of length 1.

    Notes
    -----
    Points that fall outside the grid are not rejected: their indices are
    clamped, so they receive the value of the closest edge node.
    """
    # Accept lists/tuples and promote a lone (x, y) pair to shape (1, 2).
    points = np.atleast_2d(np.asarray(new_points))

    # Grid origin and spacing along each axis.
    x_origin, y_origin = grid_x[0, 0], grid_y[0, 0]
    dx = grid_x[0, 1] - x_origin
    dy = grid_y[1, 0] - y_origin

    # Nearest-node index = offset from the origin in units of the spacing,
    # rounded to the closest integer.
    col_indices = np.round((points[:, 0] - x_origin) / dx).astype(int)
    row_indices = np.round((points[:, 1] - y_origin) / dy).astype(int)

    # Clamp out-of-range indices onto the border of the grid.
    col_indices = np.clip(col_indices, 0, grid_x.shape[1] - 1)
    row_indices = np.clip(row_indices, 0, grid_y.shape[0] - 1)

    # Fancy indexing retrieves all values in one vectorized step.
    return grid_density[row_indices, col_indices]


# Step 3: Estimate density at new points
# Bind the query sample to `new_points` only. The previous chained assignment
# (`new_points = data = ...`) also rebound `data`, silently replacing the
# sample the KDE was fitted on with these 1e7 query points.
# The generator gets a seed of its own so the query points are reproducible
# and never replay the random stream used for any other sample.
query_rng = np.random.default_rng(1)
new_points = query_rng.normal(0, 1, size=(int(1e7), 2))  # New 2D points to estimate density

In [82]:
# Nearest-node lookup of the precomputed grid for every new point.
# The result is deliberately not printed here.
estimated_density = vectorized_grid_lookup(
    new_points=new_points,
    grid_x=grid_x,
    grid_y=grid_y,
    grid_density=grid_density,
)

In [83]:

# Reference values: evaluate the fitted KDE itself at every new point.
# score_samples yields log-density; exponentiate in place to obtain densities.
kde_density = kde.score_samples(new_points)
np.exp(kde_density, out=kde_density)
print("KDE densities:", kde_density)


KDE densities: [0.09835393 0.09398805 0.03307916 ... 0.0054666  0.07072101 0.05918098]


In [84]:
# Mean absolute deviation of the grid lookup from the directly evaluated KDE.
average_difference = np.abs(estimated_density - kde_density).mean()
print(
    "Average difference between estimated densities and KDE densities:",
    average_difference,
)

Average difference between estimated densities and KDE densities: 1.1556383857985751e-05


# Testing with grid interpolation

### 2D KDE


In [14]:
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy.interpolate import griddata

# Example 2D data: 100 standard-normal samples in two dimensions.
# A seeded Generator makes the sample, the fitted KDE and the grid below
# reproducible across kernel restarts.
rng = np.random.default_rng(0)
data = rng.normal(0, 1, size=(100, 2))

# Step 1: Compute KDE on a 2D grid
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(data)

# Creating a 2D grid to evaluate KDE: 1000 nodes per axis, spanning the data
# range padded by 1 unit on every side.
x_axis = np.linspace(data[:, 0].min() - 1, data[:, 0].max() + 1, 1000)
y_axis = np.linspace(data[:, 1].min() - 1, data[:, 1].max() + 1, 1000)
grid_x, grid_y = np.meshgrid(x_axis, y_axis)

# One (x, y) row per grid node, built directly as an (n, 2) array.
grid_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])

# Evaluate KDE on the grid.
# NOTE: despite its name, grid_density stores the raw score_samples output,
# i.e. the LOG-density -- no np.exp is applied in this section. Values
# interpolated from it are therefore log-densities as well and compare
# directly against kde.score_samples(...).
grid_density = kde.score_samples(grid_points).reshape(grid_x.shape)

# Step 2: Store the computed values (grid_points and grid_density)

# Function to estimate density at new points using interpolation in 2D
def estimate_density_2d(new_points, grid_x, grid_y, grid_density):
    """Linearly interpolate gridded values at arbitrary 2D points.

    Parameters
    ----------
    new_points : array-like of shape (..., 2), or tuple ``(x, y)`` of arrays
        Query coordinates as (x, y) pairs.
    grid_x, grid_y : ndarray, 2-D
        Coordinate arrays of a rectilinear grid in the layout produced by
        ``np.meshgrid`` with default indexing: x varies along axis 1 of
        ``grid_x`` and y along axis 0 of ``grid_y``.
    grid_density : ndarray, 2-D
        Values per grid node, indexed as ``[row (y), column (x)]``. Whatever
        quantity is stored here (density or log-density) is what comes back.

    Returns
    -------
    ndarray
        Interpolated value per query point; NaN for points outside the grid.

    Notes
    -----
    The nodes form a regular grid, so a RegularGridInterpolator is used
    rather than scattered-data interpolation: it needs no triangulation of
    the grid nodes, which would otherwise be recomputed on every call.
    """
    # Local import keeps this function self-contained within the notebook.
    from scipy.interpolate import RegularGridInterpolator

    # Recover the 1-D axes from the mesh arrays.
    x_axis = grid_x[0, :]
    y_axis = grid_y[:, 0]

    # grid_density is indexed [y, x], so the axes are supplied in (y, x) order.
    interpolator = RegularGridInterpolator(
        (y_axis, x_axis),
        grid_density,
        method='linear',  # Linear is fast and should be good for smooth gaussian kernels
        bounds_error=False,
        fill_value=np.nan,
    )

    # Accept both an (..., 2) array and an (x, y) tuple of coordinate arrays.
    if isinstance(new_points, tuple):
        query = np.stack(np.broadcast_arrays(*new_points), axis=-1)
    else:
        query = np.asarray(new_points)

    # Flip (x, y) -> (y, x) to match the axis order given to the interpolator.
    return interpolator(query[..., ::-1])

# Step 3: Estimate density at new points
# Three hand-picked (x, y) query locations, one per row.
new_points = np.array(
    [
        [0.5, -0.2],
        [-0.1, 0.3],
        [1.3, -0.7],
    ]
)


In [11]:
# Interpolate the stored grid at the query points. The grid in this section
# holds raw score_samples output, so the printed values are log-densities.
estimated_density = estimate_density_2d(
    new_points=new_points,
    grid_x=grid_x,
    grid_y=grid_y,
    grid_density=grid_density,
)
print("Estimated densities:", estimated_density)

Estimated densities: [-2.22741194 -2.20622228 -2.80384235]


In [10]:

# Reference: evaluate the fitted KDE directly at the same points.
# score_samples output is left as log-density (no exponentiation) so it is
# directly comparable with the interpolated values.
kde_density = kde.score_samples(X=new_points)
print("KDE densities:", kde_density)

KDE densities: [-2.22740732 -2.2062127  -2.80383375]


### 1D KDE

In [3]:
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy.interpolate import griddata

# Example data: 100 standard-normal samples as a single-feature column.
# A seeded Generator makes the sample, the fitted KDE and the grid below
# reproducible across kernel restarts.
rng = np.random.default_rng(0)
data = rng.normal(0, 1, size=(100, 1))

# Step 1: Compute KDE on a grid
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(data)

# Creating a grid to evaluate KDE: 1000 evenly spaced nodes spanning the data
# range padded by 1 unit on each side, shaped (1000, 1) for score_samples.
grid_min, grid_max = data.min() - 1, data.max() + 1
grid_points = np.linspace(grid_min, grid_max, 1000)[:, np.newaxis]

# Evaluate KDE on the grid.
# NOTE: despite its name, grid_density stores the raw score_samples output,
# i.e. the LOG-density -- no np.exp is applied in this section. Values
# interpolated from it are therefore log-densities as well and compare
# directly against kde.score_samples(...).
grid_density = kde.score_samples(grid_points)

# Step 2: Store the computed values (grid_points and grid_density)

# Function to estimate density at new points using interpolation
def estimate_density(new_points, grid_points, grid_density):
    """Linearly interpolate 1D gridded values at the requested points.

    ``grid_points`` is flattened to a 1-D coordinate vector before being
    handed to ``griddata`` together with ``grid_density`` (one value per
    node). The result has the same shape as ``new_points``; whatever
    quantity the grid stores (density or log-density) is what is returned.
    """
    node_coordinates = grid_points.ravel()
    interpolated = griddata(
        node_coordinates,
        grid_density,
        new_points,
        method='linear',
    )
    return interpolated

# Step 3: Estimate density at new points
# Query locations as a (3, 1) column, the layout score_samples is fed below.
new_points = np.array([0.5, -0.2, 1.3]).reshape(-1, 1)

# Interpolated values from the stored grid ...
estimated_density = estimate_density(
    new_points=new_points,
    grid_points=grid_points,
    grid_density=grid_density,
)
# ... versus the fitted KDE evaluated directly. Both are log-densities here,
# since the grid stores raw score_samples output.
kde_density = kde.score_samples(X=new_points)

print("Estimated densities:", estimated_density)
print("KDE densities:", kde_density)


Estimated densities: [[-1.17870772]
 [-1.0241159 ]
 [-1.77600527]]
KDE densities: [-1.17870703 -1.02411512 -1.77600509]
