In [1]:
import numpy as np
import sys 
sys.path.append('../../../')
from stateinterpreter.numerical_utils import gaussian_kde
import matplotlib.pyplot as plt
from statistics import mean
from scipy.optimize import minimize
from tqdm import tqdm

In [10]:
# Synthetic Gaussian data for the KDE cells that follow.
# Seed the legacy global RNG so that Restart & Run All reproduces the same draws.
np.random.seed(0)
dims = 2
size = 100000
# Random mean with each component in [0, 10).
mean_val = np.random.rand(dims) * 10
# A @ A.T is symmetric positive semi-definite, i.e. a valid covariance matrix.
cov = np.random.rand(dims, dims)
cov = cov @ cov.T
# Two independent draws from the same distribution:
# `samples` is used to build the estimator, `test` is where it gets evaluated.
samples = np.random.multivariate_normal(mean_val, cov, size=size)
test = np.random.multivariate_normal(mean_val, cov, size=size)

In [11]:
# Build the estimator from `samples`, then evaluate the log-density at the
# `test` points with the `cython` flag switched on.
KDE = gaussian_kde(samples)
KDE(test, cython=True, logpdf=True)

array([-3.72308206, -5.97274026, -4.07112737, ..., -4.71864545,
       -6.95977993, -4.15327203])

In [12]:
# Same evaluation with the `cython` flag switched off; the displayed values
# are expected to agree with the `cython=True` evaluation.
KDE(test, cython=False, logpdf=True)

array([-3.72308206, -5.97274026, -4.07112737, ..., -4.71864545,
       -6.95977993, -4.15327203])

In [None]:
# Scatter of the drawn samples with the true mean overlaid.
fig, ax = plt.subplots()
ax.plot(samples[:, 0], samples[:, 1], 'k.')
ax.plot(mean_val[0], mean_val[1], 'wx')
# Equal scaling so the shape of the covariance is not distorted by the axes.
ax.axis('equal')
ax.margins(x=0)
ax.set_xlabel('x[0]')
ax.set_ylabel('x[1]')
ax.set_title('Samples (black dots) and true mean (white cross)');

In [None]:
# For a range of subsample sizes, build a KDE on a random subsample and locate
# its maximum by minimising the negated (log-)density with its analytic gradient.
logpdf = True
# Subsample sizes 5000, 10000, ..., 95000 (the leading 0 is dropped).
sizes = np.arange(0, 100000, 5000)[1:]
residuals = []
# `n_sub` / `kde_sub` are deliberately distinct from the globals `size` / `KDE`
# defined in earlier cells, so running this cell does not overwrite them.
for n_sub in tqdm(sizes):
    # Row indices drawn with replacement. The upper bound of np.random.randint
    # is exclusive, so it must be len(samples) for the last row to be reachable.
    subsample_idxs = np.random.randint(0, len(samples), size=n_sub)
    subsample = samples[subsample_idxs]
    kde_sub = gaussian_kde(subsample)

    # Bind the estimator as a default argument so each pair of callables is
    # tied to the KDE of its own iteration.
    def objective(x, kde=kde_sub):
        return -kde(x, logpdf=logpdf)

    def jac(x, kde=kde_sub):
        return -kde.grad(x, logpdf=logpdf)

    # NOTE(review): the starting point is hard-coded, while `mean_val` is drawn
    # with non-negative components — confirm this is a sensible initial guess.
    opt = minimize(objective, [-2, -1], jac=jac, tol=1e-8)
    # NOTE(review): this stores the optimiser's location itself, not its offset
    # from `mean_val`; confirm whether `opt.x - mean_val` was intended.
    residuals.append(opt.x)
# One row per subsample size, and the Euclidean norm of each row.
res = np.asarray(residuals)
res_nrm = np.linalg.norm(res, axis=1)

In [2]:
def test_gaussian_kde(n_centers = 1000, n_dims = 3, n_pts = 1000, logpdf = False, cython = False):
    """Build a KDE on uniform random data and evaluate it at uniform random points.

    Serves as a self-contained workload (e.g. for `%timeit`): it makes no
    assertions about the values it computes.

    Parameters
    ----------
    n_centers : int
        Number of random rows used to build the estimator.
    n_dims : int
        Number of columns of both the dataset and the evaluation points.
    n_pts : int
        Number of random rows at which the estimator is evaluated.
    logpdf : bool
        Forwarded unchanged to the estimator call.
    cython : bool
        Forwarded unchanged to the estimator call.

    Returns
    -------
    Whatever the `gaussian_kde` instance returns for the evaluation points.
    """
    # Draw order matters for the RNG stream: dataset first, then query points.
    centers = np.random.rand(n_centers, n_dims)
    queries = np.random.rand(n_pts, n_dims)
    estimator = gaussian_kde(centers)
    return estimator(queries, logpdf=logpdf, cython=cython)

In [4]:
n_centers_vals = np.arange(1000,30000,1000)
time_log = []
time_std = []
#n_pts = 1
n_centers = 10000
for n_pts in n_centers_vals:
    val = %timeit -o test_gaussian_kde(n_centers = n_centers, n_dims = 3, n_pts = n_pts, logpdf = False, cython = False)
    time_std.append(mean(val.timings))
    #val = %timeit -o test_gaussian_kde(n_centers = n_centers, n_dims = 3, n_pts = n_pts, logpdf = True)
    #time_log.append(mean(val.timings))

263 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
477 ms ± 4.55 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


KeyboardInterrupt: 

In [None]:
%timeit -o test_gaussian_kde(n_centers = 30000, n_dims = 2, n_pts = 300, logpdf = True)

In [None]:
# Plot the recorded timings. A series may be empty or shorter than the x values
# (measurement disabled, or the sweep interrupted), so each one is plotted
# against the matching prefix of x instead of raising a shape-mismatch error.
fig, ax = plt.subplots()
any_series = False
for timings, fmt, label in (
    (time_log, 'ko-', 'logpdf=True'),
    (time_std, 'ko--', 'logpdf=False'),
):
    n_done = min(len(timings), len(n_centers_vals))
    if n_done == 0:
        continue
    ax.plot(n_centers_vals[:n_done], timings[:n_done], fmt, label=label)
    any_series = True
ax.set_xlim(n_centers_vals[0], n_centers_vals[-1])
# The swept quantity is the number of evaluation points (`n_pts`), despite the
# variable being called `n_centers_vals`.
ax.set_xlabel('number of evaluation points (n_pts)')
ax.set_ylabel('mean time per call [s]')
ax.set_title('gaussian_kde evaluation time')
if any_series:
    ax.legend()