In [1]:
from time import perf_counter, time

import numpy as np
from scipy.spatial import KDTree

from kl_divergence_estimators import scipy_estimator


# Precomputing KDTree

When the same dataset is reused across multiple runs, its KDTree does not have to be rebuilt every time. It can be built once up front and passed to the estimator through the `kd_tree_s2` argument on every subsequent run.

In [2]:
# Use case: build the KDTree of Q once and share it across several estimates.

N = 200_000
mu1, sigma1 = 0, 1  # mean / std-dev used when sampling P
mu2, sigma2 = 3, 2  # mean / std-dev used when sampling Q

# Q is sampled a single time, so its tree only has to be constructed once.
Q = np.random.normal(loc=mu2, scale=sigma2, size=(N, 1))
kd_q = KDTree(Q)

# Every iteration samples a fresh P and hands the prebuilt tree to the
# estimator together with Q.
for i in range(10):
    P = np.random.normal(loc=mu1, scale=sigma1, size=(N, 1))
    estimate = scipy_estimator(s1=P, s2=Q, k=2, kd_tree_s2=kd_q)
    print(estimate)

1.4391172398704062
1.4382586999301572
1.4373010643243078
1.4374791076545637
1.4324344623527836
1.4357256597020345
1.4354619501719903
1.4304329402102527
1.4335127985258547
1.4324976586118956


In [3]:
# Testing speedup
#
# Times the estimator on identical (P, Q) samples twice per iteration: once
# without a prebuilt tree and once with a KDTree of Q passed in through
# `kd_tree_s2`. Intervals are measured with `perf_counter`, a monotonic
# high-resolution clock, so system clock adjustments cannot skew them.

t_standard = []
t_precompute = []

for i in range(20):
    Q = np.random.normal(mu2, sigma2, (N, 1))
    P = np.random.normal(mu1, sigma1, (N, 1))

    # Baseline: no prebuilt tree is handed to the estimator.
    t0 = perf_counter()
    e = scipy_estimator(P, Q, k=2)
    t_standard.append(perf_counter() - t0)

    # The tree is built *before* the timer starts: its construction cost is
    # deliberately excluded, since the scenario being measured is one where
    # the tree already exists from an earlier run.
    kd_q = KDTree(Q)
    t0 = perf_counter()
    e = scipy_estimator(s1=P, s2=Q, k=2, kd_tree_s2=kd_q)
    t_precompute.append(perf_counter() - t0)

print(f"Standard mean: {np.mean(t_standard):.3f}s, std:{np.std(t_standard):.3f}s")
print(f"Precompute mean: {np.mean(t_precompute):.3f}s, std:{np.std(t_precompute):.3f}s")

# From here on the two names hold the mean timings (floats), not the lists.
t_standard, t_precompute = np.mean(t_standard), np.mean(t_precompute)

# Reported figure is the relative reduction in mean runtime, in percent.
print(f"Average Speedup of {((t_standard-t_precompute)*100)/t_standard:.1f}%")

Standard mean: 0.318s, std:0.007s
Precompute mean: 0.282s, std:0.008s
Average Speedup of 11.2%
