In [None]:
import laplacian_original
import laplacian_tensorflow
import numpy as np
import time

import tensorflow as tf
import tensorflow_probability as tfp

In [None]:
def benchmark_speed(dataset, lsh_kde, L, bandwidth, n_sample=10):
    """Time the construction of an LSH-KDE structure and its average query cost.

    Both timings use ``time.process_time``, i.e. CPU time of the current
    process rather than wall-clock time.

    Args:
        dataset: Array indexed as ``dataset[row, :]`` whose ``shape[0]`` is the
            number of points.
        lsh_kde: Callable invoked as ``lsh_kde(dataset, bandwidth, L)``; the
            object it returns must provide a ``kde(row)`` method.
        L: Forwarded unchanged as the third argument of ``lsh_kde``.
        bandwidth: Forwarded unchanged as the second argument of ``lsh_kde``.
        n_sample: Number of distinct rows to query (default 10). It is capped
            at the number of rows, because sampling without replacement
            cannot return more rows than the dataset holds.

    Returns:
        Tuple ``(construction_time, query_avg)``: the time spent in the
        ``lsh_kde(...)`` call and the mean time of one ``kde`` call over the
        sampled rows (``0.0`` when no row is queried).
    """
    n_rows = dataset.shape[0]

    build_start = time.process_time()
    lsh = lsh_kde(dataset, bandwidth, L)
    construction_time = time.process_time() - build_start

    # Never ask for more distinct rows than exist (np.random.choice with
    # replace=False would raise), and never for a negative amount.
    n_queries = max(0, min(n_sample, n_rows))
    if n_queries == 0:
        return construction_time, 0.0

    query_total = 0.0
    for row_num in np.random.choice(n_rows, n_queries, replace=False):
        row = dataset[row_num, :]
        # Only the kde call itself is timed; row extraction is excluded.
        query_start = time.process_time()
        lsh.kde(row)
        query_total += time.process_time() - query_start
    return construction_time, query_total / n_queries

def benchmark_speed_tf(N, dim, lsh_kde, L, bandwidth, dat_type="normal", n_sample=10):
    """Generate a random TensorFlow dataset and time LSH-KDE build and queries.

    Both timings use ``time.process_time``, i.e. CPU time of the current
    process rather than wall-clock time. Dataset generation is not timed.

    Args:
        N: Number of rows of the generated dataset.
        dim: Number of columns of the generated dataset.
        lsh_kde: Callable invoked as ``lsh_kde(dataset, bandwidth, L)``; the
            object it returns must provide a ``kde(row)`` method.
        L: Forwarded unchanged as the third argument of ``lsh_kde``.
        bandwidth: Forwarded unchanged as the second argument of ``lsh_kde``.
        dat_type: ``"normal"`` draws the data with ``tf.random.normal``; any
            other value draws it with ``tf.random.uniform``.
        n_sample: Number of distinct rows to query (default 10). It is capped
            at ``N``, because sampling without replacement cannot return more
            rows than the dataset holds.

    Returns:
        Tuple ``(construction_time, query_avg)``: the time spent in the
        ``lsh_kde(...)`` call and the mean time of one ``kde`` call over the
        sampled rows (``0.0`` when no row is queried).
    """
    sampler = tf.random.normal if dat_type == "normal" else tf.random.uniform
    dataset = sampler((N, dim))

    build_start = time.process_time()
    lsh = lsh_kde(dataset, bandwidth, L)
    construction_time = time.process_time() - build_start

    # Never ask for more distinct rows than exist (np.random.choice with
    # replace=False would raise), and never for a negative amount.
    n_queries = max(0, min(n_sample, N))
    if n_queries == 0:
        return construction_time, 0.0

    query_total = 0.0
    for row_num in np.random.choice(N, n_queries, replace=False):
        row = dataset[row_num, :]
        # Only the kde call itself is timed; row extraction is excluded.
        query_start = time.process_time()
        lsh.kde(row)
        query_total += time.process_time() - query_start
    return construction_time, query_total / n_queries

## Original Laplacian KDE benchmark (change the variables below for other experiments)

In [None]:
# Experiment grid (edit the settings below to switch configuration).
#
# Original
# L=50, D=100~10000, N=10000, band=0.5
# L=50, D=100, N=10000~1000000, band=0.5
# L=10~100, D=100, N=100000, band=0.5   <- configuration coded below
#
# New
# N:100000 D:50, L 10~100
# N: 10000~1000000 D:50, L:50
# N:100000, D: 10~100, L:50
preprocess_time = []
query_time = []
# Despite its name, D_s stores whichever parameter is being swept; in this
# cell that is L (D stays fixed at 100).
D_s = []

# Settings that do not change across the sweep.
N = 100000
D = 100
bandwidth = 0.5
lsh_kde = laplacian_original.FastLaplacianKDE

for i in range(10, 101):
    print(i)
    L = i
    # A fresh dataset is drawn for every setting; this happens before the
    # timers inside benchmark_speed start.
    dataset = np.random.normal(size=(N, D))
    # For uniform data use: np.random.uniform(size=(N, D))
    construction, query = benchmark_speed(dataset, lsh_kde, L, bandwidth)
    # Record the swept value together with its timings, and only once the
    # measurement has finished, so the three lists keep equal length even if
    # a run fails or is interrupted.
    D_s.append(i)
    preprocess_time.append(construction)
    query_time.append(query)

In [None]:
import pandas as pd

# Write the benchmark data of the original-implementation sweep into a CSV.
#
# The sweep cell above varies L (stored in D_s) on normally distributed data
# using laplacian_original, so the column and the file are labelled to match.
# The file name is also distinct from the one written by the TensorFlow
# benchmark further down, so running the whole notebook no longer overwrites
# these results. Adjust the column key and file name when sweeping another
# parameter.
write_dat = {
    "L": D_s,
    "pre": preprocess_time,
    "query": query_time,
}

pd_dat = pd.DataFrame(write_dat)

pd_dat.to_csv("laplacian_original_L_normal.csv", index=False)

## Our Laplacian KDE benchmark (change the variables below for other experiments)

Note: this cell can also be used to benchmark Gaussian against Laplacian preprocessing/query time.

In [None]:
# Experiment grid (edit the settings below to switch configuration).
#
# New
# N:100000 D:50, L 10~100 (finish)
# N: 10000~1000000 D:50, L:50 (finish)
# N:100000, D: 10~100, L:50 (finish)   <- configuration coded below

preprocess_time = []
query_time = []
# Values of the swept parameter (here the dimension D).
D_s = []

# Settings that do not change across the sweep.
N = 100000
L = 50
bandwidth = 0.5
lsh_kde = laplacian_tensorflow.FastLaplacianKDE_tf

for i in range(10, 101):
    print(i)
    D = i
    # Uniform data; drop the last argument (it defaults to "normal") to
    # benchmark on normally distributed data instead.
    construction, query = benchmark_speed_tf(N, D, lsh_kde, L, bandwidth, "uniform")
    # Record the swept value together with its timings, and only once the
    # measurement has finished, so the three lists keep equal length even if
    # a run fails or is interrupted.
    D_s.append(i)
    preprocess_time.append(construction)
    query_time.append(query)

In [None]:
import pandas as pd

# Collect the sweep results column by column, then dump them to CSV.
# Change "D" for other experiments (such as "L" for the number of LSH copies).
write_dat = {}
write_dat["D"] = D_s
write_dat["pre"] = preprocess_time
write_dat["query"] = query_time

pd_dat = pd.DataFrame(data=write_dat)
pd_dat.to_csv("laplacian_tf_D_unif.csv", index=False)