Imports

In [1]:
from hypdelta import hypdelta
import numpy as np

Utils

In [2]:
def generate_synthetic_points(dimensions, num_points, seed=None):
    """Draw ``num_points`` random points from the unit hypercube.

    Args:
        dimensions: Number of coordinates per point.
        num_points: Number of points to generate.
        seed: Optional seed. When given, a dedicated ``numpy`` generator
            seeded with it is used, so repeated calls with the same seed
            return identical points. When ``None`` (default) the legacy
            global ``np.random`` state is used, exactly as before.

    Returns:
        ``numpy.ndarray`` of shape ``(num_points, dimensions)`` with values
        sampled uniformly from ``[0, 1)``.
    """
    if seed is None:
        # Keep the historical behaviour (and respect any np.random.seed call).
        return np.random.rand(num_points, dimensions)
    rng = np.random.default_rng(seed)
    return rng.random((num_points, dimensions))


def distance(point1, point2):
    """Return the Euclidean distance between ``point1`` and ``point2``.

    The two arguments are subtracted element-wise, so they must be numpy
    arrays (or scalars) of compatible shape; the squared differences are
    summed over all elements before the square root is taken.
    """
    offset = point1 - point2
    return np.sqrt(np.sum(np.square(offset)))


def build_dist_matrix(data):
    """Return the pairwise Euclidean distance matrix of ``data``.

    Args:
        data: Array-like whose first axis indexes the points. Each point may
            be a scalar or an array of any shape; it is flattened before the
            distance is computed.

    Returns:
        ``numpy.ndarray`` of shape ``(n, n)`` whose entry ``[i, j]`` is the
        Euclidean distance between point ``i`` and point ``j`` (zero on the
        diagonal). An empty input yields an empty array.
    """
    points = np.asarray(data)
    n = len(points)
    if n == 0:
        return np.asarray([])

    # One row of coordinates per point, whatever the per-point shape is.
    flat = points.reshape(n, -1)

    # Compute one full row of distances per iteration with a vectorised
    # numpy expression instead of one Python call per pair; memory stays at
    # O(n * d) per step rather than materialising an (n, n, d) array.
    rows = [np.sqrt(np.square(flat - p).sum(axis=1)) for p in flat]
    return np.asarray(rows)


def generate_dists(dim=100):
    """Build a distance matrix for ``dim`` random points in ``dim`` dimensions.

    ``dim`` is passed to ``generate_synthetic_points`` as both the number of
    coordinates and the number of points, and the resulting points are fed
    to ``build_dist_matrix``, whose result is returned.
    """
    return build_dist_matrix(generate_synthetic_points(dim, dim))


def get_far_away_pairs(A, N):
    """Return index pairs ``(i, j)`` with ``i < j`` among the ``N`` largest entries of ``A``.

    The entries of ``A`` are ranked in descending order, the first ``N``
    flat positions are converted back to indices, and only those lying
    strictly above the diagonal are kept. The result can therefore contain
    fewer than ``N`` pairs.
    """
    descending_order = np.argsort(-A.ravel())
    top_positions = descending_order[:N]
    selected = []
    for i, j in zip(*np.unravel_index(top_positions, A.shape)):
        if i < j:
            selected.append((i, j))
    return selected


Checks

In [3]:
import numpy.testing as npt

def test_CCL_true_delta():
    """Check that the CPU "CCL" strategy matches the CPU "naive" strategy.

    Both strategies are run on the same freshly generated 500-point distance
    matrix and compared with ``numpy.testing.assert_almost_equal``, which
    raises ``AssertionError`` on mismatch.
    """
    distances = generate_dists(500)

    ccl_result = hypdelta.hypdelta(
        distances, device="cpu", strategy="CCL", l=0.9
    )
    naive_result = hypdelta.hypdelta(
        distances, device="cpu", strategy="naive"
    )

    return npt.assert_almost_equal(ccl_result, naive_result)


def test_true_delta_gpu():
    """Check that the "naive" strategy gives the same result on CPU and GPU.

    The same freshly generated 500-point distance matrix is passed to both
    devices; the results are compared with
    ``numpy.testing.assert_almost_equal``, which raises ``AssertionError``
    on mismatch.
    """
    distances = generate_dists(500)

    cpu_result = hypdelta.hypdelta(
        distances, device="cpu", strategy="naive"
    )
    gpu_result = hypdelta.hypdelta(
        distances, device="gpu", strategy="naive"
    )

    return npt.assert_almost_equal(cpu_result, gpu_result)


def test_naive_condenced():
    """Check that the CPU "condenced" heuristic matches the CPU "naive" strategy.

    Both are run on the same freshly generated 500-point distance matrix and
    compared with ``numpy.testing.assert_almost_equal``, which raises
    ``AssertionError`` on mismatch.

    NOTE(review): the recorded output of the Main cell shows the "condenced"
    run (tries=25, heuristic=True) returning a different delta than "naive"
    on a 1000-point matrix, so this equality check may fail -- confirm
    whether an exact match is really the intended contract.
    """
    distances = generate_dists(500)

    condenced_result = hypdelta.hypdelta(
        distances, device="cpu", strategy="condenced", tries=25, heuristic=True
    )
    naive_result = hypdelta.hypdelta(
        distances, device="cpu", strategy="naive"
    )

    return npt.assert_almost_equal(naive_result, condenced_result)


def test_GPU_true_delta():
    """Check that the GPU "CCL" strategy matches the GPU "naive" strategy.

    Both strategies are run on the same freshly generated 500-point distance
    matrix and compared with ``numpy.testing.assert_almost_equal``, which
    raises ``AssertionError`` on mismatch.
    """
    distances = generate_dists(500)

    ccl_result = hypdelta.hypdelta(
        distances, device="gpu", strategy="CCL", l=0.2
    )
    naive_result = hypdelta.hypdelta(
        distances, device="gpu", strategy="naive"
    )

    return npt.assert_almost_equal(ccl_result, naive_result)

Main

In [4]:
# The test helpers from the Checks cell are left disabled in this run.
# test_true_delta_gpu()
# test_CCL_true_delta()
# test_naive_condenced()

# One shared 1000-point distance matrix, reused by every strategy below and
# by the later cells that sweep the CCL parameter `l`.
dist_matrix = generate_dists(1000)
# Each result is printed right after it is computed, so the order of these
# statements determines the order of the cell output.
delta_CCL = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.9)
print(delta_CCL)

# NOTE(review): the recorded output has a `{}` line before this result that no
# print() in this cell produces -- presumably emitted inside hypdelta; confirm.
delta_true = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="naive")
print(delta_true)

# Heuristic run; in the recorded output its first value differs from the
# naive result printed above.
delta_condenced = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="condenced", tries=25, heuristic=True)
print(delta_condenced)

# test_GPU_true_delta()

(0.18800939784959989, 10.149026768557174)
{}
(0.18800939784959989, 10.149026768557174)
(0.15364839691100382, 10.149026768557174)


In [5]:
# Re-run CPU CCL on the same dist_matrix with l=0.8; this overwrites the
# l=0.9 result stored in delta_CCL by the Main cell.
delta_CCL = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.8)

In [6]:
# Show whatever delta_CCL currently holds (the l=0.8 run when cells execute in order).
print(delta_CCL)

(0.18800939784959989, 10.149026768557174)


In [7]:
# CPU CCL on the same dist_matrix with l=0.7; overwrites delta_CCL again.
delta_CCL = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.7)
print(delta_CCL)

(0.18800939784959989, 10.149026768557174)


In [13]:
# CPU CCL on the same dist_matrix with l=0.6, kept under its own name.
# NOTE(review): the execution counts of this and the following cells run
# from In [13] down to In [8], i.e. they were executed bottom-up; each cell
# only reads dist_matrix, so the order does not affect the values computed.
delta_CCL_06 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.6)
print(delta_CCL_06)

(0.18800939784959989, 10.149026768557174)


In [12]:
# CPU CCL on the same dist_matrix with l=0.5.
delta_CCL_05 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.5)
print(delta_CCL_05)

(0.18800939784959989, 10.149026768557174)


In [11]:
# CPU CCL on the same dist_matrix with l=0.4.
delta_CCL_04 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.4)
print(delta_CCL_04)

(0.18800939784959989, 10.149026768557174)


In [10]:
# CPU CCL on the same dist_matrix with l=0.3.
delta_CCL_03 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.3)
print(delta_CCL_03)

(0.18800939784959989, 10.149026768557174)


In [9]:
# CPU CCL on the same dist_matrix with l=0.2.
delta_CCL_02 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.2)
print(delta_CCL_02)

(0.18800939784959989, 10.149026768557174)


In [8]:
# CPU CCL on the same dist_matrix with l=0.1, the smallest value in the sweep.
delta_CCL_01 = hypdelta.hypdelta(dist_matrix, device="cpu", strategy="CCL", l=0.1)
print(delta_CCL_01)

(0.18800939784959989, 10.149026768557174)
