In [8]:
# Import necessary libraries
import sys
# Put the directory three levels up on the module search path so that the
# `src` package imported below can be resolved from this notebook's location.
sys.path.extend(["../../../"])
# NOTE(review): these star imports presumably also provide `np`, `pd`,
# `Voronoi` and the helper functions used further down, none of which are
# imported explicitly in this notebook; confirm against src/para_voro.py.
from src.para_voro import *
from src.para_voro_plots import *

# Seed NumPy's global RNG once for the notebook session.
np.random.seed(0)

In [18]:
import time

def runtime_experiments(dim: int, n_samples: int, repeat: int = 10, n_samples_per_dim: int = 30) -> np.ndarray:
    """Time the three pipeline stages on a random, binary-labelled data set.

    Each repetition re-seeds NumPy's global RNG with 0, draws ``n_samples``
    uniform random points in ``dim`` dimensions plus a random "0"/"1" class
    label, min-max normalizes the points and then measures three stages:

    * ``vor``   -- the ``Voronoi(samples)`` call,
    * ``ridge`` -- ``generate_geometric_segment_boundaries_via_voronoi``
      followed by ``build_directed_ridge_vertices_selector_matrix``,
    * ``sens``  -- ``compute_sample_points``,
      ``ridge_based_para_sense_preparations`` and
      ``build_directed_transition_cubes`` (run with ``n_proc=None``).

    A one-line summary (average, std, median) per stage is printed at the end.

    Args:
        dim: Number of feature dimensions of the random samples.
        n_samples: Number of random samples to generate.
        repeat: Number of timed repetitions. Because the RNG is re-seeded
            every time, all repetitions operate on identical data.
        n_samples_per_dim: Divisor applied to the bounding-box extent of each
            dimension to obtain the per-dimension bandwidth. Defaults to 30,
            the value that used to be hard-coded.

    Returns:
        Array of shape ``(repeat, 3)`` holding the elapsed seconds per
        repetition; columns are ordered ``vor``, ``ridge``, ``sens``.
    """
    timings_vor = []
    timings_ridges_and_clipping = []
    timings_sens_matrix = []

    for _ in range(repeat):
        # Re-seed so that every repetition times exactly the same input.
        np.random.seed(0)

        df = pd.DataFrame(data=np.random.random((n_samples, dim)))
        # Append a random binary class label ("0" / "1") as the last column.
        df = df.assign(**{"class": pd.Series([f"{v}" for v in (np.random.random((n_samples, )) > 0.5).astype(int)]).values})

        normalize_samples = True

        discrete_values_mapping = create_discrete_values_mapping(df)

        labels_names = df.values[:, -1]

        df_ = apply_discrete_values_mapping(df[df.columns[:-1]], discrete_values_mapping)
        samples = df_.values

        # The labels used for computations are integers, while the label names
        # may be strings / ints / floats / objects.
        _, labels = np.unique(labels_names, return_inverse=True)

        if normalize_samples:
            # normalize samples  # TODO: should rather be [-0.5, 0.5] than [0.0., 1.0]
            mins = np.min(samples, 0)
            maxs = np.max(samples, 0)

            samples = (samples - mins) / (maxs - mins)

        # --- stage 1: Voronoi diagram -------------------------------------
        # perf_counter() is the monotonic, highest-resolution clock meant for
        # measuring short durations; time.time() is a coarse wall clock.
        t_start = time.perf_counter()
        vor = Voronoi(samples)
        timings_vor.append(time.perf_counter() - t_start)

        # Axis-aligned bounding box of the samples, one (min, max) row per dimension.
        aabbox = np.vstack([np.min(samples, 0), np.max(samples, 0)]).T

        # --- stage 2: ridge extraction / clipping + selector matrix --------
        t_start = time.perf_counter()
        ridge_vertices, ridge_point_indices = generate_geometric_segment_boundaries_via_voronoi(
            samples, labels, aabbox, clip_convex_hull=True, vor=vor,
            return_original_ridge_point_indices=True, verbose=False)

        M_T = build_directed_ridge_vertices_selector_matrix(samples, labels, ridge_point_indices)

        timings_ridges_and_clipping.append(time.perf_counter() - t_start)

        # --- stage 3: sensitivity matrices ---------------------------------
        t_start = time.perf_counter()

        step_size = None  # if none, it will be set equal to bandwidth (--> non-overlapping bins)
        bandwidths = (aabbox[:, 1] - aabbox[:, 0]) / n_samples_per_dim
        sample_points_min_max = compute_sample_points(clip_box=aabbox, bandwidths=bandwidths, step_size=step_size)

        n_proc = None  # single-core

        ridge_based_para_sense_preparations_result = ridge_based_para_sense_preparations(
            ridge_vertices=ridge_vertices,
            bandwidths=bandwidths,
            clip_box=aabbox,
            step_size=step_size,
            sample_points_min_max=sample_points_min_max,
            verbose=False)
        build_directed_transition_cubes_result = build_directed_transition_cubes(
            M_T, ridge_vertices, sample_points_min_max=sample_points_min_max,
            ridge_based_para_sense_preparations_result=ridge_based_para_sense_preparations_result,
            bandwidths=bandwidths, clip_box=aabbox, step_size=step_size, n_proc=n_proc, verbose=False)

        # Unpack and walk the results inside the timed region, exactly as
        # before, so that any work that might only happen when the results are
        # consumed is still counted. The lists themselves are not used further.
        _, dtc_ret = build_directed_transition_cubes_result
        sens_matrix_list = [d[0] for d in dtc_ret]
        n_ridge_matrix_list = [d[1] for d in dtc_ret]

        timings_sens_matrix.append(time.perf_counter() - t_start)

    stage_timings = [timings_vor, timings_ridges_and_clipping, timings_sens_matrix]
    for stage, timings in zip(["vor", "ridge", "sens"], stage_timings):
        print(f"dim: {dim}, n_samples: {n_samples}, repeat: {repeat}; type: {stage}, average: {np.average(timings)}, std: {np.std(timings)}, median: {np.median(timings)}")

    return np.stack(stage_timings).T
    


In [19]:
# Quick smoke run on a tiny problem (3 dimensions, 32 samples, 2 repetitions).
# The returned timing array is assigned to `_` and not used; only the printed
# summary is of interest here.
_ = runtime_experiments(3, 32, repeat=2)

dim: 3, n_samples: 32, repeat: 2; type: vor, average: 0.001500248908996582, std: 0.0005005598068237305, median: 0.001500248908996582
dim: 3, n_samples: 32, repeat: 2; type: ridge, average: 0.04699981212615967, std: 0.0019990205764770508, median: 0.04699981212615967
dim: 3, n_samples: 32, repeat: 2; type: sens, average: 3.8630447387695312, std: 0.013254880905151367, median: 3.8630447387695312


In [16]:
# Originally labelled "bandwidth experiment", but what this cell actually
# sweeps is the dimensionality and the sample count; no bandwidth setting is
# varied here.
# Each entry appended to `res` is a (dims, n_samples, timings) tuple, where
# `timings` is the array returned by runtime_experiments.
res = []

for dims in [2, 3, 4]: # higher dimensions currently disabled: , 5, 6, 7, 8]:
    # NOTE(review): 1048 breaks the otherwise 4x progression
    # (64, 256, 1024, 4096) -- possibly a typo for 1024; confirm before
    # comparing against other runs.
    for n_samples in [64, 256, 1048, 4096]:
        res_ = runtime_experiments(dims, n_samples, repeat=3)
        # Appending per configuration keeps the finished results in `res`
        # even if a later, slower configuration is interrupted.
        res.append((dims, n_samples, res_))

dim: 2, n_samples: 64, repeat: 3; type: vor, average: 0.0010013580322265625, std: 3.573628609326222e-06, median: 0.00099945068359375
dim: 2, n_samples: 64, repeat: 3; type: ridge, average: 0.028992970784505207, std: 0.0024595432903155943, median: 0.029003381729125977
dim: 2, n_samples: 64, repeat: 3; type: sens, average: 0.028992970784505207, std: 0.0024595432903155943, median: 0.029003381729125977
dim: 2, n_samples: 256, repeat: 3; type: vor, average: 0.002341190973917643, std: 0.00048171041991494623, median: 0.0020008087158203125
dim: 2, n_samples: 256, repeat: 3; type: ridge, average: 0.06731327374776204, std: 0.00048362495560078534, median: 0.06697368621826172
dim: 2, n_samples: 256, repeat: 3; type: sens, average: 0.06731327374776204, std: 0.00048362495560078534, median: 0.06697368621826172
dim: 2, n_samples: 1048, repeat: 3; type: vor, average: 0.006644090016682942, std: 0.00048109593882211327, median: 0.006974935531616211
dim: 2, n_samples: 1048, repeat: 3; type: ridge, average:

TypeError: remove: path should be string, bytes or os.PathLike, not NoneType

Exception ignored in: 'scipy._lib.messagestream.MessageStream.__dealloc__'
Traceback (most recent call last):
  File "messagestream.pyx", line 91, in scipy._lib.messagestream.MessageStream.close
TypeError: remove: path should be string, bytes or os.PathLike, not NoneType


KeyboardInterrupt: 

In [None]:
# samples experiment for dim = 4
# Keeps the dimensionality fixed at 4 and doubles the sample count from 128 up
# to 4096. Each entry appended to `res2` is a (dims, n_samples, timings) tuple.
res2 = []

for dims in [4]:
    for n_samples in [128, 256, 512, 1024, 2048, 4096]:
        # repeat=1: a single timed run per configuration, so the printed std is 0.
        res_ = runtime_experiments(dims, n_samples, repeat=1)
        res2.append((dims, n_samples, res_))

dim: 4, n_samples: 128, repeat: 3; type: vor, average: 0.017665783564249676, std: 0.0020563065326819996, median: 0.018001556396484375
dim: 4, n_samples: 128, repeat: 3; type: ridge, average: 0.6659837563832601, std: 0.018280624705834136, median: 0.6685163974761963
dim: 4, n_samples: 128, repeat: 3; type: sens, average: 50.84448719024658, std: 2.723452833309136, median: 51.96826958656311
dim: 4, n_samples: 256, repeat: 3; type: vor, average: 0.03474203745524088, std: 0.0010518751268548138, median: 0.03400087356567383
dim: 4, n_samples: 256, repeat: 3; type: ridge, average: 1.5819531281789143, std: 0.1113786185576913, median: 1.6508851051330566
dim: 4, n_samples: 256, repeat: 3; type: sens, average: 98.2748195330302, std: 4.437020699769642, median: 95.57184195518494
dim: 4, n_samples: 512, repeat: 3; type: vor, average: 0.08153661092122395, std: 0.01149912282278886, median: 0.08499908447265625
dim: 4, n_samples: 512, repeat: 3; type: ridge, average: 3.8305466175079346, std: 0.37126440631

In [None]:
# dims experiment
# Keeps the sample count fixed at 75 and varies the dimensionality from 2 to 6.
# Each entry appended to `res_3` is a (dims, n_samples, timings) tuple.
res_3 = []

for dims in [2, 3, 4, 5, 6]:
    for n_samples in [75]:
        # repeat=1: a single timed run per configuration, so the printed std is 0.
        res_ = runtime_experiments(dims, n_samples, repeat=1)
        res_3.append((dims, n_samples, res_))

dim: 2, n_samples: 75, repeat: 1; type: vor, average: 0.001001119613647461, std: 0.0, median: 0.001001119613647461
dim: 2, n_samples: 75, repeat: 1; type: ridge, average: 0.039998531341552734, std: 0.0, median: 0.039998531341552734
dim: 2, n_samples: 75, repeat: 1; type: sens, average: 0.010999917984008789, std: 0.0, median: 0.010999917984008789
dim: 3, n_samples: 75, repeat: 1; type: vor, average: 0.0019989013671875, std: 0.0, median: 0.0019989013671875
dim: 3, n_samples: 75, repeat: 1; type: ridge, average: 0.08700084686279297, std: 0.0, median: 0.08700084686279297
dim: 3, n_samples: 75, repeat: 1; type: sens, average: 6.634787559509277, std: 0.0, median: 6.634787559509277
dim: 4, n_samples: 75, repeat: 1; type: vor, average: 0.005997419357299805, std: 0.0, median: 0.005997419357299805
dim: 4, n_samples: 75, repeat: 1; type: ridge, average: 0.1990342140197754, std: 0.0, median: 0.1990342140197754
dim: 4, n_samples: 75, repeat: 1; type: sens, average: 22.491682052612305, std: 0.0, med

In [22]:
# Display the collected (dims, n_samples, timings) tuples. Each timings array
# has one row per repetition and the columns vor, ridge, sens (in seconds).
res

[(2,
  150,
  array([[0.00199962, 0.06200242, 0.01799917],
         [0.00200057, 0.06099892, 0.01799917],
         [0.00199962, 0.05599594, 0.01699972]])),
 (3,
  150,
  array([[3.99947166e-03, 1.57000065e-01, 1.26436238e+01],
         [3.99661064e-03, 1.66006088e-01, 1.08311317e+01],
         [2.99715996e-03, 1.46000862e-01, 1.17336988e+01]])),
 (4,
  150,
  array([[1.69739723e-02, 6.62008047e-01, 4.76276560e+01],
         [1.50015354e-02, 7.18569756e-01, 4.77279878e+01],
         [1.76451206e-02, 6.39996529e-01, 4.67793832e+01]])),
 (5,
  150,
  array([[7.50010014e-02, 4.22915626e+00, 2.32020628e+02],
         [7.80763626e-02, 4.05732107e+00, 2.30513239e+02],
         [8.59966278e-02, 4.02550173e+00, 2.33808345e+02]]))]

In [None]:
import pickle
# Persist the results of the first two sweeps as one (res, res2) tuple.
# The path is relative, so the file lands in the current working directory.
with open("tmp_runtime_experiments_res_and_res2.pkl", "wb") as f:
    pickle.dump((res, res2), f)

In [None]:
# Persist `res` together with the dims-sweep results as one (res, res_3) tuple.
# Relies on `pickle` having been imported by an earlier cell; the path is
# relative, so the file lands in the current working directory.
with open("tmp_runtime_experiments_res_and_res3.pkl", "wb") as f:
    pickle.dump((res, res_3), f)