In [1]:
!pip install --quiet scipy==1.9.3 altair==4.2.0 watermark

In [2]:
from watermark import watermark

# Report the interpreter, OS and hardware the benchmarks were run on.
environment_summary = watermark()
print(environment_summary)


Last updated: 2022-12-30T15:28:08.030418-05:00

Python implementation: CPython
Python version       : 3.8.15
IPython version      : 8.7.0

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 21.2.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [3]:
from scipy.spatial import Voronoi
from voronoiville import voronoi, BoundingBox
import numpy as np
import altair as alt
import pandas as pd


In [4]:
timing_benchmarks = []
for size in [100, 500, 1_000, 10_000, 100_000, 500_000, 1_000_000]:
    scale = 100
    points = np.random.random(size=(size, 2)) * scale
    points_list = list(map(tuple, points.tolist()))
    time_scipy = %timeit -o Voronoi(points)
    time_vville = %timeit -o voronoi(points_list, BoundingBox(0, 0, scale, scale))
    time_vville_nn = %timeit -o voronoi(points_list, BoundingBox(0, 0, scale, scale), return_neighbors=False)
    data = {"size" : size, "time_scipy": time_scipy, "time_vville": time_vville, "time_vville_nn": time_vville_nn}
    timing_benchmarks.append(data)

239 µs ± 2.28 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
59.7 µs ± 590 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
50.3 µs ± 904 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
1.43 ms ± 28.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
341 µs ± 2.24 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
290 µs ± 3.43 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
3.35 ms ± 699 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
705 µs ± 13 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
614 µs ± 20.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
32.9 ms ± 1.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
8.15 ms ± 181 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.08 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
501 ms ± 26.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
129 ms ± 11.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

In [5]:
# Flatten the benchmark results into long form: one row per entry of each
# result's `.timings`, tagged with the input size and the implementation key.
benchmark_timings = [
    {"size": result["size"], "source": label, "time": seconds}
    for result in timing_benchmarks
    for label in ("time_scipy", "time_vville", "time_vville_nn")
    for seconds in result[label].timings
]
timing_df = pd.DataFrame(benchmark_timings)


In [6]:
# Individual timings (dots) overlaid with the mean per input size (lines).
# Distinct chart names are used so the `points` array from the benchmark cell
# is not rebound to a chart. Both axes are logarithmic because the input sizes
# span 100 to 1,000,000 points and a linear axis crushes the small sizes.
timing_points_chart = (
    alt.Chart(timing_df)
    .mark_circle(opacity=0.5)
    .encode(
        x=alt.X("size", title="number of points", scale=alt.Scale(type="log")),
        y=alt.Y("time", title="time(s)", scale=alt.Scale(type="log")),
        color="source",
    )
)
timing_mean_chart = (
    alt.Chart(timing_df)
    .mark_line()
    .encode(
        x=alt.X("size", title="number of points", scale=alt.Scale(type="log")),
        y=alt.Y("mean(time)", title="time(s)", scale=alt.Scale(type="log")),
        color="source",
    )
)
timing_points_chart + timing_mean_chart


  for col_name, dtype in df.dtypes.iteritems():


In [7]:
# Mean time per (size, source), pivoted so that each source is its own column
# (the column index keeps the name "source").
unstacked_means = timing_df.groupby(["size", "source"])["time"].mean().unstack()

# Snapshot the comparison columns before the loop: the loop adds columns to
# this same frame, and dividing the SciPy baseline by itself would only add a
# constant 1.0 column.
comparison_columns = [
    column for column in unstacked_means.columns if column != "time_scipy"
]
for column in comparison_columns:
    # Drop the leading "time_" prefix, e.g. "time_vville_nn" -> "vville_nn".
    name = column.partition("_")[2]
    # Ratio > 1 means SciPy's mean time is larger than this column's.
    unstacked_means[f"scipy_ratio_{name}"] = (
        unstacked_means["time_scipy"] / unstacked_means[column]
    )


In [8]:
# Reshape the two ratio columns into long form with columns
# "size", "source" and "time".
# NOTE: "time" here holds the SciPy / voronoiville ratio, not a duration; the
# column name is kept because the following cells refer to it.
ratio_columns = ["scipy_ratio_vville", "scipy_ratio_vville_nn"]
ratio_data = (
    unstacked_means[ratio_columns]
    .stack()
    .rename("time")
    .reset_index()
)


In [9]:
# The "time" column of ratio_data is the ratio of SciPy's mean time to
# voronoiville's mean time, so the axis is titled as a ratio rather than
# inheriting the misleading field name.
alt.Chart(ratio_data).mark_line().encode(
    x=alt.X("size", title="number of points"),
    y=alt.Y("time", title="scipy mean time / voronoiville mean time"),
    color="source",
)


  for col_name, dtype in df.dtypes.iteritems():


In [10]:
# Average SciPy / voronoiville ratio across all benchmarked sizes, per
# variant, rounded to one decimal place.
mean_ratio_by_source = ratio_data.groupby("source")["time"].mean()
mean_ratio_by_source.round(1)

source
scipy_ratio_vville       4.0
scipy_ratio_vville_nn    5.0
Name: time, dtype: float64