In [1]:
# Install the benchmark dependencies into the kernel's environment.
# scipy, altair and shapely are pinned; watermark is not.
# NOTE(review): voronoiville, numpy and pandas are imported further down but
# are not installed here — presumably supplied by the environment; confirm
# and pin them too if the benchmark numbers are meant to be reproducible.
%pip install --quiet scipy==1.9.3 altair==4.2.0 shapely==2.0.0 watermark 

Note: you may need to restart the kernel to use updated packages.


In [2]:
from watermark import watermark

# Record the interpreter and machine details the timings below were taken on.
print(watermark())


Last updated: 2022-12-31T20:30:16.254222-05:00

Python implementation: CPython
Python version       : 3.8.15
IPython version      : 8.7.0

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 21.2.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [3]:
from scipy.spatial import Voronoi
from voronoiville import voronoi, BoundingBox
from shapely.ops import voronoi_diagram
import shapely.geometry as geo
import numpy as np
import altair as alt
import pandas as pd


In [4]:
timing_benchmarks = []
for size in [100, 500, 1_000, 10_000, 100_000, 500_000, 1_000_000]:
    scale = 100
    points = np.random.random(size=(size, 2)) * scale
    points_list = list(map(tuple, points.tolist()))
    points_shapely = geo.MultiPoint(points_list)
    time_scipy = %timeit -o Voronoi(points)
    time_shapely = %timeit -o voronoi_diagram(points_shapely)
    time_vville = %timeit -o voronoi(points_list, BoundingBox(0, 0, scale, scale))
    time_vville_nn = %timeit -o voronoi(points_list, BoundingBox(0, 0, scale, scale), return_neighbors=False)
    data = {
        "size": size,
        "time_scipy": time_scipy,
        "time_shapely": time_shapely,
        "time_vville": time_vville,
        "time_vville_nn": time_vville_nn,
    }
    timing_benchmarks.append(data)


240 µs ± 1.93 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
488 µs ± 19.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
59.2 µs ± 278 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
44.8 µs ± 78.1 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
1.35 ms ± 977 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.51 ms ± 2.82 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
329 µs ± 2.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
255 µs ± 3.39 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
2.76 ms ± 6.86 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.55 ms ± 5.11 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
690 µs ± 2.65 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
546 µs ± 2.12 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
30.1 ms ± 59.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
23.9 ms ± 651 µs per loop (mean ± std. dev. of 7 run

In [5]:
# Flatten the benchmark results into one record per individual timing:
# (input size, which implementation, measured time). The resulting long-form
# frame is what the charts below consume.
benchmark_timings = [
    {"size": run["size"], "source": key, "time": timing}
    for run in timing_benchmarks
    for key in ("time_scipy", "time_shapely", "time_vville", "time_vville_nn")
    for timing in run[key].timings
]
timing_df = pd.DataFrame(benchmark_timings)

In [6]:
# Log/log view of the timings: one translucent dot per individual timing plus
# a line through the per-size mean, coloured by implementation.
#
# The layers get their own names instead of `points`, which is already the
# NumPy array of sample points in the benchmark cell.

# Data and encodings common to both layers.
log_base = alt.Chart(timing_df).encode(
    x=alt.X("size", title="# Points", scale=alt.Scale(type="log")),
    color="source",
)
log_points_layer = (
    log_base.mark_circle(opacity=0.5)
    .encode(y=alt.Y("time", title="time(s)", scale=alt.Scale(type="log")))
    .properties(title="Time to generate Voronoi diagram (log/log)")
)
log_mean_layer = log_base.mark_line().encode(
    y=alt.Y("mean(time)", title="time(s)", scale=alt.Scale(type="log")),
)
log_points_layer + log_mean_layer

  for col_name, dtype in df.dtypes.iteritems():


In [7]:
# Linear-axis view of the timings: one translucent dot per individual timing
# plus a line through the per-size mean, coloured by implementation.
#
# The layers get their own names instead of `points`, which is already the
# NumPy array of sample points in the benchmark cell.

# Data and encodings common to both layers.
linear_base = alt.Chart(timing_df).encode(
    x=alt.X("size", title="# Points"),
    color="source",
)
linear_points_layer = (
    linear_base.mark_circle(opacity=0.5)
    .encode(y=alt.Y("time", title="time(s)"))
    .properties(title="Time to generate Voronoi diagram (linear)")
)
linear_mean_layer = linear_base.mark_line().encode(
    y=alt.Y("mean(time)", title="time(s)"),
)
linear_points_layer + linear_mean_layer


In [8]:
# Mean time per (size, source), pivoted so each row is an input size and each
# column is one implementation's mean time.
unstacked_means = timing_df.groupby(["size", "source"]).mean()["time"].unstack()

# For every implementation column "time_<name>", add "scipy_ratio_<name>" =
# scipy's mean time divided by that implementation's mean time, so a value
# above 1 means the implementation's mean time was lower than scipy's.
# The column list is snapshotted first so only the original timing columns are
# used (this includes scipy itself, giving a "scipy_ratio_scipy" column).
scipy_mean_time = unstacked_means["time_scipy"]
ratio_series = {
    f"scipy_ratio_{source.split('_', 1)[-1]}": scipy_mean_time / unstacked_means[source]
    for source in list(unstacked_means.columns)
}
unstacked_means = unstacked_means.assign(**ratio_series)


In [9]:
# Reshape the ratio columns into long form: one row per (size, source) with
# the ratio value, ordered by size and then by source.
ratio_column_names = [
    "scipy_ratio_shapely",
    "scipy_ratio_vville",
    "scipy_ratio_vville_nn",
]
ratio_data = unstacked_means[ratio_column_names].stack().reset_index(name="ratio")


In [10]:
# One line per implementation showing its ratio to scipy's mean time as the
# number of points grows; both axes are logarithmic.
(
    alt.Chart(data=ratio_data)
    .mark_line()
    .encode(
        x=alt.X("size", title="# Points", scale=alt.Scale(type="log")),
        y=alt.Y("ratio", scale=alt.Scale(type="log")),
        color="source",
    )
    .properties(title="Ratio Time vs. scipy.spatial.Voronoi")
)


  for col_name, dtype in df.dtypes.iteritems():


In [11]:
# Arithmetic mean of each implementation's ratio across all input sizes,
# rounded to one decimal place for display.
mean_ratio_by_source = ratio_data.groupby("source")["ratio"].mean()
mean_ratio_by_source.round(1)

source
scipy_ratio_shapely      1.4
scipy_ratio_vville       4.0
scipy_ratio_vville_nn    5.6
Name: ratio, dtype: float64

In [12]:
# Show the full long-form ratio table (one row per input size and source).
ratio_data


Unnamed: 0,size,source,ratio
0,100,scipy_ratio_shapely,0.491282
1,100,scipy_ratio_vville,4.052152
2,100,scipy_ratio_vville_nn,5.357145
3,500,scipy_ratio_shapely,0.890479
4,500,scipy_ratio_vville,4.093842
5,500,scipy_ratio_vville_nn,5.297289
6,1000,scipy_ratio_shapely,1.084214
7,1000,scipy_ratio_vville,4.003473
8,1000,scipy_ratio_vville_nn,5.060912
9,10000,scipy_ratio_shapely,1.259491
