# Benchmarking

<img align="right" src="https://movingpandas.github.io/movingpandas/assets/img/movingpandas.png">



In [None]:
import urllib
import os
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from matplotlib import pyplot as plt

import sys

sys.path.append("..")
import movingpandas as mpd

mpd.show_versions()

In [None]:
os.cpu_count()

In [None]:
import time 
import random

def make_test_df(n, seed=None):
    """Build a synthetic point DataFrame used as benchmark input.

    Row ``i`` has the timestamp 2023-01-01 00:00:00 plus ``i`` seconds,
    ``x`` and ``y`` both equal to ``i * 0.0001``, a random integer ``id`` in
    ``[0, 15]`` and the constant string ``'a'`` in ``value``.

    Parameters
    ----------
    n : int
        Number of rows to generate.
    seed : int, optional
        Seed for the ``id`` column. When given, repeated calls return
        identical frames so that benchmark runs are comparable. When omitted,
        ids are drawn from the global ``random`` state.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``t``, ``x``, ``y``, ``id`` and ``value``.
    """
    # A private generator keeps a seeded call from touching the global state.
    rng = random if seed is None else random.Random(seed)
    start = datetime(2023, 1, 1)
    data = {
        "t": [start + timedelta(seconds=i) for i in range(n)],
        "x": [i * 0.0001 for i in range(n)],
        "y": [i * 0.0001 for i in range(n)],
        "id": [rng.randint(0, 15) for _ in range(n)],
        "value": ["a"] * n,
    }
    return pd.DataFrame(data)


## Basic Trajectory Creation

In [None]:

def benchmark(sizes=(100_000, 1_000_000, 5_000_000, 10_000_000)):
    """Time ``mpd.TrajectoryCollection`` creation for several input sizes.

    For every entry in ``sizes`` a test frame is built with ``make_test_df``,
    a ``TrajectoryCollection`` is created from it, and the collection and the
    elapsed time are printed. Only the constructor call is timed; building
    the test data and printing are excluded.

    Parameters
    ----------
    sizes : iterable of int, optional
        Numbers of points to benchmark. Defaults to 100k, 1M, 5M and 10M.
    """
    for n in sizes:
        df = make_test_df(n)
        print(f"\n--- {n/1_000_000} million points ---")

        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can jump when the system clock is adjusted and skew the result.
        start = time.perf_counter()
        tc = mpd.TrajectoryCollection(df, traj_id_col="id", t="t", x="x", y="y")
        runtime = time.perf_counter() - start
        print(tc)
        print(f"Trajectory init:  {runtime:.6f}s")


benchmark()



### Value Change Splitter

In [None]:

def benchmark(sizes=(10_000, 100_000, 500_000)):
    """Time trajectory creation plus a ``ValueChangeSplitter`` split.

    For every entry in ``sizes`` a test frame is built with ``make_test_df``.
    The timed section covers creating the ``TrajectoryCollection`` and
    splitting it on the ``value`` column; building the test data and printing
    are excluded.

    Parameters
    ----------
    sizes : iterable of int, optional
        Numbers of points to benchmark. Defaults to 10k, 100k and 500k.
    """
    for n in sizes:
        df = make_test_df(n)
        print(f"\n--- {n/1_000_000} million points ---")

        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can jump when the system clock is adjusted and skew the result.
        start = time.perf_counter()
        tc = mpd.TrajectoryCollection(df, traj_id_col="id", t="t", x="x", y="y")
        tc = mpd.ValueChangeSplitter(tc).split(col_name="value")
        runtime = time.perf_counter() - start
        print(tc)
        print(f"Trajectory init + ValueChangeSplitter:  {runtime:.6f}s")


benchmark()



### Value Change Splitter with Parallel Processing

In [None]:
def benchmark(sizes=(10_000, 100_000, 500_000), min_length=1000, n_processes=1):
    """Time trajectory creation plus a ``ValueChangeSplitter`` split.

    Same measurement as the plain splitter benchmark, but ``min_length`` is
    passed to ``mpd.TrajectoryCollection`` and ``n_processes`` is passed to
    ``ValueChangeSplitter.split``. The settings are included in the printed
    label so that results from different configurations can be told apart.

    Parameters
    ----------
    sizes : iterable of int, optional
        Numbers of points to benchmark. Defaults to 10k, 100k and 500k.
    min_length : int, optional
        Forwarded to ``mpd.TrajectoryCollection``. Defaults to 1000.
    n_processes : int, optional
        Forwarded to ``ValueChangeSplitter.split``. Defaults to 1.
        NOTE(review): with the default of 1 this presumably does not run in
        parallel; pass a larger value (e.g. ``os.cpu_count()``) to measure
        the multi-process case. Confirm against the MovingPandas docs.
    """
    for n in sizes:
        df = make_test_df(n)
        print(f"\n--- {n/1_000_000} million points ---")

        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can jump when the system clock is adjusted and skew the result.
        start = time.perf_counter()
        tc = mpd.TrajectoryCollection(
            df, traj_id_col="id", t="t", x="x", y="y", min_length=min_length
        )
        tc = mpd.ValueChangeSplitter(tc).split(
            col_name="value", n_processes=n_processes
        )
        runtime = time.perf_counter() - start
        print(tc)
        print(
            "Trajectory init + ValueChangeSplitter "
            f"(min_length={min_length}, n_processes={n_processes}):  "
            f"{runtime:.6f}s"
        )


benchmark()


## Plot

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Benchmark runtimes recorded for the 0.5 million (500k) point runs.
# The three lists are parallel: entry i of each list describes one run.
data = {
    "Traj init": [
        "Lazy Init", "Lazy Init", "Lazy Init", "Lazy Init", "Lazy Init", "Lazy Init",
        "Lazy Init", "Lazy Init", "Lazy Init",
        "Original Init", "Original Init", "Original Init", "Original Init",
    ],
    "Splitter": [
        "No min_length", "min_length=1,000", "min_length=1,000, early skip",
        "min_length=100,000", "min_length=100,000, early skip",
        "min_length=10,000,000", "min_length=10,000,000, early skip",
        "min_length=10,000,000", "No min_length, parallel",
        "No min_length", "min_length=1,000", "min_length=100,000", "min_length=10,000,000",
    ],
    "Runtime (s)": [
        44.912145, 86.674723, 41.281583,
        90.314423, 45.494739,
        51.147402, 71.029866,
        51.147402, 12.295473,
        97.446402, 95.853996, 99.708181, 44.724029,
    ],
}

# The "Lazy Init / min_length=10,000,000 / 51.147402" run is listed twice
# above. Drop exact duplicate rows so that every bar label is unique.
df = pd.DataFrame(data).drop_duplicates(ignore_index=True)

# Label for each bar
df["Label"] = df["Traj init"] + " - " + df["Splitter"]

# Categorize by color group
def categorize(row):
    """Return the colour-group name for one benchmark row.

    Parameters
    ----------
    row : mapping
        Must provide ``"Traj init"``; ``"Splitter"`` is only read when the
        init kind is not ``"Original Init"``.

    Returns
    -------
    str
        ``"Original Init"``, ``"Lazy Init with Early Skip"`` or
        ``"Lazy Init"``.
    """
    # The original-init runs form their own group regardless of the splitter.
    if row["Traj init"] == "Original Init":
        return "Original Init"

    uses_early_skip = "early skip" in row["Splitter"]
    return "Lazy Init with Early Skip" if uses_early_skip else "Lazy Init"

from matplotlib.patches import Patch

# Assign every run to its colour group
df["Category"] = df.apply(categorize, axis=1)

# Color mapping
color_map = {
    "Original Init": "#1f77b4",  # blue
    "Lazy Init": "#dd1111",  # red
    "Lazy Init with Early Skip": "#ff7f0e",  # orange
}

# Sort by runtime in descending order (slowest run first)
sorted_df = df.sort_values(by="Runtime (s)", ascending=False)

# Look the colours up on the sorted frame so they line up with the bars
sorted_colors = sorted_df["Category"].map(color_map)

# Plotting
plt.figure(figsize=(10, 5))
bars = plt.barh(sorted_df["Label"], sorted_df["Runtime (s)"], color=sorted_colors)
plt.xlabel("Runtime (seconds)")
plt.title("Benchmark Runtimes for 500k Points")
# barh draws the first row at the bottom; invert so the slowest run is on top
plt.gca().invert_yaxis()
# The bars are colour-coded by category, so explain the colours in a legend
legend_handles = [Patch(color=color, label=name) for name, color in color_map.items()]
plt.legend(handles=legend_handles, loc="lower right")
plt.tight_layout()
plt.show()
