# Intersection / Union Calculation

This example takes as input an `.rrd` recording containing two entities with 2D
bounding boxes and computes their intersection over union (`intersection / union`).

In [None]:
import rerun as rr
from rerun.utilities.datafusion.functions.boxes2d import intersection_over_union_by_path
import pyarrow as pa
from datafusion import DataFrame, SessionContext, col, functions as f, udf

In [None]:
# This example needs a copy of the .rrd recording produced by the Detect & Track
# example. Point the path below at wherever that file lives on your machine.

recording_path = "/Users/tsaucer/working/detect_and_track_example.rrd"
original_recording = rr.dataframe.load_recording(recording_path)

In [None]:
# To turn the recording into a DataFusion DataFrame we first create a view over
# the whole entity tree ("/**"), indexed by `log_time`, and apply our
# `latest_at` fill to it. Without that fill the bounding-box data for the two
# tracked entities would be misaligned across rows.

original_recording_view = original_recording.view(
    index="log_time",
    contents="/**",
    include_indicator_columns=True,
).fill_latest_at()

In [None]:
# Constants reused by the cells below.
#
# Entity 16 is the horse in the foreground.
# Entity 21 is the black car in the foreground.
#
# Their bounding boxes overlap a lot at the start of the video. Around frame
# #43 the boxes are just touching, and afterwards the overlap is 0.
# This is what we *should* get as a result.

OUTPUT_ENTITY_PATH: str = "/score"

INPUT_ENTITY_1_PATH: str = "/video/tracked/16"
INPUT_ENTITY_2_PATH: str = "/video/tracked/21"

In [None]:
# Columns to pull from the view: the three index/timeline columns, followed by
# the Position2D and HalfSize2D components of each of the two input entities.

required_input_columns = ["frame", "log_tick", "log_time"] + [
    f"{entity_path}:{component}"
    for entity_path in (INPUT_ENTITY_1_PATH, INPUT_ENTITY_2_PATH)
    for component in ("Position2D", "HalfSize2D")
]

In [None]:
# Create the DataFusion context and DataFrame from the record batches provided
# by the view above.

# `select` yields record batches when iterated; materialize them directly with
# `list(...)` instead of an element-by-element copy comprehension.
batches = list(original_recording_view.select(*required_input_columns))
ctx = SessionContext()
# All batches are handed over together as one inner list.
df = ctx.create_dataframe([batches])


In [None]:
# This is an optional step, but it reduces the DataFrame to only those rows in
# which at least one of the two box positions changed, since they do not change
# on every row.

df = (
    df
    .filter(col(f"{INPUT_ENTITY_1_PATH}:Position2D").is_not_null())
    .filter(col(f"{INPUT_ENTITY_2_PATH}:Position2D").is_not_null())
    # `lag` is a window function: order it explicitly by the view's index so
    # that "previous row" does not depend on the physical order of the batches.
    .with_column(
        "prior_pos_1",
        f.lag(col(f"{INPUT_ENTITY_1_PATH}:Position2D"), order_by=[col("log_time")]),
    )
    .with_column(
        "prior_pos_2",
        f.lag(col(f"{INPUT_ENTITY_2_PATH}:Position2D"), order_by=[col("log_time")]),
    )
    .filter(
        # The first row has no predecessor, so its lagged values are null and
        # `!=` would evaluate to null there, silently dropping the initial
        # sample. Keep it explicitly.
        col("prior_pos_1").is_null()
        | col("prior_pos_2").is_null()
        | (col(f"{INPUT_ENTITY_1_PATH}:Position2D") != col("prior_pos_1"))
        | (col(f"{INPUT_ENTITY_2_PATH}:Position2D") != col("prior_pos_2"))
    )
    # Drop the helper lag columns again.
    .select(*required_input_columns)
)

In [None]:
# Compute intersection over union (I/U) for the two input entities with the
# Rerun helper, passing the input DataFrame, both input entity paths and the
# output entity path.

df_result = intersection_over_union_by_path(
    df, INPUT_ENTITY_1_PATH, INPUT_ENTITY_2_PATH, OUTPUT_ENTITY_PATH
)

In [None]:
# To visualize the results, create a recording stream and spawn a viewer for it.

local_rec = rr.RecordingStream(application_id="image_extraction")
local_rec.spawn()

In [None]:
# The DataFrame above contains the original data as well as the generated
# values. Narrow it down to the index columns plus the computed scalar, which
# is all we want to send to the viewer.

output_columns = [
    "frame",
    "log_tick",
    "log_time",
    f"{OUTPUT_ENTITY_PATH}:Scalar",
]
df_result = df_result.select(*output_columns)

In [None]:
# Send the original recording so the computed scores can be shown alongside
# the original data.

local_rec.send_recording(original_recording)

In [None]:
# Convert the DataFusion DataFrame into a pyarrow Table and send it to the
# viewer through the `local_rec` recording stream.

table_result = pa.table(df_result)
rr.dataframe.send_dataframe(table_result, rec=local_rec)