In [None]:
#
# Source:
# https://stackoverflow.com/questions/73222000/polars-conditional-merge-of-rows
#
import datetime as dt
import polars as pl

# Toy interval table: one row per timeframe, carrying "HH:MM" start/end
# strings and a type label.  The with_columns step casts `type` to
# Categorical and rewrites both clock columns in place: each string is
# formatted into "2020-01-01T<HH:MM>:00", parsed as a datetime, and cast
# to millisecond time unit.
data = pl.DataFrame(
    {
        "time_start": [
            "12:00", "12:20", "12:40", "13:10", "13:15",
            "13:50", "13:55", "14:50", "15:20", "15:25",
        ],
        "time_end": [
            "12:15", "12:30", "13:00", "13:20", "13:45",
            "14:00", "14:45", "15:00", "15:30", "15:35",
        ],
        "type": [
            "a", "a", "a", "b", "b",
            "c", "c", "a", "a", "d",
        ],
    }
).with_columns(
    pl.col("type").cast(pl.Categorical),
    # Same parsing recipe for both clock columns; the alias keeps the
    # original column name so the string column is replaced.
    *[
        pl.format("2020-01-01T{}:00", clock_col)
        .str.to_datetime()
        .dt.cast_time_unit("ms")
        .alias(clock_col)
        for clock_col in ("time_start", "time_end")
    ],
)
data

In [None]:
#
# Merge consecutive rows (ordered by time_start) that share the same type and
# whose gap -- next row's time_start minus this row's time_end -- is no more
# than MAX_GAP.
#
# Note -- Set MAX_GAP to "0m" to only combine overlapping timeframes
#
# Source:
# https://stackoverflow.com/questions/73222000/polars-conditional-merge-of-rows
#
MAX_GAP = '15m'  # merge tolerance, in the duration-string form taken by dt.offset_by

(
    data
    .sort('time_start')
    .with_columns(
        (
            # A row is the LAST row of its run when either ...
            (
                # ... the next row starts more than MAX_GAP after this row ends
                # NOTE: this looks only at the current row's own time_end, not
                # the latest end seen so far in the run, so a short interval
                # nested inside an earlier, longer one can close the run early.
                pl.col('time_end').dt.offset_by(MAX_GAP) <
                pl.col('time_start').shift(-1)
            ) |
            (
                # ... or the next row has a different type
                pl.col('type') != pl.col('type').shift(-1)
            )
        )
        # Move the "last row of run" flag down one row so it marks the FIRST
        # row of the following run; the very first row never starts a new run
        # relative to a predecessor, hence fill_value=False.
        .shift(1, fill_value=False)
        # Running count of run starts == run identifier for every row.
        .cum_sum()
        .alias('run_nbr'),
    )
    .group_by('run_nbr')
    .agg(
        # Collapse each run to its overall extent; type is constant within a
        # run (a type change always starts a new run), so first() suffices.
        pl.col('time_start').min().alias('time_start'),
        pl.col('time_end').max().alias('time_end'),
        pl.col('type').first().alias('type'),
    )
    # group_by does not promise output order; restore chronological order.
    .sort('time_start')
)

In [None]:
import rtsvg
rt = rtsvg.RACETrack()

# Six sample rows: three grouping fields (fld0, fld1, fld2) plus a start (ts0)
# and end (ts1) timestamp given as strings.  The frame is passed through
# rt.columnsAreTimestamps for the two timestamp columns -- presumably this
# converts them from strings to datetimes; confirm against rtsvg -- and is
# then ordered by fld0 and ts0.
df = rt.columnsAreTimestamps(
    pl.DataFrame(
        {
            'fld0': ['a', 'b', 'c', 'a', 'a', 'a'],
            'fld1': [1, 1, 1, 2, 2, 1],
            'fld2': ['x', 'x', 'x', 'y', 'z', 'x'],
            'ts0': [
                '2023-01-01 00:00:00',
                '2023-01-01 00:20:00',
                '2023-01-01 00:25:00',
                '2023-01-01 00:05:00',
                '2023-01-01 00:45:00',
                '2023-01-01 00:15:00',
            ],
            'ts1': [
                '2023-01-01 00:10:00',
                '2023-01-01 00:30:00',
                '2023-01-01 00:30:00',
                '2023-01-01 00:30:00',
                '2023-01-01 01:00:00',
                '2023-01-01 00:25:00',
            ],
        }
    ),
    ['ts0', 'ts1'],
).sort(['fld0', 'ts0'])
df

In [None]:
# Run rtsvg's overlapping-timeframe grouping on df with ts0/ts1 as the
# start/end columns and the single field 'fld0' as the fourth argument
# (presumably the grouping key -- confirm against the rtsvg implementation).
rt.polarsGroupOverlappingTimeframes(df, 'ts0', 'ts1', 'fld0')

In [None]:
# Same call, but the fourth argument is a list of two fields, ['fld0', 'fld1']
# (presumably a composite grouping key -- confirm against the rtsvg
# implementation).
rt.polarsGroupOverlappingTimeframes(df, 'ts0', 'ts1', ['fld0', 'fld1'])

In [None]:
# Same call with all three fields, ['fld0', 'fld1', 'fld2'], as the fourth
# argument (presumably the finest-grained grouping key -- confirm against the
# rtsvg implementation).
rt.polarsGroupOverlappingTimeframes(df, 'ts0', 'ts1', ['fld0', 'fld1', 'fld2'])