In [None]:
import sys
sys.path.insert(0, "..")

from scripts.read_data import read_nov_data

In [None]:
# Load the November data set; the result is looked up by string key
# further down in this notebook.
truck_lfs = read_nov_data(
    data_dir="../data/nov_data",
)

In [None]:
import polars as pl

lf = truck_lfs["5FT0192"]

# iQCMode values that count as "active". Switching between these values
# does NOT break a segment.
active_modes = [2, 3, 4, 5]

# A new segment starts only when the gap between consecutive active rows
# exceeds this threshold (1 s, expressed in milliseconds).
max_gap_ms = 1_000

# Find continuous timeframes where iQCMode is any of the active modes.
active_segments = (
    lf.select("Date", "Timestamp", "iQC1.iQCMode")
    .drop_nulls()
    .filter(pl.col("iQC1.iQCMode").is_in(active_modes))
    # Build a proper Datetime from the Date (filename) + Timestamp
    # (seconds since midnight): epoch milliseconds of the date plus the
    # intra-day offset converted to milliseconds.
    .with_columns(
        (
            pl.col("Date").str.to_date("%Y-%m-%d").dt.timestamp("ms")
            + (pl.col("Timestamp") * 1_000).cast(pl.Int64)
        )
        .cast(pl.Datetime("ms"))
        .alias("Datetime"),
    )
    .sort("Datetime")
    .with_columns(
        # Compare the gap in milliseconds. dt.total_seconds() yields whole
        # seconds, so gaps between 1 s and 2 s would truncate to 1 and fail
        # a "> 1" test, silently merging segments that should be split.
        (pl.col("Datetime").diff().dt.total_milliseconds() > max_gap_ms)
        # The first row has no predecessor (null diff); it opens segment 1.
        .fill_null(True)
        # Running count of segment starts doubles as the segment label.
        .cum_sum()
        .alias("segment_id"),
    )
    .group_by("segment_id")
    .agg(
        pl.col("Datetime").min().alias("start"),
        pl.col("Datetime").max().alias("end"),
        pl.col("Datetime").count().alias("row_count"),
        pl.col("iQC1.iQCMode").unique().sort().alias("modes_seen"),
    )
    .with_columns(
        (pl.col("end") - pl.col("start")).alias("duration"),
    )
    # group_by does not guarantee output order, so order chronologically.
    .sort("start")
    .drop("segment_id")
    .collect()
)

print(f"{len(active_segments)} continuous timeframes")
active_segments