royerlab · JoOkuma · Sep 30, 2025 · Aug 24, 2025 · Aug 24, 2025 · Aug 25, 2025
diff --git a/benchmarks/graph_backends.py b/benchmarks/graph_backends.py
@@ -142,7 +142,7 @@ def _build_pipeline(
                 EdgeAttr(DEFAULT_ATTR_KEYS.SOLUTION) == True,
             ).subgraph(),
         ),
-        ("assing_tracks", lambda graph: graph.assign_track_ids()),
+        ("assign_tracks", lambda graph: graph.assign_track_ids()),
     ]
 
 

diff --git a/src/tracksdata/graph/_base_graph.py b/src/tracksdata/graph/_base_graph.py
@@ -1092,6 +1092,83 @@ def bbox_spatial_filter(
 
         return BBoxSpatialFilter(self, frame_attr_key=frame_attr_key, bbox_attr_key=bbox_attr_key)
 
+    @abc.abstractmethod
+    def assign_track_ids(
+        self,
+        output_key: str = DEFAULT_ATTR_KEYS.TRACK_ID,
+        reset: bool = True,
+        track_id_offset: int | None = None,
+        node_ids: list[int] | None = None,
+    ) -> rx.PyDiGraph:
+        """
+        Compute and assign track ids to nodes.
+        Parameters
+        ----------
+        output_key : str
+            The key of the output track id attribute.
+        reset : bool
+            Whether to reset the track ids of the graph. If True, the track ids will be reset to -1.
+        track_id_offset : int | None
+            The starting track id, useful when assigning track ids to a subgraph.
+            If None, the track ids will start from 1 or from the maximum existing track id + 1
+            if the output_key already exists and reset is False.
+        node_ids : list[int] | None
+            The node ids to assign track ids to. If None, all nodes are used.
+
+        Returns
+        -------
+        rx.PyDiGraph
+            A compressed graph (parent -> child) with track ids lineage relationships.
+            If node_ids is provided, it will only include lineages that include those nodes.
+        """
+        raise NotImplementedError(f"{self.__class__.__name__} backend does not support track id assignment.")
+
+    def tracklet_nodes(self, seeds: list[int] | None) -> list[int]:
+        """
+        Compute the non-branching tracklets around the provided seed node_ids.
+
+        Walks forward from nodes that have exactly one successor, and backward from
+        nodes that have exactly one predecessor whose out_degree == 1, until closure.
+
+        Parameters
+        ----------
+        seeds : list[int] | None
+            Seed node IDs from which to start the closure. None or empty returns an empty list.
+
+        Returns
+        -------
+        list[int]
+            Sorted unique node IDs forming the closure.
+        """
+        # NOTE: if this function becomes a bottleneck in the future it might be worth having
+        # a specialized version per backend
+        if seeds is None or len(seeds) == 0:
+            return []
+
+        track_node_ids: set[int] = set()
+        active_ids: set[int] = set(seeds)
+
+        while len(active_ids) > 0:
+            track_node_ids.update(active_ids)
+
+            # Successors: only nodes with exactly one successor
+            succ_map = self.successors(node_ids=list(active_ids))
+            successors = [int(df[DEFAULT_ATTR_KEYS.NODE_ID].first()) for df in succ_map.values() if len(df) == 1]
+
+            # Predecessors: only nodes with exactly one predecessor and predecessor out_degree == 1
+            pred_map = self.predecessors(node_ids=list(active_ids))
+            predecessors = [int(df[DEFAULT_ATTR_KEYS.NODE_ID].first()) for df in pred_map.values() if len(df) == 1]
+
+            if len(predecessors) > 0:
+                out_degrees = self.out_degree(predecessors)
+                if isinstance(out_degrees, int):
+                    out_degrees = [out_degrees]
+                predecessors = [node for node, degree in zip(predecessors, out_degrees, strict=True) if degree == 1]
+
+            active_ids = (set(successors) | set(predecessors)) - track_node_ids
+
+        return sorted(track_node_ids)
+
     def tracklet_graph(
         self,
         track_id_key: str = DEFAULT_ATTR_KEYS.TRACK_ID,

diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py
@@ -7,7 +7,6 @@
 
 from tracksdata.attrs import AttrComparison
 from tracksdata.constants import DEFAULT_ATTR_KEYS
-from tracksdata.functional._rx import _assign_track_ids
 from tracksdata.graph._base_graph import BaseGraph
 from tracksdata.graph._mapped_graph_mixin import MappedGraphMixin
 from tracksdata.graph._rustworkx_graph import IndexedRXGraph, RustWorkXGraph, RXFilter
@@ -534,52 +533,6 @@ def update_edge_attrs(
             else:
                 self._out_of_sync = True
 
-    def assign_track_ids(
-        self,
-        output_key: str = DEFAULT_ATTR_KEYS.TRACK_ID,
-        reset: bool = True,
-        track_id_offset: int = 1,
-    ) -> rx.PyDiGraph:
-        """
-        Compute and assign track ids to nodes.
-
-        Parameters
-        ----------
-        output_key : str
-            The key of the output track id attribute.
-        reset : bool
-            Whether to reset all track ids before assigning new ones.
-        track_id_offset : int
-            The starting track id, useful when assigning track ids to a subgraph.
-
-        Returns
-        -------
-        rx.PyDiGraph
-            A compressed graph (parent -> child) with track ids lineage relationships.
-        """
-        try:
-            node_ids, track_ids, tracks_graph = _assign_track_ids(self.rx_graph, track_id_offset)
-        except RuntimeError as e:
-            raise RuntimeError(
-                "Are you sure this graph is a valid lineage graph?\n"
-                "This function expects a solved graph.\n"
-                "Often used from `graph.subgraph(edge_attr_filter={'solution': True})`"
-            ) from e
-
-        node_ids = self._map_to_external(node_ids)
-
-        if output_key not in self.node_attr_keys:
-            self.add_node_attr_key(output_key, -1)
-        elif reset:
-            self.update_node_attrs(attrs={output_key: -1})
-
-        self.update_node_attrs(
-            node_ids=node_ids,
-            attrs={output_key: track_ids},
-        )
-
-        return tracks_graph
-
     def in_degree(self, node_ids: list[int] | int | None = None) -> list[int] | int:
         """
         Get the in-degree of a list of nodes.

diff --git a/src/tracksdata/graph/_rustworkx_graph.py b/src/tracksdata/graph/_rustworkx_graph.py
@@ -1086,48 +1086,72 @@ def assign_track_ids(
         self,
         output_key: str = DEFAULT_ATTR_KEYS.TRACK_ID,
         reset: bool = True,
-        track_id_offset: int = 1,
+        track_id_offset: int | None = None,
+        node_ids: list[int] | None = None,
     ) -> rx.PyDiGraph:
-        """
-        Compute and assign track ids to nodes.
-
-        Parameters
-        ----------
-        output_key : str
-            The key of the output track id attribute.
-        reset : bool
-            Whether to reset the track ids of the graph. If True, the track ids will be reset to -1.
-        track_id_offset : int
-            The starting track id, useful when assigning track ids to a subgraph.
+        if node_ids is not None:
+            track_node_ids = set(self.tracklet_nodes(node_ids))
+            return (
+                self.filter(node_ids=list(track_node_ids))
+                .subgraph(node_attr_keys=[output_key], edge_attr_keys=[])
+                .assign_track_ids(
+                    output_key=output_key,
+                    reset=reset,
+                    track_id_offset=track_id_offset,
+                )
+            )
+        else:
+            if output_key not in self.node_attr_keys:
+                self.add_node_attr_key(output_key, -1)
+                previous_id_df = None
+                if track_id_offset is None:
+                    track_id_offset = 1
+            elif reset:
+                self.update_node_attrs(attrs={output_key: -1})
+                previous_id_df = None
+                if track_id_offset is None:
+                    track_id_offset = 1
+            else:
+                previous_id_df = self.node_attrs(attr_keys=[DEFAULT_ATTR_KEYS.NODE_ID, output_key])
+                if track_id_offset is None:
+                    track_id_offset: int = max(previous_id_df[output_key].max(), 0) + 1
 
-        Returns
-        -------
-        rx.PyDiGraph
-            A compressed graph (parent -> child) with track ids lineage relationships.
-        """
-        try:
-            node_ids, track_ids, tracks_graph = _assign_track_ids(self.rx_graph, track_id_offset)
-        except RuntimeError as e:
-            raise RuntimeError(
-                "Are you sure this graph is a valid lineage graph?\n"
-                "This function expects a solved graph.\n"
-                "Often used from `graph.subgraph(edge_attr_filter={'solution': True})`"
-            ) from e
-
-        if output_key not in self.node_attr_keys:
-            self.add_node_attr_key(output_key, -1)
-        elif reset:
-            self.update_node_attrs(node_ids=self.node_ids(), attrs={output_key: -1})
-
-        # node_ids are rustworkx graph ids, therefore we don't need node_id mapping
-        # and we must use RustWorkXGraph for IndexedRXGraph
-        RustWorkXGraph.update_node_attrs(
-            self,
-            node_ids=node_ids,
-            attrs={output_key: track_ids},
-        )
+            try:
+                track_node_ids, track_ids, tracks_graph = _assign_track_ids(self.rx_graph, track_id_offset)
+            except RuntimeError as e:
+                raise RuntimeError(
+                    "Are you sure this graph is a valid lineage graph?\n"
+                    "This function expects a solved graph.\n"
+                    "Often used from `graph.subgraph(edge_attr_filter={'solution': True})`"
+                ) from e
+
+            # For the IndexedRXGraph, we need to map the track_node_ids to the external node ids
+            if hasattr(self, "_map_to_external"):
+                track_node_ids = self._map_to_external(track_node_ids)  # type: ignore
+
+            # mapping to already existing track IDs as much as possible
+            if previous_id_df is not None:
+                new_id_df = pl.DataFrame({DEFAULT_ATTR_KEYS.NODE_ID: track_node_ids, output_key + "_new": track_ids})
+                merged = new_id_df.join(
+                    previous_id_df,
+                    left_on=DEFAULT_ATTR_KEYS.NODE_ID,
+                    right_on=DEFAULT_ATTR_KEYS.NODE_ID,
+                    how="left",
+                ).filter(pl.col(output_key) != -1)
+                if merged.height > 0:
+                    track_id_map = merged.unique(output_key + "_new", keep="first").unique(output_key, keep="first")
+                    track_id_map = dict(zip(track_id_map[output_key + "_new"], track_id_map[output_key], strict=True))
+                else:
+                    track_id_map = {}
+                # Ensure that the result is a list of integers (using numpy integer causes issues with SQLGraph)
+                # Later on, we will make it safe to use numpy integers everywhere for updating attributes.
+                track_ids = [int(track_id_map.get(tid, tid)) for tid in track_ids]  # type: ignore
+            self.update_node_attrs(
+                node_ids=track_node_ids,  # type: ignore
+                attrs={output_key: track_ids},
+            )
 
-        return tracks_graph
+            return tracks_graph
 
     def in_degree(self, node_ids: list[int] | int | None = None) -> list[int] | int:
         """

diff --git a/src/tracksdata/graph/_sql_graph.py b/src/tracksdata/graph/_sql_graph.py
@@ -1138,12 +1138,12 @@ def node_attrs(
                     *[getattr(self.Node, key) for key in attr_keys],
                 )
 
-        nodes_df = pl.read_database(
-            self._raw_query(query),
-            connection=session.connection(),
-        )
-        nodes_df = self._cast_boolean_columns(self.Node, nodes_df)
-        nodes_df = unpickle_bytes_columns(nodes_df)
+            nodes_df = pl.read_database(
+                self._raw_query(query),
+                connection=session.connection(),
+            )
+            nodes_df = self._cast_boolean_columns(self.Node, nodes_df)
+            nodes_df = unpickle_bytes_columns(nodes_df)
 
         # indices are included by default and must be removed
         if attr_keys is not None:
@@ -1376,6 +1376,32 @@ def update_edge_attrs(
     ) -> None:
         self._update_table(self.Edge, edge_ids, DEFAULT_ATTR_KEYS.EDGE_ID, attrs)
 
+    def assign_track_ids(
+        self,
+        output_key: str = DEFAULT_ATTR_KEYS.TRACK_ID,
+        reset: bool = True,
+        track_id_offset: int | None = None,
+        node_ids: list[int] | None = None,
+    ) -> rx.PyDiGraph:
+        if node_ids is not None:
+            track_node_ids = list(set(self.tracklet_nodes(node_ids)))
+        else:
+            track_node_ids = None
+        if output_key in self.node_attr_keys:
+            node_attr_keys = [output_key]
+        else:
+            node_attr_keys = []
+
+        return (
+            self.filter(node_ids=track_node_ids)
+            .subgraph(node_attr_keys=node_attr_keys)
+            .assign_track_ids(
+                output_key=output_key,
+                reset=reset,
+                track_id_offset=track_id_offset,
+            )
+        )
+
     def _get_degree(
         self,
         node_ids: list[int] | int | None,