In [1]:
import napari
from pathlib import Path
from trackedit.motile_overwrites import *
from trackedit.TrackEdit_functions import *
from trackedit.DatabaseHandler import DatabaseHandler
from motile_tracker.data_model.actions import AddEdges, DeleteNodes, DeleteEdges, AddNodes


#**********INPUTS*********
# User-editable settings for this session; everything below this box reads from them.
# NOTE(review): working_directory is an absolute, user-specific path, so the notebook
# cannot run on another machine without editing it.
working_directory = Path('/home/teun.huijben/Documents/data/Akila/20241003/neuromast4_t851/adjusted/')
db_filename_old = 'data.db'
data_shape_full = [851,73,1024,1024]      #T,(Z),Y,X       (851,73,1024,1024)
# Only the first element of `scale` is used in this cell (passed as z_scale below).
# NOTE(review): presumably the order is (Z,Y,X) -- confirm.
scale = (4,1,1)
layer_name = 'ultrack'
allow_overwrite = True      #overwrite existing database/changelog
#*************************

# Build the database handler from the inputs above.
# NOTE(review): `name` is hard-coded to 'ultrack' instead of reusing `layer_name`; the two
# are equal now but can drift apart if only one is edited -- confirm whether they are
# meant to be the same value.
DB_handler = DatabaseHandler(
            db_filename_old = db_filename_old,
            working_directory = working_directory,
            data_shape_full = data_shape_full,
            z_scale = scale[0],
            name = 'ultrack',
            allow_overwrite = allow_overwrite)

#overwrite some motile functions
# These assignments replace the methods on the classes themselves, so they affect every
# instance for the rest of the kernel session. Each replacement is produced by a factory
# that receives DB_handler (or the layer name, for the refresh hook).
# NOTE(review): TracksViewer is not imported explicitly in this cell; presumably it is
# provided by one of the star imports above -- confirm.
DeleteNodes._apply = create_db_delete_nodes(DB_handler)
DeleteEdges._apply = create_db_delete_edges(DB_handler)
AddEdges._apply = create_db_add_edges(DB_handler)
AddNodes._apply = create_db_add_nodes(DB_handler)
TracksViewer._refresh = create_tracks_viewer_and_segments_refresh(layer_name=layer_name)


#open napari with TrackEdit
# The patches above are installed before the viewer and TrackEditClass are created.
viewer = napari.Viewer()
trackeditclass = TrackEditClass(viewer, databasehandler = DB_handler)
viewer.dims.ndisplay = 3    #3D view

Database copied to: /home/teun.huijben/Documents/data/Akila/20241003/neuromast4_t851/adjusted/data_v1.db
old database: data.db
new database: data_v1.db
new logfile: data_v1_changelog.txt
Making new tracking view controller


# Red flags

In [2]:
# Display the handler's `df` attribute as the cell output (a table indexed by `id`).
DB_handler.df

Unnamed: 0_level_0,track_id,t,z,y,x,id,parent_track_id,parent_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000001,1,1,88.0,580.0,222.0,2000001,-1,-1
3000001,1,2,108.0,587.0,210.0,3000001,-1,2000001
4000003,1,3,112.0,585.0,201.0,4000003,-1,3000001
5000006,1,4,104.0,583.0,209.0,5000006,-1,4000003
6000002,1,5,108.0,590.0,218.0,6000002,-1,5000006
...,...,...,...,...,...,...,...,...
101000027,71,100,140.0,607.0,85.0,101000027,-1,100000027
102000027,71,101,144.0,608.0,81.0,102000027,-1,101000027
103000024,71,102,140.0,605.0,89.0,103000024,-1,102000027
104000021,71,103,136.0,601.0,95.0,104000021,-1,103000024


In [5]:
import numpy as np
# def find_red_flags_in_df(df: pd.DataFrame) -> pd.DataFrame:
#     """
#     Identify tracking red flags, i.e. when cell tracks appear ('added') or disappear ('removed')
#     from one timepoint to the next, and include the corresponding 'id' field from the original dataframe.
    
#     A track is flagged as 'added' if it is present at the current timepoint but not in the previous one,
#     and flagged as 'removed' if it is present at the current timepoint but not in the next one.
    
#     Parameters
#     ----------
#     df : pd.DataFrame
#         DataFrame containing at least 't' (timepoint), 'track_id', and 'id' columns.
    
#     Returns
#     -------
#     pd.DataFrame
#         DataFrame with columns: 'Timepoint', 'track_id', 'event', and 'id'.
#     """
#     # Ensure the timepoints are continuous even if some are missing
#     time_range = range(df['t'].min(), df['t'].max() + 1)
    
#     # Group by timepoint and create a Series mapping each time to its set of track_ids.
#     track_sets = df.groupby('t')['track_id'].agg(set).reindex(time_range, fill_value=set())
    
#     # Precompute a mapping from (timepoint, track_id) to the corresponding "id" field.
#     # Assumes that there is only one row per (t, track_id).
#     mapping = df.set_index(['t', 'track_id'])['id'].to_dict()
    
#     events = []
#     timepoints = list(time_range)
#     for i, t in enumerate(timepoints):
#         current = track_sets[t]
#         # For the first timepoint, treat the previous set as the current set.
#         prev = track_sets[timepoints[i - 1]] if i > 0 else current
#         # For the last timepoint, treat the next set as the current set.
#         nxt = track_sets[timepoints[i + 1]] if i < len(timepoints) - 1 else current
        
#         # Tracks added at time t (present now but not in the previous timepoint)
#         for track in current - prev:
#             events.append({
#                 't': t,
#                 'track_id': track,
#                 'id': mapping.get((t, track)),
#                 'event': 'added',
#             })
#         # Tracks removed at time t (present now but not in the next timepoint)
#         for track in current - nxt:
#             events.append({
#                 't': t,
#                 'track_id': track,
#                 'id': mapping.get((t, track)),
#                 'event': 'removed',
#             })
    
#     return pd.DataFrame(events)


def find_red_flags_in_df2(df: pd.DataFrame) -> pd.DataFrame:
    """
    Identify tracking red flags: tracks that appear ('added') or disappear ('removed')
    from one timepoint to the next, while taking cell divisions into account.

    A track is a candidate for 'added' at timepoint t when its track_id is present at t
    but not at t-1. It is NOT flagged if:
      - It has a parent (parent_id != -1) whose id is present in the previous timepoint.
    A track is a candidate for 'removed' at timepoint t when its track_id is present at t
    but not at t+1. It is NOT flagged if:
      - In the next timepoint, at least two cells have this cell's id as their parent_id.

    Timepoints between the smallest and largest 't' that have no rows are treated as
    containing no cells. Nothing is flagged as 'added' at the first timepoint or as
    'removed' at the last timepoint, because there is nothing to compare against.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing at least the columns 't' (integer timepoint), 'track_id',
        'id' and 'parent_id' (-1 meaning "no parent"). Each (t, track_id) pair is
        assumed to occur at most once; with duplicates only one of the rows is used.

    Returns
    -------
    pd.DataFrame
        One row per red flag with columns 't', 'track_id', 'id' and 'event'
        ('added' or 'removed'). Rows are ordered by timepoint; within a timepoint the
        'added' events come first, and each group is sorted by track_id. The frame is
        empty (but still has these four columns) when nothing is flagged or `df` is empty.
    """
    columns = ['t', 'track_id', 'id', 'event']

    # min()/max() of an empty column are NaN, which range() cannot consume.
    if df.empty:
        return pd.DataFrame(columns=columns)

    # Continuous range of timepoints, including any that have no rows in df.
    timepoints = list(range(df['t'].min(), df['t'].max() + 1))
    last_index = len(timepoints) - 1

    # Per-timepoint lookups, built in a single pass over the groups:
    #   track_sets[t]      -> set of track_ids present at t
    #   id_sets[t]         -> set of cell ids present at t
    #   daughter_counts[t] -> {parent_id: number of cells at t that have this parent}
    track_sets = {}
    id_sets = {}
    daughter_counts = {}
    for t, group in df.groupby('t'):
        track_sets[t] = set(group['track_id'])
        id_sets[t] = set(group['id'])
        daughter_counts[t] = group['parent_id'].value_counts().to_dict()

    # (t, track_id) -> the cell's own id / its parent_id.
    by_time_and_track = df.set_index(['t', 'track_id'])
    id_mapping = by_time_and_track['id'].to_dict()
    parent_mapping = by_time_and_track['parent_id'].to_dict()

    no_cells = frozenset()  # immutable stand-in for timepoints without any rows
    events = []

    for i, t in enumerate(timepoints):
        current_tracks = track_sets.get(t, no_cells)

        # "added": present now but not in the previous timepoint.
        if i > 0:
            previous_t = timepoints[i - 1]
            prev_tracks = track_sets.get(previous_t, no_cells)
            prev_ids = id_sets.get(previous_t, no_cells)
            for track in sorted(current_tracks - prev_tracks):
                par = parent_mapping.get((t, track), -1)
                # A parent that exists in the previous timepoint explains the new
                # track_id (e.g. a daughter after a division), so do not flag it.
                if par != -1 and par in prev_ids:
                    continue
                events.append({
                    't': t,
                    'track_id': track,
                    'id': id_mapping.get((t, track)),
                    'event': 'added',
                })

        # "removed": present now but not in the next timepoint.
        if i < last_index:
            next_t = timepoints[i + 1]
            next_tracks = track_sets.get(next_t, no_cells)
            daughters = daughter_counts.get(next_t, {})
            for track in sorted(current_tracks - next_tracks):
                cell_id = id_mapping.get((t, track))
                # Two or more cells in the next timepoint naming this cell as parent
                # means the track ended in a division, so do not flag it.
                if daughters.get(cell_id, 0) >= 2:
                    continue
                events.append({
                    't': t,
                    'track_id': track,
                    'id': cell_id,
                    'event': 'removed',
                })

    return pd.DataFrame(events, columns=columns)


# Load the tracking table from the database handler and compute the red flags on it.
# NOTE(review): entire_database=True presumably requests all timepoints rather than a
# subset -- confirm against DatabaseHandler.db_to_df.
df = DB_handler.db_to_df(entire_database=True)
red_flags = find_red_flags_in_df2(df)
# Show the flagged events, then the (rows, columns) shape of the result.
display(red_flags)
print(red_flags.shape)

Unnamed: 0,t,track_id,id,event
0,1,1,2000001,added
1,1,3,2000002,added
2,1,4,2000003,added
3,1,5,2000004,added
4,1,14,2000005,added
...,...,...,...,...
143,825,142,826000029,added
144,826,140,827000020,added
145,826,141,827000028,added
146,837,140,838000023,removed


(148, 4)


TODO:
- Compute the red flags for the entire dataset.