### Segment nuclei

In [5]:
### Imports
# Standard library imports
import json
import os
import shutil
import logging
import sys

# Third-party imports
# Data handling
import numpy as np
import pandas as pd

# Image I/O and processing
import tifffile as tiff
from nd2reader import ND2Reader
from skimage.morphology import remove_small_objects

# Deep learning and segmentation
from csbdeep.utils import Path, normalize
from stardist import (
    fill_label_holes,
    random_label_cmap,
    calculate_extents,
    gputools_available,
)
from stardist.matching import matching, matching_dataset
from stardist.models import Config2D, StarDist2D, StarDistData2D
from tensorflow.keras.utils import Sequence

# Tracking
import btrack
from btrack.constants import BayesianUpdates

# Visualization
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Utilities
from tqdm import tqdm

from datetime import datetime

import pickle

In [6]:
## Variables
## Directory Paths
# Input
# Directory that is scanned for the image stacks segmented by this notebook.
IMG_DIR = '/mnt/imaging.data/PertzLab/apoDetection/TIFFs'


# NOTE(review): APO_DIR and EXPERIMENT_INFO are not referenced by the cells visible
# here; presumably they feed the later annotation-matching steps - confirm.
APO_DIR = '/mnt/imaging.data/PertzLab/apoDetection/ApoptosisAnnotation'
EXPERIMENT_INFO = '/mnt/imaging.data/PertzLab/apoDetection/List of the experiments.csv'
# Output (the first three directories are created by the processing cell if missing)
MASK_DIR = '../data/apo_masks_test'    # Stardist label predictions, one .npz per input file
DF_DIR = '../data/summary_dfs_test'    # Summary CSVs (obj_id, t, x, y), one per input file
DETAILS_DIR = '../data/details_test'    # Pickled segmentation details, one .pkl per input file


# NOTE(review): the remaining output locations are not used by the cells visible here.
CSV_DIR = '../data/apo_match_csv'    # File with manual and stardist centroids
CROPS_DIR = '../data/apo_crops_test'    # Directory with .tif files for QC
# NOTE(review): absolute, user-specific path - consider deriving it from a shared data root.
WINDOWS_DIR = '/home/nbahou/myimaging/apoDet/data/windows_test'    # Directory with crops for scDINO
RANDOM_DIR = os.path.join(WINDOWS_DIR, 'random')
CLASS_DCT_PATH = './extras/class_dicts'


## Processing Configuration
COMPARE_2D_VERS = True    # NOTE(review): not read by the cells visible here
SAVE_DATA = True    # When False, the processing loop writes no masks/CSVs/pickles
USE_GPU = True    # When True, gputools is imported and GPU memory growth is enabled


LOAD_MASKS = True    # NOTE(review): not read by the cells visible here
MIN_NUC_SIZE = 200    # Passed as `min_size` to remove_small_objects in filter_segmentation

## Tracking Parameters
# NOTE(review): none of the tracking parameters below are read by the cells visible here.


BT_CONFIG_FILE = "extras/cell_config.json"  # Path to btrack config file
EPS_TRACK = 70         # Tracking radius [px]
TRK_MIN_LEN = 25       # Minimum track length [frames]

# Temporal settings
MAX_TRACKING_DURATION = 20    # In minutes
FRAME_INTERVAL = 5    # minutes between images we want

WINDOW_SIZE = 61    # presumably the crop window edge length in px - TODO confirm


## Logger Set Up
# Every run logs to its own timestamped file inside ./logs and mirrors all
# records to stdout so they also show up in the notebook output.
log_dir = "./logs"
os.makedirs(log_dir, exist_ok=True)  # no-op when the folder already exists

timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_filename = f"strdst_segment_{timestamp}.log"
log_path = os.path.join(log_dir, log_filename)

# force=True drops handlers that are already attached to the root logger, so
# re-running this cell in a live kernel does not duplicate every log line.
logging.basicConfig(
    handlers=[
        logging.FileHandler(log_path),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

# Module-level logger used by the helper functions and processing cells
logger = logging.getLogger(__name__)

# Optional: only forward Warnings/Errors/Critical from btrack
#logging.getLogger('btrack').setLevel(logging.WARNING)

In [7]:
def load_image_stack(path):
    """
    Load an image stack from a file based on its extension.

    Uses tifffile for TIFF files and ND2Reader for ND2 files. The extension
    check is case-insensitive (e.g. ``.TIF`` is accepted) and ``path`` may be
    a plain string or any ``os.PathLike`` object such as ``pathlib.Path``.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the ``.tif``/``.tiff``/``.nd2`` file.

    Returns
    -------
    The array returned by ``tiff.imread`` for TIFF files, or the ND2 reader
    converted with ``np.array`` for ND2 files.

    Raises
    ------
    ValueError
        If the file extension is none of ``.tif``, ``.tiff`` or ``.nd2``.
    """
    # Normalise path-like objects to str so the suffix check below works for them too
    path_str = os.fspath(path)
    lowered = path_str.lower()

    if lowered.endswith(('.tif', '.tiff')):
        # Load TIFF file using tifffile
        return tiff.imread(path_str)
    if lowered.endswith('.nd2'):
        # Load ND2 file using ND2Reader and convert it to a numpy array
        with ND2Reader(path_str) as nd2:
            return np.array(nd2)
    raise ValueError(f"Unsupported file format for file: {path_str}")

def get_image_paths(directory):
    """
    Returns a sorted list of absolute paths of all TIFF and ND2 files in a directory.

    Only regular files directly inside ``directory`` are returned; sub-directories
    are skipped even if their name happens to end with an image extension, because
    they could not be loaded as an image stack afterwards.

    Parameters
    ----------
    directory : str
        Directory to scan (not searched recursively).

    Returns
    -------
    list of str
        Absolute paths of files ending in ``.tif``, ``.tiff`` or ``.nd2``,
        sorted alphabetically.
    """
    valid_extensions = ('.tif', '.tiff', '.nd2')
    with os.scandir(directory) as entries:
        paths = [
            os.path.abspath(entry.path)
            for entry in entries
            if entry.name.endswith(valid_extensions) and entry.is_file()
        ]
    return sorted(paths)


def run_segmentation(path, model, axis_norm):
    """
    Segment every frame of an image stack with the given StarDist model.

    Parameters
    ----------
    path : str
        Image stack file; read with ``load_image_stack``.
    model
        Model object providing ``predict_instances`` and ``_guess_n_tiles``
        (a ``StarDist2D`` instance in this notebook).
    axis_norm : tuple of int
        Axes handed to ``normalize`` for the per-frame percentile normalization.

    Returns
    -------
    gt : np.ndarray
        Label images stacked along the first axis, one per frame.
    details : list
        Second return value of ``predict_instances`` for each frame, in frame order.
    """
    logger.info("\tStarting with Stardist segmentation.")
    h2b_imgs = load_image_stack(path)

    gt = []
    details = []
    for img in tqdm(h2b_imgs, desc="Segmenting"):
        # Normalize one frame at a time instead of building a second, fully
        # normalized copy of the stack up front: the per-frame result is the
        # same, but peak memory stays at one extra frame.
        x = normalize(img, 1, 99.8, axis=axis_norm)
        labels, det = model.predict_instances(x, n_tiles=model._guess_n_tiles(x), show_tile_progress=False)
        gt.append(labels)
        details.append(det)
    gt = np.asarray(gt)
    logger.info("\t\tSegmentation successful.")
    return gt, details

def filter_segmentation(gt, details, min_size=None):
    """
    Remove small objects from each label frame and tabulate the surviving objects.

    Parameters
    ----------
    gt : np.ndarray
        Stack of label images, indexed by frame along the first axis.
    details : sequence of dict
        One entry per frame; ``details[frame]['points']`` is indexed by
        ``label_id - 1`` to look up the (row, col) position of an object.
    min_size : int, optional
        Size threshold forwarded to ``remove_small_objects``. Defaults to the
        notebook-level ``MIN_NUC_SIZE`` when omitted.

    Returns
    -------
    gt_filtered : np.ndarray
        ``uint16`` array shaped like ``gt`` with the small objects removed.
    summary_df : pd.DataFrame
        One row per remaining object with columns ``obj_id``, ``t``, ``x``, ``y``
        (``x`` is the column and ``y`` the row coordinate of the object's point).
        Empty, but with these columns, when ``gt`` contains no frames.
    """
    if min_size is None:
        min_size = MIN_NUC_SIZE
    logger.info(f"\tRemoving objects smaller than {min_size}.")

    num_frames = gt.shape[0]
    gt_filtered = np.zeros_like(gt, dtype=np.uint16)
    df_list = []
    for frame in range(num_frames):
        gt_filtered[frame] = remove_small_objects(gt[frame], min_size=min_size)
        unique_ids = np.unique(gt_filtered[frame])
        unique_ids = unique_ids[unique_ids > 0]  # 0 is background

        # Label ids are 1-based while `points` is 0-based, hence the `- 1`
        # (same mapping as before; NOTE(review): verify against the StarDist version in use).
        points = np.asarray(details[frame]['points']).reshape(-1, 2)
        positions = points[unique_ids.astype(np.intp) - 1]

        df_list.append(pd.DataFrame({
            'obj_id': unique_ids,
            't': np.full_like(unique_ids, frame),
            'x': positions[:, 1],
            'y': positions[:, 0],
        }))

    if df_list:
        summary_df = pd.concat(df_list, ignore_index=True)
    else:
        # pd.concat raises on an empty list; return an empty table with the same schema
        summary_df = pd.DataFrame(columns=['obj_id', 't', 'x', 'y'])
    logger.info("\t\tDone!")
    return gt_filtered, summary_df

In [8]:
# --- Discover the input image stacks ---
logger.info("Starting Image Processing")
image_paths = get_image_paths(IMG_DIR)
### TODO remove :2 here, was only for testing
# Only the first two stacks get a filename, so the zip() in the processing loop
# stops after two files as well.
filenames = [
    os.path.splitext(os.path.basename(img_path))[0]
    for img_path in image_paths[:2]
]
logger.info(f"Detected {len(filenames)} files in specified directories.")

# --- Make sure every output directory exists ---
output_dirs = [MASK_DIR, DF_DIR, DETAILS_DIR]
for path in output_dirs:
    os.makedirs(path, exist_ok=True)

# --- Set up GT_mask prediction with stardist ---
if USE_GPU:
    # NOTE(review): gputools is not referenced afterwards in this cell; the import
    # presumably just verifies that the package is installed - confirm.
    import gputools
    from csbdeep.utils.tf import limit_gpu_memory
    limit_gpu_memory(None, allow_growth=True)
# Load standard model to create GT
model = StarDist2D.from_pretrained("2D_versatile_fluo")
# Axes used for the per-frame normalization
axis_norm = (0, 1)


### TODO All of this not used here, tracking and matching
## Lists and counters for evaluation of the matching and cropping process
# Distances between stardist and manual annotations, track lengths after apoptosis
dist_paolo_stardist, dist_alt_matching, survival_times = [], [], []
# Counters for evaluation of num matches/mismatches
num_matches = num_mismatches = 0

# Loop over all files in target directory: predict labels, filter small objects
# and (if SAVE_DATA) write the masks, the summary table and the raw details.
logger.info("Starting Segmentation.")
for path, filename in zip(image_paths, filenames):
    logger.info(f"Processing {filename}")
    # First underscore-separated token of the filename (e.g. "Exp01").
    # Not used further in this cell; kept for the downstream matching steps.
    experiment_num = filename.split('_')[0]

    ### Stardist nuclei segmentation
    # Load and normalize the image stack, then predict one label image per frame
    gt, details = run_segmentation(path, model, axis_norm)

    # Remove small objects and create DF with segmentation info (object_id, t, x, y)
    gt_filtered, summary_df = filter_segmentation(gt, details)

    if SAVE_DATA:
        # Save masks
        mask_path = os.path.join(MASK_DIR, f'{filename}.npz')
        np.savez_compressed(mask_path, gt=gt_filtered)
        logger.info(f"\t\tMask saved at: {mask_path}")

        # Save summary df (obj_id, t, x, y for every detected object)
        summary_path = os.path.join(DF_DIR, f'{filename}_pd_df.csv')
        summary_df.to_csv(summary_path, index=False)
        logger.info(f"\t\tSummary-Df saved at: {summary_path}")

        # Save the per-frame segmentation details of this file. The original cell
        # dumped an undefined name (`data`), which only worked through leftover
        # kernel state and fails on a fresh Restart & Run All.
        details_path = os.path.join(DETAILS_DIR, f'{filename}.pkl')
        with open(details_path, 'wb') as f:
            pickle.dump(details, f)
        logger.info(f"\t\tDetails saved at: {details_path}")

        

2025-03-19 15:35:04,051 - __main__ - INFO - Starting Image Processing
2025-03-19 15:35:04,076 - __main__ - INFO - Detected 2 files in specified directories.
Found model '2D_versatile_fluo' for 'StarDist2D'.
Loading network weights from 'weights_best.h5'.
Loading thresholds from 'thresholds.json'.
Using default values: prob_thresh=0.479071, nms_thresh=0.3.
2025-03-19 15:35:04,452 - __main__ - INFO - Starting Segmentation.
2025-03-19 15:35:04,453 - __main__ - INFO - Processing Exp01_Site01
2025-03-19 15:35:04,454 - __main__ - INFO - 	Starting with Stardist segmentation.


Segmenting: 100%|███████████████████████████████████████████████████████████████████| 1441/1441 [14:23<00:00,  1.67it/s]


2025-03-19 15:49:46,147 - __main__ - INFO - 		Segmentation successful.
2025-03-19 15:49:46,221 - root - INFO - 	Removing objects smaller than {MIN_NUC_SIZE}.
2025-03-19 15:50:17,108 - root - INFO - 		Done!
2025-03-19 15:50:44,986 - __main__ - INFO - 		Mask saved at: ../data/apo_masks_test/Exp01_Site01.npz
2025-03-19 15:50:45,495 - __main__ - INFO - 		Summary-Df saved at: ../data/summary_dfs_test/Exp01_Site01_pd_df.csv
2025-03-19 15:50:46,829 - __main__ - INFO - Processing Exp01_Site02
2025-03-19 15:50:46,832 - __main__ - INFO - 	Starting with Stardist segmentation.


Segmenting: 100%|███████████████████████████████████████████████████████████████████| 1441/1441 [14:27<00:00,  1.66it/s]


2025-03-19 16:05:32,399 - __main__ - INFO - 		Segmentation successful.
2025-03-19 16:05:32,461 - root - INFO - 	Removing objects smaller than {MIN_NUC_SIZE}.
2025-03-19 16:06:02,604 - root - INFO - 		Done!
2025-03-19 16:06:29,740 - __main__ - INFO - 		Mask saved at: ../data/apo_masks_test/Exp01_Site02.npz
2025-03-19 16:06:30,213 - __main__ - INFO - 		Summary-Df saved at: ../data/summary_dfs_test/Exp01_Site02_pd_df.csv


In [12]:
# Debug inspection: print the segmentation details recorded for frame index 1.
# `details` is the variable left behind by the last iteration of the processing
# loop, so this only reflects the most recently segmented file.
# NOTE(review): this cell was executed out of order (In [12]) and dumps whole
# arrays; consider removing it or showing only the keys/shapes before sharing.
print(details[1])

{'coord': array([[[ 396.      ,  400.2405  ,  404.57584 , ...,  381.84387 ,
          386.7341  ,  391.52927 ],
        [ 252.25966 ,  251.3185  ,  250.70392 , ...,  251.18613 ,
          252.36989 ,  252.47586 ]],

       [[  44.      ,   49.621185,   54.829338, ...,   26.425318,
           32.26152 ,   38.156902],
        [1001.4862  , 1000.25964 ,  998.14435 , ...,  998.30237 ,
         1000.3392  , 1001.37524 ]],

       [[ 326.      ,  330.00217 ,  333.50995 , ...,  314.83997 ,
          318.08173 ,  321.90656 ],
        [ 343.20773 ,  342.12024 ,  340.1306  , ...,  338.70215 ,
          341.1164  ,  342.57913 ]],

       ...,

       [[ 808.      ,  809.12915 ,  810.14105 , ...,  804.179   ,
          805.68335 ,  806.8271  ],
        [1024.1726  , 1023.67676 , 1023.1689  , ..., 1023.7185  ,
         1023.59283 , 1023.8965  ]],

       [[1018.      , 1020.729   , 1023.05084 , ..., 1014.3891  ,
         1014.9016  , 1016.0461  ],
        [ 626.14594 ,  627.7197  ,  626.1938  , ...