In [1]:
import os
import tifffile as tf
import numpy as np
import geopandas as gpd
from shapely.geometry import mapping
from rasterio.features import rasterize
from skimage.transform import resize as img_resize       
import cv2

In [2]:
file_num = 0
image_source = "MIF"   # HE or MIF 
skip_existing = False

# Fail fast on a mistyped source. The cells below compare against "HE" in one
# place and "MIF" in others, so any other value would pick the MIF image here
# but the H&E output folder / masking layout later on.
if image_source not in ("HE", "MIF"):
    raise ValueError(f"image_source must be 'HE' or 'MIF', got {image_source!r}")

# Define original image paths
root_dir = "/rsrch9/home/plm/idso_fa1_pathology/TIER1/paul-xenium/TMP-IL-Pilot/20250515__183240__CIMAC_Validation/registration"
data_dict = {
        "output-XETG00522__0066398__Region_1__20250515__183305":"Xenium H&E Meso1-ICON2 TMA 5-21-2025_matching_orientation.ome.tif" ,
        "output-XETG00522__0066402__Region_1__20250515__183305":"Xenium H&E PCF TMA 5-28-2025_matching_orientation.ome.tif"
        }

# file_num selects one (Xenium output folder, H&E slide) pair from data_dict.
xenium_folder = list(data_dict.keys())[file_num]
slide_name = data_dict[xenium_folder]
slide_file = os.path.join(root_dir, slide_name)
morph_file = os.path.join(os.path.dirname(root_dir), xenium_folder, "morphology_focus", "morphology_focus_stack.ome.tif")

# Define annotation paths
he_annot = os.path.join(root_dir, "tma_annotations", slide_name.replace(".ome.tif", "_annot.geojson"))


def morph_prefix_for(slide_filename):
    """Return the annotation/output prefix ("meso1" or "pcf") for a slide file name.

    Parameters
    ----------
    slide_filename : str
        Slide file name or path; only its base name is inspected.

    Returns
    -------
    str
        "meso1" if the base name contains "Meso", otherwise "pcf" if it
        contains "PCF".

    Raises
    ------
    ValueError
        If the base name contains neither marker. Without this check the
        prefix would simply be left undefined and fail later with a NameError.
    """
    base_name = os.path.basename(slide_filename)
    if "Meso" in base_name:
        return "meso1"
    if "PCF" in base_name:
        return "pcf"
    raise ValueError(
        f"Cannot derive a morphology prefix from slide name {base_name!r}: "
        "expected it to contain 'Meso' or 'PCF'"
    )


morph_prefix = morph_prefix_for(slide_name)

morph_annot = os.path.join(root_dir, "tma_annotations", f"{morph_prefix}_morphology_focus_0000_annot.geojson")

# Pick the image / annotation pair that matches the requested source.
if image_source == "HE":
    geojson_path = he_annot
    img_path = slide_file
else:
    geojson_path = morph_annot
    img_path = morph_file


In [3]:
## Helper functions 

def img_resize(img, scale_factor):
    """Resize an image by a uniform scale factor with OpenCV area interpolation.

    Note: this definition replaces the ``img_resize`` name that the import
    cell binds to ``skimage.transform.resize``.

    Parameters
    ----------
    img : array-like with ``.shape``
        Image whose first two axes are (rows, columns); any further axis is
        passed through to ``cv2.resize`` unchanged.
    scale_factor : float
        Multiplier applied to both spatial dimensions (0.5 halves them).

    Returns
    -------
    The array returned by ``cv2.resize``.
    """
    # Target sizes are floored, then clamped to 1 px: a zero-sized target
    # (tiny image or deep pyramid level) would make cv2.resize fail.
    width  = max(1, int(np.floor(img.shape[1] * scale_factor)))
    height = max(1, int(np.floor(img.shape[0] * scale_factor)))
    # cv2.resize expects the target size as (width, height), not (rows, cols).
    return cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

def _halve_image(image, photometric_interp):
    """Return `image` down-sampled by 2x along its spatial axes."""
    # Same heuristic as before for spotting a channel-first (C, H, W) stack,
    # but restricted to 3-D arrays so that 2-D and channel-last images are
    # still resized instead of being written unchanged at every level.
    channel_first = (
        photometric_interp == 'minisblack'
        and image.ndim == 3
        and image.shape[0] < image.shape[-1]
    )
    if not channel_first:
        return img_resize(image, 0.5)

    # img_resize works on (H, W[, C]); move channels last and back again.
    resized = img_resize(np.moveaxis(image, 0, -1), 0.5)
    if resized.ndim == 2:
        # NOTE(review): cv2.resize appears to drop a singleton channel axis;
        # restore it so the moveaxis below does not transpose H and W.
        resized = resized[..., np.newaxis]
    return np.moveaxis(resized, -1, 0)


def write_ome_tif(filename, image, channel_names,
                  photometric_interp, metadata, subresolutions):
    """Write `image` as a tiled, LZW-compressed pyramidal OME-TIFF (BigTIFF).

    Level 0 is written with `metadata` and `subresolutions` SubIFDs; each
    following level is the previous one down-sampled by 2x.

    Parameters
    ----------
    filename : str
        Output path.
    image : numpy.ndarray
        Full-resolution image. For 'minisblack' a channel-first (C, H, W)
        stack is expected; otherwise the array is resized as (H, W[, C]).
    channel_names : list of str or None
        Currently unused; channel names are taken from `metadata`.
    photometric_interp : str
        TIFF photometric interpretation, e.g. 'rgb' or 'minisblack'.
    metadata : dict
        OME metadata passed to tifffile. Must contain 'PhysicalSizeX' and
        'PhysicalSizeY'.
    subresolutions : int
        Number of down-sampled pyramid levels to write.

    Raises
    ------
    KeyError
        If `metadata` lacks 'PhysicalSizeX' or 'PhysicalSizeY'.
    """
    # Read the pixel sizes first so a missing key raises before the output
    # file is opened.
    px_size_x = metadata['PhysicalSizeX']
    px_size_y = metadata['PhysicalSizeY']

    options = dict(
        photometric=photometric_interp,
        tile=(1024, 1024),
        maxworkers=4,
        compression='lzw',
        resolutionunit='CENTIMETER',
    )

    with tf.TiffWriter(filename, bigtiff=True) as tif:
        print("Writing pyramid level 0")
        tif.write(
            image,
            subifds=subresolutions,
            # pixels per centimetre; assumes the physical size is given in
            # micrometres (1e4 um per cm) — TODO confirm against the unit.
            resolution=(1e4 / px_size_x, 1e4 / px_size_y),
            metadata=metadata,
            **options,
        )

        downsample = 1
        for level in range(1, subresolutions + 1):
            downsample *= 2
            image = _halve_image(image, photometric_interp)

            print("Writing pyramid level {}".format(level))
            tif.write(
                image,
                subfiletype=1,
                # Coarser levels have fewer pixels per centimetre, so the
                # resolution is divided by the down-sampling factor.
                resolution=(1e4 / downsample / px_size_x,
                            1e4 / downsample / px_size_y),
                **options
            )

    print("Saved:", filename)


In [4]:
# ── USER SETTINGS ─────────────────────────────────────────────
# Image to crop and the polygons (with a 'name' property) that delimit cores.
focus_stack, geojson_fp = img_path, geojson_path

# Number of down-sampled pyramid levels written below the full-resolution one.
subres_levels = 7

# MIF output goes under morphology_focus/<prefix>; anything else under HnE/<slide>.
out_parts = (
    ("morphology_focus", morph_prefix)
    if image_source == "MIF"
    else ("HnE", slide_name)
)
out_root = os.path.join(root_dir, *out_parts)
os.makedirs(out_root, exist_ok=True)
# ──────────────────────────────────────────────────────────────

In [5]:
print("Reading image...") 

with tf.TiffFile(focus_stack) as tif:
    # Validate and parse the metadata before touching pixel data, so that an
    # unsuitable file is rejected without first reading the whole image.
    if not tif.is_ome:
        raise RuntimeError("Input is not OME‑TIFF.")

    meta_dict = tf.xml2dict(tif.ome_metadata)
    image_meta = meta_dict['OME']['Image']
    if isinstance(image_meta, list):
        # NOTE(review): several <Image> elements presumably parse to a list;
        # the first one is assumed to describe the array read below — confirm.
        image_meta = image_meta[0]
    pixels_meta = image_meta['Pixels']

    px_x   = pixels_meta['PhysicalSizeX']
    px_y   = pixels_meta['PhysicalSizeY']
    unit   = pixels_meta['PhysicalSizeXUnit']

    channel_meta = pixels_meta.get('Channel')
    if isinstance(channel_meta, dict):
        # A single <Channel> element is a dict, not a list; iterating it
        # would yield its keys and silently discard the channel name.
        channel_meta = [channel_meta]
    try:
        channel_names = [ch['Name'] for ch in channel_meta]
    except (KeyError, TypeError):
        # No channels, or channels without a Name: write without names.
        channel_names = None

    photometric = 'rgb' if tif.pages[0].samplesperpixel == 3 else 'minisblack'
    stack = tif.asarray()

metadata = {
    'PhysicalSizeX': px_x,
    'PhysicalSizeXUnit': unit,
    'PhysicalSizeY': px_y,
    'PhysicalSizeYUnit': unit,
    'Channel': {'Name': channel_names},
}

if stack.ndim != 3:
    raise ValueError(f"Expected a 3-D image array, got shape {stack.shape}")

# 'minisblack' stacks are treated as channel-first, RGB images as channel-last.
if photometric == "minisblack":
    C, H, W = stack.shape
else: 
    H, W, C = stack.shape
    
print("Finished loading image. Shape: ", stack.shape)

# load core polygons ---------------------------------------
gdf = gpd.read_file(geojson_fp)
# gdf = gdf[gdf['isMissing'] == False].reset_index(drop=True) # only if exporting tma objects instead of annotations

Reading image...
Finished loading image. Shape:  (4, 108707, 48292)


In [6]:
# iterate over cores ---------------------------------------
# For every annotated polygon, write a full-canvas copy of the image in which
# everything outside the polygon is blanked (0 for MIF, 255 for H&E).
print("Iterating through cores...")

for idx, row in gdf.iterrows():
    core_id = row.get('name')
    # Series.get only falls back when the column is absent; a null or blank
    # name would otherwise end up in the file name (e.g. "..._None.ome.tif").
    if (
        core_id is None
        or (isinstance(core_id, float) and np.isnan(core_id))
        or str(core_id).strip() == ""
    ):
        core_id = f'core_{idx + 1}'

    if image_source == "MIF":
        filename = os.path.join(out_root, f"morphology_focus_{core_id}.ome.tif")
    else:
        filename = os.path.join(out_root, f"he_{core_id}.ome.tif")

    # Decide whether to skip before rasterising: the mask covers the whole
    # slide, so building it for an already-exported core is wasted work.
    if skip_existing and os.path.exists(filename):
        print(f"{filename} already exists! Skipping")
        continue

    print("Creating empy mask")
    mask_bool = rasterize(
        [(mapping(row.geometry), 1)],
        out_shape=(H, W),
        fill=0,
        dtype='uint8',
    ).astype(bool)

    print("Populating mask...")
    if image_source == "MIF":
        # Channel-first stack: background stays 0.
        masked_stack = np.zeros_like(stack)
        masked_stack[:, mask_bool] = stack[:, mask_bool]
    else:
        # Channel-last image: background is filled with 255; full_like
        # avoids the extra full-size temporary of ones_like(...) * 255.
        masked_stack = np.full_like(stack, 255)
        masked_stack[mask_bool, :] = stack[mask_bool, :]

    print(f"\n── Writing core {core_id}")
    write_ome_tif(
        filename=filename,
        image=masked_stack,
        channel_names=channel_names,
        photometric_interp=photometric,
        metadata=metadata,
        subresolutions=subres_levels,
    )

    # Drop the per-core arrays now so the next iteration does not allocate a
    # new full-size copy while the previous one is still referenced.
    del masked_stack, mask_bool

print("\nAll cores extracted.")

Iterating through cores...
Creating empy mask
Populating mask...

── Writing core A-4
Writing pyramid level 0
Writing pyramid level 1
Writing pyramid level 2
Writing pyramid level 3
Writing pyramid level 4
Writing pyramid level 5
Writing pyramid level 6
Writing pyramid level 7
Saved: /rsrch9/home/plm/idso_fa1_pathology/TIER1/paul-xenium/TMP-IL-Pilot/20250515__183240__CIMAC_Validation/registration/morphology_focus/meso1/morphology_focus_A-4.ome.tif
Creating empy mask
Populating mask...

── Writing core A-5
Writing pyramid level 0
Writing pyramid level 1
Writing pyramid level 2
Writing pyramid level 3
Writing pyramid level 4
Writing pyramid level 5
Writing pyramid level 6
Writing pyramid level 7
Saved: /rsrch9/home/plm/idso_fa1_pathology/TIER1/paul-xenium/TMP-IL-Pilot/20250515__183240__CIMAC_Validation/registration/morphology_focus/meso1/morphology_focus_A-5.ome.tif
Creating empy mask
Populating mask...

── Writing core A-6
Writing pyramid level 0
Writing pyramid level 1
Writing pyramid 