In [None]:
import tifffile
import numpy as np
import cc3d
from scipy import ndimage
from pathlib import Path
import os

In [None]:
# Root of the full-execution outputs; it is walked recursively for "DAPI_results" folders.
input_path = r"/research/sharedresources/cbi/data_exchange/dyergrp/retinal_degeneration/Version_4_underdev/Outputs_DL_CBI_9-20-23/2_Full_execution_outputs/"
# Lookup directory: only samples whose folder name appears directly inside it are processed.
path_to_look_at_to_run_onlyfor_those= r"/research/sharedresources/cbi/data_exchange/dyergrp/retinal_degeneration/Version_4_underdev/Outputs_DL_CBI_9-20-23/3_input_subgroups/1"

In [None]:
def extract_largest_component(volume, label, min_volume_threshold):
    """
    Keep only the biggest connected region of one label.

    Voxels equal to ``label`` are grouped into connected components with
    cc3d. Every voxel of that label is reset to 0 in a copy of ``volume``;
    the largest component is then written back, but only when its voxel
    count is at least ``min_volume_threshold``. Other labels are untouched.

    Returns:
        The edited copy, or ``volume`` itself (not a copy) when cc3d reports
        no component for ``label``.
    """
    mask = volume == label
    components, n_components = cc3d.connected_components(mask, return_N=True)

    # Nothing carries this label: hand the input back untouched.
    if n_components == 0:
        return volume

    voxel_counts = cc3d.statistics(components)['voxel_counts']
    # Entry 0 is the background, so real components start at index 1.
    sizes = [voxel_counts[idx] for idx in range(1, n_components + 1)]

    result = volume.copy()
    result[mask] = 0

    if not sizes:
        return result

    if max(sizes) >= min_volume_threshold:
        # argmax is 0-based over `sizes`; component ids are 1-based.
        winner = np.argmax(sizes) + 1
        result[components == winner] = label

    return result

def fill_horizontal_zeros(slice_2d):
    """
    Fill zero pixels of a 2D slice from the nearest non-zero pixel in the same row.

    The preferred donor depends on which half of the row the zero sits in
    (``mid_point = x_dim // 2``):

    * ``x >= mid_point``: nearest non-zero to the left, falling back to the
      nearest non-zero to the right.
    * ``x <  mid_point``: nearest non-zero to the right, falling back to the
      nearest non-zero to the left.

    Donor values are always taken from the input slice, never from pixels
    filled during this call, and rows without any non-zero pixel are left as
    they are. The whole slice is processed with array operations instead of
    re-scanning the row once per zero pixel.

    Args:
        slice_2d: 2D numpy array of labels.

    Returns:
        A new array with the same shape and dtype as ``slice_2d``; the input
        is not modified.
    """
    filled_slice = slice_2d.copy()
    y_dim, x_dim = slice_2d.shape
    if slice_2d.size == 0:
        return filled_slice
    mid_point = x_dim // 2

    columns = np.arange(x_dim, dtype=np.intp)
    nonzero = slice_2d != 0

    # Column index of the closest non-zero pixel at or left of each position
    # (-1 when there is none), via a running maximum along the row.
    left_idx = np.maximum.accumulate(np.where(nonzero, columns, -1), axis=1)
    # Same idea from the right: running minimum over the reversed row
    # (x_dim when there is none).
    right_idx = np.minimum.accumulate(
        np.where(nonzero, columns, x_dim)[:, ::-1], axis=1
    )[:, ::-1]

    has_left = left_idx >= 0
    has_right = right_idx < x_dim

    # Clamp the "no donor" sentinels so the gather below stays in bounds;
    # those positions are masked out before anything is written.
    left_values = np.take_along_axis(slice_2d, np.maximum(left_idx, 0), axis=1)
    right_values = np.take_along_axis(
        slice_2d, np.minimum(right_idx, x_dim - 1), axis=1
    )

    prefers_left = columns >= mid_point  # right half of the image
    use_left = has_left & (prefers_left | ~has_right)
    use_right = has_right & ~use_left

    zeros = ~nonzero
    from_left = zeros & use_left
    from_right = zeros & use_right
    filled_slice[from_left] = left_values[from_left]
    filled_slice[from_right] = right_values[from_right]

    return filled_slice

def fill_vertical_zeros(slice_2d):
    """
    Fill zero pixels with the nearest non-zero value above them in the same column.

    Donor values are taken from the input slice only, so a zero with no
    non-zero pixel above it stays zero. The fill is computed for the whole
    slice at once instead of re-scanning the column for every zero pixel.

    Args:
        slice_2d: 2D numpy array of labels.

    Returns:
        A new array with the same shape and dtype as ``slice_2d``; the input
        is not modified.
    """
    filled_slice = slice_2d.copy()
    y_dim, x_dim = slice_2d.shape
    if slice_2d.size == 0:
        return filled_slice

    rows = np.arange(y_dim, dtype=np.intp)[:, np.newaxis]
    nonzero = slice_2d != 0

    # Row index of the closest non-zero pixel at or above each position
    # (-1 when the column has none so far), via a running maximum down the column.
    donor_row = np.maximum.accumulate(np.where(nonzero, rows, -1), axis=0)

    # Clamp the -1 sentinel for the gather; those positions are excluded below.
    donor_values = np.take_along_axis(slice_2d, np.maximum(donor_row, 0), axis=0)

    fillable = ~nonzero & (donor_row >= 0)
    filled_slice[fillable] = donor_values[fillable]

    return filled_slice

def reverse_DAPI_cleanup(volume, min_volume_thresholds=None):
    """
    Run the full label cleanup on a 3D volume and return the cleaned copy.

    Steps, in order:
      1. For labels 7 down to 1, keep only the largest connected component
         of that label if it reaches the label's minimum voxel count
         (``extract_largest_component``).
      2. Fill zeros row-wise in every slice along the first axis
         (``fill_horizontal_zeros``).
      3. Fill any zeros that remain column-wise in every slice
         (``fill_vertical_zeros``).

    Args:
        volume: 3D numpy array of labels; it is copied, not modified.
        min_volume_thresholds: mapping from label (1-7) to minimum component
            size in voxels. Defaults to 90000 for every label.
    """
    thresholds = min_volume_thresholds
    if thresholds is None:
        thresholds = dict.fromkeys(range(1, 8), 90000)

    # Highest label first.
    cleaned_volume = volume.copy()
    for label in reversed(range(1, 8)):
        print(f"Processing label {label}...")
        cleaned_volume = extract_largest_component(
            cleaned_volume, label, thresholds[label]
        )

    def _fill_each_slice(fill_fn, z_dim):
        # Apply one 2D fill to every slice, logging progress every 10 slices.
        for z in range(z_dim):
            if z % 10 == 0:
                print(f"Processing slice {z}/{z_dim}")
            cleaned_volume[z] = fill_fn(cleaned_volume[z])

    print("Filling zero islands horizontally...")
    z_dim, _, _ = cleaned_volume.shape
    _fill_each_slice(fill_horizontal_zeros, z_dim)

    print("Filling remaining zeros vertically...")
    _fill_each_slice(fill_vertical_zeros, z_dim)

    return cleaned_volume

def check_volume_stats(volume):
    """
    Print, for every distinct value in ``volume``, its voxel count and its
    share of the whole volume as a percentage.
    """
    labels_and_counts = np.unique(volume, return_counts=True)
    total_voxels = volume.size

    print("\nLabel Statistics:")
    for label, count in zip(*labels_and_counts):
        percentage = (count / total_voxels) * 100
        print(f"Label {label}: {count:,} voxels ({percentage:.2f}%)")

In [None]:
def process_dapi_cleanup_paths(input_dir, path_to_look_at_to_run_onlyfor_those):
    """
    Collect the DAPI reconstructions under ``input_dir`` that still need cleaning.

    A ``DAPI_results`` folder is selected when all of the following hold:
      * its parent folder's name matches a sub-folder of
        ``path_to_look_at_to_run_onlyfor_those``;
      * it contains ``C4-DAPI-XZ_reconstructed.tif``;
      * it does not yet contain ``C4-DAPI-XZ_reconstructed_cleaned.tif``
        (such folders are reported and skipped).

    Returns:
        Three parallel lists of path strings: input files, cleaned-output
        files and XY-corrected output files, all located in the same
        ``DAPI_results`` folder.
    """
    original_name = "C4-DAPI-XZ_reconstructed.tif"
    cleaned_name = "C4-DAPI-XZ_reconstructed_cleaned.tif"
    xy_name = "C4-DAPI-XZ_reconstructed_cleaned_xy.tif"

    input_path = Path(input_dir)
    lookup_path = Path(path_to_look_at_to_run_onlyfor_those)

    # Names of the sample folders we are allowed to process.
    folders_to_process = {
        entry.name for entry in lookup_path.iterdir() if entry.is_dir()
    }
    print("\nFolders found in lookup path:", folders_to_process)

    dapi_cleanup_input_paths = []
    dapi_cleanup_result_paths = []
    dapi_xy_corrected_results_paths = []

    for current_dir, _subdirs, filenames in os.walk(input_path):
        root_path = Path(current_dir)
        if root_path.name != "DAPI_results":
            continue

        parent_folder_name = root_path.parent.name
        # Silently ignore samples outside the lookup list or without an input file.
        if parent_folder_name not in folders_to_process:
            continue
        if original_name not in filenames:
            continue

        if cleaned_name in filenames:
            print(f"\nSkipped - Cleaned file already exists in: {parent_folder_name}")
            print(f"Path: {root_path}")
            continue

        # Input and both outputs live side by side in the DAPI_results folder.
        dapi_cleanup_input_paths.append(str(root_path / original_name))
        dapi_cleanup_result_paths.append(str(root_path / cleaned_name))
        dapi_xy_corrected_results_paths.append(str(root_path / xy_name))

    print("\nSummary:")
    print(f"Total paths to process: {len(dapi_cleanup_input_paths)}")

    return dapi_cleanup_input_paths, dapi_cleanup_result_paths, dapi_xy_corrected_results_paths

In [None]:
# Discover which reconstructions still need cleaning, then show the queue and its size.
(
    dapi_cleanup_input_paths,
    dapi_cleanup_result_paths,
    dapi_xy_corrected_results_paths,
) = process_dapi_cleanup_paths(
    input_path,
    path_to_look_at_to_run_onlyfor_those,
)
print(dapi_cleanup_input_paths)
print(len(dapi_cleanup_input_paths))

In [None]:
def convert_xz_to_xy(volume):
    """
    Return ``volume`` with its first two axes exchanged; the third axis stays
    in place. Input must be 3D.
    """
    # NOTE(review): presumably this turns an XZ-sliced stack into an XY-sliced
    # one, as the name says - confirm the axis meaning against the data.
    axis_order = (1, 0, 2)
    return np.transpose(volume, axes=axis_order)

In [None]:
# Clean every queued volume and save it twice: as cleaned, and with the first
# two axes swapped.
for tif_file_path, results_path, xy_corrected_results_path in zip(
    dapi_cleanup_input_paths,
    dapi_cleanup_result_paths,
    dapi_xy_corrected_results_paths,
):
    volume = tifffile.imread(tif_file_path)
    print("volume reading complete ...")

    cleaned_volume = reverse_DAPI_cleanup(volume)
    print("Cleanup successful")

    # The cleaned file is written first; its presence makes later runs skip this sample.
    tifffile.imwrite(results_path, cleaned_volume)

    xy_final_volume = convert_xz_to_xy(cleaned_volume)
    tifffile.imwrite(xy_corrected_results_path, xy_final_volume)

## Single Sample Execution

In [None]:
#input_path = "/research/sharedresources/cbi/data_exchange/dyergrp/retinal_degeneration/Version_4_underdev/Outputs_DL6-14-24-21_FM_SickleCell/3_experiments/dapi_Experiment/C-TxRBCX706F_AcquisitionBlock1_series4/DAPI_results/C4-DAPI-XZ_reconstructed.tif"
#output_path = "/research/sharedresources/cbi/data_exchange/dyergrp/retinal_degeneration/Version_4_underdev/Outputs_DL6-14-24-21_FM_SickleCell/3_experiments/dapi_Experiment/C-TxRBCX706F_AcquisitionBlock1_series4/DAPI_results/cleaned_volume_3.tif"

In [None]:
# Disabled single-sample run: the code is wrapped in a bare string so none of it executes.
# To use it, define input_path and output_path as single-file paths (see the
# commented-out assignments in the previous cell) and remove the triple quotes.
"""
import tifffile
volume = tifffile.imread(input_path)
print("volume reading complete ...")
#cleaned_volume = DAPI_cleanup(volume)
cleaned_volume = reverse_DAPI_cleanup(volume)
print("Cleanup successful")
tifffile.imwrite(output_path, cleaned_volume)
print(f"Cleaned volume saved to {output_path}")
"""

In [None]:
import os
from pathlib import Path
import tifffile

def check_dapi_files(input_path, expected_xy=819):
    """
    Find ``C4-DAPI-XZ.tif`` images whose shape is not ``(expected_xy, z, expected_xy)``.

    Every ``isotropic_image`` folder under ``input_path`` is inspected. An
    image is reported when it is not 3D or when its first or last dimension
    differs from ``expected_xy``; the middle (z) dimension is not checked.
    Files that cannot be read are reported on stdout and skipped, so they do
    not appear in the returned list.

    Args:
        input_path: directory to search recursively.
        expected_xy: required size of the first and last dimensions.

    Returns:
        List of path strings for the images with an unexpected shape.
    """
    incorrect_shape_paths = []

    for root, dirs, files in os.walk(input_path):
        root_path = Path(root)

        # Only the DAPI XZ image inside an isotropic_image folder is of interest.
        if root_path.name != "isotropic_image" or "C4-DAPI-XZ.tif" not in files:
            continue

        dapi_path = root_path / "C4-DAPI-XZ.tif"

        # Keep the best-effort read: one unreadable file must not stop the scan.
        # NOTE(review): only the shape is used below; tif.series[0].shape might
        # avoid loading the pixel data - confirm before switching.
        try:
            with tifffile.TiffFile(dapi_path) as tif:
                image = tif.asarray()
        except Exception as e:
            print(f"Error reading file {dapi_path}: {str(e)}")
            continue

        shape = image.shape
        # Check the rank first so a 2D (or 4D) image is flagged as a wrong
        # shape instead of raising IndexError on shape[2].
        shape_ok = (
            len(shape) == 3
            and shape[0] == expected_xy
            and shape[2] == expected_xy
        )
        if shape_ok:
            continue

        incorrect_shape_paths.append(str(dapi_path))
        print(f"Incorrect shape found in {dapi_path}")
        print(f"Shape is {shape}, expected ({expected_xy}, z, {expected_xy})")
        if len(shape) == 3:
            print(f"Current dimensions: x={shape[0]}, z={shape[1]}, y={shape[2]}")

    # Print summary of findings
    if incorrect_shape_paths:
        print("\nFiles with incorrect shapes:")
        for path in incorrect_shape_paths:
            print(path)
    else:
        print(f"\nAll found files have correct shape ({expected_xy}, z, {expected_xy})")

    return incorrect_shape_paths

In [None]:
# Scan the preprocessing outputs of the SickleCell controls run for C4-DAPI-XZ.tif
# images whose shape is not (819, z, 819); the returned list is the cell's output.
check_dapi_files("/research/sharedresources/cbi/data_exchange/dyergrp/retinal_degeneration/Version_4_underdev/Outputs_DL6-14-24-21_FM_SickleCell_Controls/1_preprocessing_and_isotropization")