# Omnipose Segmentation from ImageJ Macro converted image directories

This file is meant to aid in omnipose segmentation in a reproducible and streamlined way to help with automated image analysis especially early QC to adjust experimental and imaging parameters as needed to optimize S/N for the experiment. 

#### Import Necessary packages and Functions

In [5]:
# Imports for all chunks
import os
import shutil
from aicsimageio.readers.ome_tiff_reader import OmeTiffReader
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from skimage.io import imread, imsave
from pathlib import Path
import time


In [6]:
# omnipose setup and GPU
from cellpose_omni import models, core
import torch
use_GPU = core.use_gpu()
print('>>> GPU activated? {}'.format(use_GPU))

  from .autonotebook import tqdm as notebook_tqdm


2023-12-29 13:14:30,824 [INFO] ** TORCH GPU version installed and working. **
>>> GPU activated? True


In [7]:


import os
import shutil
from aicsimageio.readers.ome_tiff_reader import OmeTiffReader

# Mapping dictionary for renaming channels
channel_map = {'Phase': 'phase', 'Far-Red': 'fish', 'DAPI': 'dapi', 'eGFP' : 'gfp'}

# Root directory
root_dir = r'C:\Users\mattt\Documents\mutants'  #this would be a directory where your biorep level folder is stored

# Navigate through directories to find OME.TIFF files and rename them
for biorep_dir in os.listdir(root_dir):
    biorep_path = os.path.join(root_dir, biorep_dir)
    if os.path.isdir(biorep_path):
        for date_strain_dir in os.listdir(biorep_path):
            date_strain_path = os.path.join(biorep_path, date_strain_dir)
            if os.path.isdir(date_strain_path):
                for sub_dir in os.listdir(date_strain_path):
                    sub_dir_path = os.path.join(date_strain_path, sub_dir)
                    if os.path.isdir(sub_dir_path):
                        for img_data_dir in os.listdir(sub_dir_path):
                            img_data_path = os.path.join(sub_dir_path, img_data_dir)
                            if os.path.isdir(img_data_path):
                                for file in os.listdir(img_data_path):
                                    if file.endswith('.ome.tiff') or file.endswith('.ome.tif'):
                                        file_path = os.path.join(img_data_path, file)
                                        
                                        # Read the OME.TIFF file to get channel names
                                        reader = OmeTiffReader(file_path)
                                        ome_metadata = reader.ome_metadata
                                        channel_names = [channel.name for channel in ome_metadata.images[0].pixels.channels]
                                        
                                        # Rename folders and files based on channel names
                                        for i, channel_name in enumerate(channel_names):
                                            # Map the original channel name to the new name using the channel_map dictionary
                                            mapped_name = channel_map.get(channel_name, channel_name)
                                            
                                            # Create the old and new folder names based on channel index
                                            old_folder_name = f"C{i+1}-MAX_sequence"
                                            new_folder_name = f"{mapped_name}-MAX_sequence"
                                            
                                            # Create the full path to the old and new folder names
                                            old_folder_path = os.path.join(img_data_path, old_folder_name)
                                            new_folder_path = os.path.join(img_data_path, new_folder_name)
                                            
                                            # If the old folder exists, rename it to the new folder name
                                            if os.path.exists(old_folder_path):
                                                shutil.move(old_folder_path, new_folder_path)
                                            
                                            # Rename individual single-page TIFF files inside the new folder
                                            for single_tiff in os.listdir(new_folder_path):
                                                # Check if the file starts with the old channel name
                                                if single_tiff.startswith(f"C{i+1}-MAX"):
                                                    # Create the full path to the old single-page TIFF file
                                                    old_single_tiff_path = os.path.join(new_folder_path, single_tiff)
                                                    
                                                    # Create the new single-page TIFF file name based on mapped channel name
                                                    new_single_tiff_name = single_tiff.replace(f"C{i+1}-MAX", f"{mapped_name}-MAX")
                                                    
                                                    # Create the full path to the new single-page TIFF file
                                                    new_single_tiff_path = os.path.join(new_folder_path, new_single_tiff_name)
                                                    
                                                    # Rename the old single-page TIFF file to the new name
                                                    shutil.move(old_single_tiff_path, new_single_tiff_path)
                                            
                                            # Create old and new multi-page TIFF file names based on channel index
                                            old_file_name = f"C{i+1}-MAX.tif"
                                            new_file_name = f"{mapped_name}-MAX.tif"
                                            
                                            # Create the full path to the old and new multi-page TIFF files
                                            old_file_path = os.path.join(img_data_path, old_file_name)
                                            new_file_path = os.path.join(img_data_path, new_file_name)
                                            
                                            # If the old multi-page TIFF file exists, rename it to the new name
                                            if os.path.exists(old_file_path):
                                                shutil.move(old_file_path, new_file_path)




## Running Omnipose for Segmentation

Here is the incorporation into the omnipose script

### Importing the images and QC to check images match expectations

### Collecting all the tiff files for omnipose


In [8]:
from skimage import io  # Importing the io module from skimage for image reading

# Initialize an empty list to store the full paths of all phase-MAX_sequence TIFF files.
# This list will include both newly renamed and previously renamed phase files.
all_phase_max_sequence_files = []

# Counter for total images
total_images = 0

# Counter for images with issues
issues_counter = 0

# Use os.walk to navigate through the directory tree rooted at root_dir.
# os.walk yields a 3-tuple (dirpath, dirnames, filenames) for each directory it visits.
# dirpath is the path to the current directory, dirnames is a list of subdirectories in the current directory,
# and filenames is a list of filenames in the current directory.

# Loop through the directory structure
for root, dirs, files in os.walk(root_dir):
    for dir in dirs:
        if dir == "phase-MAX_sequence":
            phase_folder_path = os.path.join(root, dir)
            for file in os.listdir(phase_folder_path):
                if file.endswith(".tif"):
                    full_file_path = os.path.join(phase_folder_path, file)
                    all_phase_max_sequence_files.append(full_file_path)
                    
                    # Read the image into an array
                    img = io.imread(full_file_path)
                    
                    # Perform quality checks
                    shape = img.shape
                    dtype = img.dtype
                    min_val, max_val = img.min(), img.max()

                    # Increment the total_images counter
                    total_images += 1

                    #quality control checks here
                    if shape != (512, 512) or min_val < 3500 or max_val > 35000:
                        issues_counter += 1
                        print(f"Warning: Image at {full_file_path} has issues.")
                        print(f"  - Original image shape: {shape}")
                        print(f"  - Data type: {dtype}")
                        print(f"  - Data range: min {min_val}, max {max_val}")

print(f"\nTotal number of images processed: {total_images}")
if issues_counter:
    print(f"Number of images with issues: {issues_counter}")
else:
    print("No issues found in images.")


  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 12753, max 38840
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 12797, max 60449
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 20168, max 35124
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 21608, max 38135
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 22090, max 42633
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 23546, max 36977
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 24837, max 45506
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 26722, max 41771
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 26891, max 41358
  - Original image shape: (512, 512)
  - Data type: uint16
  - Data range: min 22732, max 41475
  - Original image shape: (512, 512)
  -

### Segmentation


In [9]:
from skimage.io import imread, imsave
from skimage import img_as_uint 
import numpy as np
from cellpose_omni import models, utils, io as cellpose_io
from skimage.measure import label, regionprops
from skimage.color import label2rgb
import time
from tifffile import TiffFile, imwrite
from tifffile import TiffFile, imsave
import re

# Check for CUDA-enabled GPU availability
# Uncomment this block when you want to switch to GPU computation

import torch

# Check for GPU availability and set the gpu flag
if torch.cuda.is_available():
    gpu = True
    print("CUDA-enabled GPU found. Switching to GPU mode.")
else:
    gpu = False
    print("No CUDA-enabled GPU found. Running on CPU.")



CUDA-enabled GPU found. Switching to GPU mode.


In [16]:
# Define function to create subdirectories
def create_sub_dirs(sequence_folder):
    sub_dirs = ['masks', 'outlines']
    for sub_dir in sub_dirs:
        sub_dir_path = os.path.join(sequence_folder, sub_dir)
        if not os.path.exists(sub_dir_path):
            os.makedirs(sub_dir_path)

# Define Function for saving multi-page results
def create_output_dirs(output_folder):
    sub_dirs = ['cell_only', 'background_only']
    for sub_dir in sub_dirs:
        sub_dir_path = os.path.join(output_folder, sub_dir)
        if not os.path.exists(sub_dir_path):
            os.makedirs(sub_dir_path)

# Function to extract sequence numbers from filenames
def extract_sequence_number(filename):
    match = re.search(r'-(\d{4})\.tif', filename)
    if match:
        return int(match.group(1))
    else:
        return None


# Function for Extracting the Multipage Tiff within Directory     
def find_multipage_tiff(directory):
    current_dir = directory
    parent_dir = os.path.dirname(current_dir)
    all_files = os.listdir(os.path.dirname(current_dir))
    filtered_files = [f for f in all_files if "LZ222" in f and "ome" not in f]
    return os.path.join(parent_dir, filtered_files[0])

# Initialize model
model_path = r"C:\Users\mattt\Documents\Omni\final_4000_epoch_cellpose_residual_on_style_on_concatenation_off_omni_nclasses_4_omni_retrain_2023_11_01_01_34_28.551587"
model = model = models.CellposeModel(
    pretrained_model=model_path, 
    gpu=gpu, 
    omni=True,
    nchan=2,      # same as training 
    nclasses=3  # same as training
    
)



# define parameters
params = {
    'channels': [0,0],  # Segment based on first channel, no second channel
    'rescale': None,  # upscale or downscale your images, None = no rescaling
    'mask_threshold': -1,  # erode or dilate masks with higher or lower values
    'flow_threshold': 0,  # default is .4, but only needed if there are spurious masks to clean up; slows down output
    'transparency': True,  # transparency in flow output
    'omni': True,  # we can turn off Omnipose mask reconstruction, not advised
    'cluster': True,  # use DBSCAN clustering
    'resample': True,  # whether or not to run dynamics on rescaled grid or original grid
    'verbose': False,  # turn on if you want to see more output
    'tile': False,  # average the outputs from flipped (augmented) images; slower, usually not needed
    'niter': None,  # None lets Omnipose calculate # of Euler iterations (usually <20) but you can tune it for over/under segmentation
    'augment': False,  # Can optionally rotate the image and average outputs, usually not needed
    'affinity_seg': False,  # new feature, stay tuned...
}



## Segmentation and post-processing
for file in sorted(all_phase_max_sequence_files):
    sequence_number = extract_sequence_number(os.path.basename(file))
    image = imread(file)

    try:
        # Apply the model
        masks, flows, styles = model.eval(image, **params)

    except ValueError as e:
        print(f"Skipping file {file} due to error: {e}")
        continue  # This will skip the rest of the current loop iteration and move to the next file
    
    # Generate cell-only and background-only images
    cell_only_image = image * (masks > 0)
    background_only_image = image * (masks == 0)
    
    label_image = label(masks)

    # Create subdirectories for saving within phase-max
    directory = os.path.dirname(file)
    create_sub_dirs(directory)
    filename = os.path.basename(file)
    base_name = os.path.splitext(filename)[0]

     # Find the corresponding multi-page TIFF
    tiff_path = find_multipage_tiff(os.path.dirname(file)) # * i dont know why this is grabbed here seems out of place 
    with TiffFile(tiff_path) as tif: 
        multi_page_tiff = tif.asarray() #read image into a numpy array

    # Initialize output folders
    output_folder_cell_only = os.path.join(os.path.dirname(tiff_path), 'cell_only')
    output_folder_bg_only = os.path.join(os.path.dirname(tiff_path), 'background_only')

    # Create output directories if they don't exist
    create_output_dirs(output_folder_cell_only)
    create_output_dirs(output_folder_bg_only)
    
    sequence_number = sequence_number -1

# Apply the mask to each channel in each timepoint and Z-plane, just use the current mask 
    if sequence_number < multi_page_tiff.shape[0]: 
    
        for z in range(multi_page_tiff.shape[1]):
            for channel in range(multi_page_tiff.shape[2]):
                single_image = multi_page_tiff[sequence_number, z, channel, :, :]
                single_image_cells = single_image * (masks > 0)
                single_image_background = single_image * (masks == 0)
                        
                # Generate the output paths
                output_cell_only_path = os.path.join(output_folder_cell_only, f"Time_{sequence_number}_Z_{z}_Channel_{channel}.tif")
                output_bg_only_path = os.path.join(output_folder_bg_only, f"Time_{sequence_number}_Z_{z}_Channel_{channel}.tif")
                        
                # Save the cell-only and background-only images
                imsave(output_cell_only_path, single_image_cells)
                imsave(output_bg_only_path, single_image_background)
    else:
            print(f"Skipping timepoint {adjusted_timepoint} as it is out of bounds.")

    # Modify the output paths
    output_cell_only_path = os.path.join(directory, 'cell_only', f"{base_name}_cell_only.tif")
    output_background_only_path = os.path.join(directory, 'background_only', f"{base_name}_background_only.tif")
    output_outlines_path = os.path.join(directory, 'outlines', f"{base_name}_outlines.txt")
    output_mask_path = os.path.join(directory, 'masks', f"{base_name}_mask.tif")
    
    # Save the images and outlines
    outlines = utils.outlines_list(masks)
    cellpose_io.outlines_to_text(output_outlines_path, outlines)
    imsave(output_mask_path, masks.astype(np.uint16))

2023-12-28 23:49:05,344 [INFO] ** TORCH GPU version installed and working. **
2023-12-28 23:49:05,346 [INFO] >>>> using GPU


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

Skipping file C:\Users\mattt\Documents\mutants\biorep0\LZ22225_second\Image_Data\20231221_LZ22225_20min_inf.007\phase-MAX_sequence\phase-MAX-0012.tif due to error: Expected n_neighbors <= n_samples,  but n_samples = 23, n_neighbors = 50


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))


2023-12-28 23:50:51,777 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

2023-12-28 23:51:15,680 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

2023-12-28 23:51:22,426 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

2023-12-28 23:53:01,731 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))


2023-12-28 23:53:03,014 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))


2023-12-28 23:53:04,312 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))


2023-12-28 23:53:04,582 [INFO] No cell pixels found.
2023-12-28 23:53:04,950 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

2023-12-28 23:54:12,124 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

2023-12-28 23:55:53,601 [INFO] No cell pixels found.


  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_cells)
  imsave(output_bg_only_path, single_image_background)
  imsave(output_mask_path, masks.astype(np.uint16))
  imsave(output_cell_only_path, single_image_c

# Data Extraction and Analysis

Now that I have all of the images post mask processing in an organized format I can look into reading them into the memory and performing statistics on them. 


#### Intializing Functions for Data Extraction and Metadata Assignments

This is where we extract the bckground vs cell only data and assign them to their correct position in the image. This can be very useful to use live/iteratively while imaging to ensure you are taking good reproducible images.

In [17]:
import pandas as pd
import numpy as np
import scipy.stats
import ast 
import re



def calculate_image_stats(image_path):
    # Read the image
    image = imread(image_path)

    # Check if the image is empty or all zeros
    if image.size == 0 or np.all(image == 0):
        print(f"Warning: The image at {image_path} is empty or all zeros.")
        return None  # Return None to indicate that stats could not be calculated

    # Filter out the zero pixels
    image = image[image > 0]

    # Initialize a dictionary to store the statistics
    stats_dict = {
        'mean': np.mean(image),
        'median': np.median(image),
        'max': np.max(image),
        'min': np.min(image),
        'std_dev': np.std(image),
        'skewness': scipy.stats.skew(image),
        'kurtosis': scipy.stats.kurtosis(image),
        'pixel_count': len(image),
        'area_covered': len(image) / 262144,
        'full_filepath': str(image_path)
    }

    # Extract metadata from the file path
    path_parts = Path(image_path).parts
    file_name = Path(image_path).name

    # Attempt to extract metadata from the filename
    filename_parts = file_name.split('_')
    try:
        stats_dict['frame'] = filename_parts[-3]
        stats_dict['z_stack'] = filename_parts[-2]
        stats_dict['channel'] = filename_parts[-1].split('.')[0]
    except IndexError:
        print(f"Failed to extract frame, z_stack, channel from {file_name}")

    # Attempt to extract metadata from the directory structure
    try:
        # Extracting condition, time, and strain from the directory name
        # which is third from the last in the path
        directory_name = path_parts[-3]
        dir_name_parts = directory_name.split('_')
        stats_dict['condition'] = dir_name_parts[-1].split('.')[0] if dir_name_parts[-1].split('.')[0].isalpha() else 'unknown'
        stats_dict['time'] = dir_name_parts[-2]
        stats_dict['strain'] = dir_name_parts[-3]
    except IndexError:
        print(f"Failed to extract condition, time, strain from {directory_name}")

    # Extract 'image_type' from the directory name which is second from the last in the path
    image_type_directory = path_parts[-2]
    if 'background_only' in image_type_directory:
        stats_dict['image_type'] = 'background'
    elif 'cell_only' in image_type_directory:
        stats_dict['image_type'] = 'cell'
    else:
        stats_dict['image_type'] = 'unknown'

    # Extract 'biorep' from the directory name which is further back in the path
    biorep = next((part for part in reversed(path_parts) if 'biorep' in part.lower()), None)
    if biorep:
        biorep_match = re.search(r'biorep(\d+)', biorep, re.IGNORECASE)
        stats_dict['biorep'] = biorep_match.group(1) if biorep_match else 'unknown'
    else:
        stats_dict['biorep'] = 'unknown'

    return stats_dict


# Function to extract metadata for channel naming
def extract_metadata_channel(root_dir):
    metadata_list = []
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.ome.tiff') or file.endswith('.ome.tif'):
                full_path = os.path.join(root, file)
                reader = OmeTiffReader(full_path)
                ome_metadata = reader.ome_metadata
                channel_names = [channel.name for channel in ome_metadata.images[0].pixels.channels]
                metadata_list.append({'full_filepath': full_path, 'channel_names': channel_names})
    return pd.DataFrame(metadata_list)

#import re  # Importing the regular expression library

def merge_metadata(df, df_metadata, channel_map=None):
    
    """
    Merge metadata into the main dataframe based on matching 'full_filepath'.
    A new column 'signal' is created based on the corresponding 'channel_names' from df_metadata.
    Optionally, a channel_map can be provided to rename the channels.
    
    Parameters:
    df (DataFrame): The main dataframe containing various columns including 'full_filepath' and 'channel'.
    df_metadata (DataFrame): Metadata dataframe containing 'full_filepath' and 'channel_names'.
    channel_map (dict, optional): A dictionary to map original channel names to new names.
    
    """
    # Initialize an empty list to hold the new 'signal' column values
    signal_list = [None] * len(df)
    
    # Loop through each row in df
    for i, row in df.iterrows():
        matched = False  # Variable to indicate if a match was found
        # Loop through each row in df_metadata
        for j, row_metadata in df_metadata.iterrows():
            # Separate the root from the filename using rsplit
            root_metadata = row_metadata['full_filepath'].rsplit('\\', 1)[0]
            
            # Create a regex pattern for the root
            pattern = re.compile(re.escape(root_metadata))
            
            # Use regex to find if the root exists in 'full_filepath' of df
            if pattern.search(row['full_filepath']):
                matched = True  # A match was found, set the variable to True
                # Check if the 'channel' value can be converted to an integer
                try:
                    channel_index = int(row['channel'])
                except ValueError:
                    print(f"Warning: Could not convert channel value {row['channel']} to integer at row {i}.")
                    continue  # Skip this row and continue with the next one

                # Check the type of channel_names and convert if necessary
                if isinstance(row_metadata['channel_names'], str):
                    try:
                        channel_names = ast.literal_eval(row_metadata['channel_names'])
                    except ValueError as e:
                        print(f"Failed to parse channel_names at row {j} in df_metadata: {e}")
                        continue
                else:
                    channel_names = row_metadata['channel_names']


                    # Check if channel_index is within the range of channel_names
                    if 0 <= channel_index < len(channel_names):
                        signal_name = channel_names[channel_index]
                        
                        # Apply channel_map if provided
                        if channel_map and signal_name in channel_map:
                            signal_name = channel_map[signal_name]
                        
                        signal_list[i] = signal_name
                    else:
                        print(f"Warning: channel_index {channel_index} is out of range for channel_names {channel_names} at row {i}.")
                    break  # No need to continue checking for this row, move on to the next one
                if not matched:
                    print(f"Warning: No metadata match found for row {i} with filepath {row['full_filepath']}.")
        # Add the new 'signal' column to df
        df['signal'] = signal_list




#### Statistics and Metadata Assignment Calculation and loop

In [18]:
# Function definitions for tee_stdout and tee_stderr
import sys
from contextlib import contextmanager

@contextmanager
def tee_stdout(file_object):
    old_stdout = sys.stdout
    sys.stdout = Tee(sys.stdout, file_object)
    try:
        yield
    finally:
        sys.stdout = old_stdout

@contextmanager
def tee_stderr(file_object):
    old_stderr = sys.stderr
    sys.stderr = Tee(sys.stderr, file_object)
    try:
        yield
    finally:
        sys.stderr = old_stderr

class Tee:
    def __init__(self, *files):
        self.files = files

    def write(self, obj):
        for f in self.files:
            f.write(obj)

    def flush(self):
        for f in self.files:
            f.flush()

# Create the full path by joining root_dir and filename
filename = 'warnings_and_output.txt'
full_path = os.path.join(root_dir, filename)

# File to save warnings
with open(full_path, 'w') as f:
    with tee_stdout(f), tee_stderr(f):
        # Initialize an empty DataFrame to store the image statistics and metadata
        df = pd.DataFrame()

        # Iterate through directories and sub-directories
        for root, dirs, files in os.walk(root_dir):
            for file in files:
                if file.endswith(".tif") and ('cell_only' in root or 'background_only' in root):
                    file_path = os.path.join(root, file)
                    stats = calculate_image_stats(file_path)
                    
                    if stats is not None:  # Only add to DataFrame if stats were successfully calculated
                        df = pd.concat([df, pd.DataFrame([stats])], ignore_index=True)




In [19]:

# Initialize an empty list to store the extracted information
data_list = []

# Read the text file
with open(full_path) as file:
    lines = file.readlines()
    for line in lines:
        # Use regular expressions to match the pattern and extract the relevant parts
        match = re.search(r'at (.+\\([^\\]+)\\([^\\]+)\\([^\\]+\.tif))', line)
        if match:
            full_path, image_name, image_type, image = match.groups()
            channel = re.search(r'Channel_(\d+)', image).group(1) if re.search(r'Channel_(\d+)', image) else 'Unknown'
            data_list.append([image_name, image_type, image, f"Channel_{channel}"])

# Convert the list to a DataFrame
df_txt = pd.DataFrame(data_list, columns=['image_name', 'image_type', 'image', 'channel'])

# Count the occurrences of each unique row based on the group ['image_name', 'image_type', 'channel']
df_txt['count'] = df_txt.groupby(['image_name', 'image_type', 'channel'])['image_name'].transform('count')

# Drop duplicate rows
df_txt_unique = df_txt.drop_duplicates(subset=['image_name', 'image_type', 'channel'])

df_txt_unique.head()


Unnamed: 0,image_name,image_type,image,channel,count
0,20231219_LZ22225_0min_uninf.001,background_only,Time_0_Z_0_Channel_3.tif,Channel_3,40
40,20231219_LZ22225_0min_uninf.001,cell_only,Time_0_Z_0_Channel_3.tif,Channel_3,40
80,20231219_LZ22225_0min_uninf.002,background_only,Time_0_Z_0_Channel_3.tif,Channel_3,52
132,20231219_LZ22225_0min_uninf.002,cell_only,Time_0_Z_0_Channel_3.tif,Channel_3,52
184,20231219_LZ22225_10min_dilinf.017,background_only,Time_0_Z_0_Channel_3.tif,Channel_3,56


In [4]:
print(root)

NameError: name 'root' is not defined

In [21]:

# Extract metadata for channel naming
df_metadata = extract_metadata_channel(root_dir)

# Merge the metadata into the DataFrame
merge_metadata(df, df_metadata, channel_map = channel_map)

# Show the DataFrame (For demonstration, will only display the head)
print(df.head())


          mean  median    max   min     std_dev  skewness  kurtosis  \
0   931.619010   925.0   2938   585  102.513911  0.623455  3.627348   
1   982.262970   976.0   1597   596  107.856363  0.345948  0.177546   
2  4681.323827  4599.0  13487  2706  681.038004  1.704714  9.463394   
3   925.748429   920.0   3579   598  101.256611  0.690931  5.720718   
4   969.281303   963.0   1637   601  106.321071  0.346015  0.193220   

   pixel_count  area_covered  \
0       259923      0.991528   
1       259923      0.991528   
2       259923      0.991528   
3       259923      0.991528   
4       259923      0.991528   

                                       full_filepath frame  z_stack channel  \
0  C:\Users\mattt\Documents\mutants\biorep0\LZ222...     0  Channel       0   
1  C:\Users\mattt\Documents\mutants\biorep0\LZ222...     0  Channel       1   
2  C:\Users\mattt\Documents\mutants\biorep0\LZ222...     0  Channel       2   
3  C:\Users\mattt\Documents\mutants\biorep0\LZ222...     1  Chan

In [22]:

# Specify the filename
filename = 'df.csv'

# Create the full path by joining root_dir and filename
full_path = os.path.join(root_dir, filename)

# Write the DataFrame to a CSV file
df.to_csv(full_path, index=False)


In [10]:
import os
from PIL import Image

def merge_images(phase_path, fish_path, output_path):
    # Open the phase and fish images
    phase_img = Image.open(phase_path)
    fish_img = Image.open(fish_path)

    # Merge images (assuming both images are of the same size)
    merged_img = Image.merge("RGB", (phase_img.split()[0], fish_img.split()[0], fish_img.split()[0]))

    # Save the merged image
    merged_img.save(output_path)

#root_dir = 'path/to/mutants'  Replace with your root directory path if not already identified

# Traverse through directories
for root, dirs, files in os.walk(root_dir):
    if 'phase-MAX.tif' in files and 'fish-MAX.tif' in files:
        # Construct file paths
        phase_path = os.path.join(root, 'phase-MAX.tif')
        fish_path = os.path.join(root, 'fish-MAX.tif')
        output_path = os.path.join(root, os.path.basename(root) + '_for_microbeJ.tif')

        # Merge and save the images
        merge_images(phase_path, fish_path, output_path)
        print(f"Merged image saved at {output_path}")


ValueError: mode mismatch