# **Paramecium cell shape analysis from IF experiments**
-------

Code adapted from the cell mask pipeline of Benedetta Noferi.

Input:
- .nd2 files from the IF experiments put together in a single folder (folder_path)

Explanation workflow:
- The code takes as an input the TIF images obtained through a macro of imageJ, that already opens the `.nd2` files, obtains the metadata and creates (and saves) a maximum projection from the centrin channel (second channel) of each image to use for extracting the shape parameters.
- (If needed, different cell types can be put into different categories automatically using the image name.)
- The max projection of the centrin channel is blurred with a Gaussian filter and then thresholded using Otsu's method. Regionprops is then used to filter out wrong shapes and measure the size and shape of all cells. The filtered thresholded images are saved. Shape parameters are saved in a CSV file and plotted in the Jupyter notebook.


In [None]:
#Import necessary libraries
import nd2
import os
import re
import pims
import numpy as np
import pandas as pd
import seaborn as sns
from skimage import measure
import matplotlib.pyplot as plt
from scipy import ndimage as ndi
import matplotlib.patches as patches
from scipy.ndimage import binary_closing
from skimage.filters import gaussian, threshold_otsu
from microfilm import colorify
import tifffile as tiff
from concurrent.futures import ThreadPoolExecutor
from functions_shape_analysis import *  


# Path to folder that contains the .nd2 files.
# Keep the trailing path separator when changing this value, so that any path built
# as `folder_path + name` stays valid.
folder_path = "W:\\Users\\Daphne\\Imaging_Daphne\\25-06-25_XC_ptetwt_IF\\"

### **.nd2 import and z-projection**

In [None]:
# Import the .nd2 files found in folder_path. The helper (from functions_shape_analysis)
# returns three values; judging by the names, the sample grouping plus one entry per
# channel -- TODO confirm against the helper's definition.
samplegroup, centrin, dna = import_and_classify_nd2_files_2ch(folder_path) # 2 channels
# samplegroup, centrin, polye, dna = import_and_classify_nd2_files(folder_path) # 3 channels
# NOTE(review): the data are loaded with the "_2ch" importer, but the display helper
# called here has no "_2ch" suffix (the next cell uses display_multichannel_samplegroup_2ch).
# Confirm that this helper is the intended one for 2-channel data.
display_multichannel_samplegroup(samplegroup)

In [None]:
# 2 channel images: visualization with scale bar.
# This cell only displays `samplegroup`; the data must already have been loaded.
display_multichannel_samplegroup_2ch(samplegroup)

# # 3 channel images: Data loading and visualization with scale bar
# samplegroup, centrin, polye, dna = import_and_classify_nd2_files(folder_path)
# display_multichannel_samplegroup(samplegroup)

In [None]:
# Choose the channel used for the shape analysis, then list the sample groups
# (one or more cell types) and, for each group, its image files with their
# calibration and image shape.
centringroup = centrin
# centringroup = polye # if the order of the channels is different, you can use polye or dna 
# centringroup = dna # if the order of the channels is different, you can use polye or dna
print("Samplegroup:", centringroup.keys())

# Optional: print every (filename, image, calibration) entry of the dictionary.
# NOTE: the loop variables stay defined after this cell; `calibration` is passed to
# display_samplegroup_with_scale in a later cell, so do not rename it here.
for sample in centringroup:
    print(f"Sample: {sample}")
    for filename, image, calibration in centringroup[sample]:
        print(f"  Filename: {filename}, Calibration: {calibration}, Image shape: {image.shape}")


### **Tif of centrin maximum projection in Z**

In [None]:
# Now that the images are classified by type, display them together with a scale.
# NOTE(review): `calibration` is not assigned in this cell. In the visible cells it is
# only set as a loop variable of the listing cell above, so it holds the calibration of
# the last image listed there (and is undefined if that cell was not run).
# Confirm that display_samplegroup_with_scale expects one global calibration value.
display_samplegroup_with_scale(centringroup, calibration)
plt.show()

### **Image pre-processing for cell masking**

For the creation of the mask we first apply a broad Gaussian blurring, followed by Otsu thresholding and hole filling. The initial data is the maximum projection of the centrin channel, which shows the cell shape.

**Gaussian blurring**

In [None]:
# Gaussian blurring, i.e. smoothing of the image for a better thresholding
sigma = 10  # handed to the helper as sigma=...; presumably forwarded to skimage's gaussian -- TODO confirm
gaussian_blur = apply_function_to_samplegroup(centringroup, gaussian, sigma=sigma)

# Display the processed images
display_samplegroup(gaussian_blur)


**Otsu threshold**

In [None]:
# Threshold every blurred image with Otsu's method and store the resulting binary
# masks per sample as (filename, mask, calibration) tuples in Otsu_threshold.
otsu_factor = 1.1  # multiplier applied to the Otsu threshold (>1 keeps fewer pixels); VARY PER SAMPLE
closing_footprint = np.ones((3, 3))  # 3x3 structuring element for the binary closing

Otsu_threshold = {}
for sample, files in gaussian_blur.items():
    # The image loop variable is deliberately not called `centrin`: that name holds the
    # channel dictionary created by the import cell, and overwriting it with a single
    # image would break `centringroup = centrin` when that cell is run again.
    for filename, blurred_image, calibration in files:
        # Otsu's threshold of the blurred image, scaled by the per-sample factor
        otsu_value = threshold_otsu(blurred_image)
        binary_mask = blurred_image > otsu_value * otsu_factor
        # Binary closing (not fundamental at this point, but good procedure)
        closed_mask = binary_closing(binary_mask, structure=closing_footprint)
        Otsu_threshold.setdefault(sample, []).append((filename, closed_mask, calibration))

# Display the thresholded images
display_samplegroup(Otsu_threshold)


**Filling holes**

In [None]:
# Run scipy's binary_fill_holes on every thresholded mask through the samplegroup helper.
Filled_holes = apply_function_to_samplegroup(Otsu_threshold, ndi.binary_fill_holes)

# Call the function to display the images
display_samplegroup(Filled_holes)

**Label each cell in the image from 1 to N (0 is the background)**

In [None]:
# Give every connected region of each hole-filled mask its own integer label.
# masked_images[sample] is a list of (filename, label image, region count, calibration).
masked_images = {}

for group_name, group_files in Filled_holes.items():
    for image_name, filled_mask, calib in group_files:
        # ndi.label returns the labeled array and the number of regions found
        label_image, region_count = ndi.label(filled_mask)
        masked_images.setdefault(group_name, []).append(
            (image_name, label_image, region_count, calib)
        )

# Show the labeled images together with their legend
display_samplegroup_with_legend(masked_images)

**Filtering of the regions**

Before calculating the cell shape parameters using regionprops, we filter out noise (using minimum area) and remove the cells that are attached to the border

In [None]:
# Filter the labeled regions of every image (minimum size, not touching the border),
# relabel the survivors 1..N, display them and save each result as a 16-bit TIFF.
masked_filtered_images = {}

# Minimum region size, expressed in PIXELS: regionprops is called without `spacing`,
# so region.area is a plain pixel count. (Scaling the area by 1/calibration**2 and then
# multiplying by calibration**2 again, as done before, yields the same pixel count.)
area_threshold = 50000

for sample, files in masked_images.items():
    for filename, labeled_image, num_features, calibration in files:
        regions = measure.regionprops(labeled_image)

        # Image dimensions, used for the border test below
        image_height, image_width = labeled_image.shape

        # region.bbox is (min_row, min_col, max_row, max_col) in pixel coordinates
        filtered_regions = [
            region for region in regions
            if region.area >= area_threshold
            # not too close to the top and left borders
            and region.bbox[0] >= 2 and region.bbox[1] >= 2
            # not too close to the bottom and right borders
            and region.bbox[2] <= image_height * 0.999
            and region.bbox[3] <= image_width * 0.999
        ]

        # New label image that contains only the kept regions, numbered from 1
        filtered_labeled_image = np.zeros_like(labeled_image)
        for new_label, region in enumerate(filtered_regions, start=1):
            filtered_labeled_image[labeled_image == region.label] = new_label

        masked_filtered_images.setdefault(sample, []).append(
            (filename, filtered_labeled_image, len(filtered_regions), calibration)
        )

# Display the filtered labeled images
display_samplegroup_with_labels(masked_filtered_images)

# Save the filtered labeled images to a sub-folder of folder_path
output_folder = os.path.join(folder_path, "Filtered_Labeled_Images")
os.makedirs(output_folder, exist_ok=True)

for sample, files in masked_filtered_images.items():
    for filename, filtered_labeled_image, num_features, calibration in files:
        # Replace the original extension by .tif (splitext works for any extension length)
        stem = os.path.splitext(filename)[0]
        new_filename = os.path.join(output_folder, f"{stem}.tif")

        # uint16 keeps label values up to 65535
        tiff.imwrite(new_filename, filtered_labeled_image.astype(np.uint16))

**Put the regions measurements in a dataframe**

In [None]:
# Collect the shape measurements of every filtered region into one table
# (one row per region), display it and save it as CSV in folder_path.
measurement_columns = [
    'Sample', 'Number Within Sample', 'Filename', 'Label',
    'Area', 'Major Axis Length', 'Minor Axis Length', 'Aspect Ratio',
]

measurement_rows = []
for sample, files in masked_filtered_images.items():
    for file_index, (filename, labeled_image, num_features, calibration) in enumerate(files):
        for region in measure.regionprops(labeled_image):
            # axis_major_length / axis_minor_length are the current regionprops names
            # for the properties formerly spelled major_axis_length / minor_axis_length
            major_px = region.axis_major_length
            minor_px = region.axis_minor_length
            measurement_rows.append({
                'Sample': sample,
                'Number Within Sample': file_index + 1,  # 1-based position of the file in its sample
                'Filename': filename,
                'Label': region.label,
                # Pixel measurements are scaled by the calibration (the area by its square)
                'Area': region.area * (calibration ** 2),
                'Major Axis Length': major_px * calibration,
                'Minor Axis Length': minor_px * calibration,
                # Unitless ratio; NaN instead of a division error for a degenerate region
                'Aspect Ratio': major_px / minor_px if minor_px > 0 else np.nan,
            })

# Passing `columns` keeps the column order (and the header) even when no region was found
df_data = pd.DataFrame(measurement_rows, columns=measurement_columns)

# Display the DataFrame
display(df_data)

# Save the DataFrame to a CSV file for later analysis
df_data.to_csv(os.path.join(folder_path, 'region_properties.csv'), index=False)

**Data visualization for different samples**

In [None]:
# Compute the mean properties from the filtered label images and display the
# sample groups together with them (both helpers come from functions_shape_analysis).

mean_properties = calculate_mean_properties(masked_filtered_images)
display_samplegroup_with_mean_properties(masked_filtered_images, mean_properties)

### **Data analysis from regionprops**

- The cell area
- The cell major and minor axis length (fit as an ellipse), including the aspect ratio
- The cell eccentricity (eccentricity of the ellipse that has the same second moments as the region)
- The cell solidity, (the area of the region / area of the convex hull of the region)

In [None]:
# Convert the masked_filtered_images dictionary into a DataFrame (one row per region)
# for easier plotting.
plot_columns = [
    'Sample',
    'Area',
    'Major Axis Length',  # length of the major axis of the ellipse that has the same normalized second central moments as the region
    'Minor Axis Length',  # length of the minor axis of the ellipse that has the same normalized second central moments as the region
    'Aspect Ratio',       # ratio of the major axis length to the minor axis length
    'Eccentricity',       # eccentricity of the ellipse that has the same second moments as the region
    'Solidity',           # area of the region / area of the convex hull of the region
    'Metadata',           # cell number extracted from the filename, or "Unknown"
]

# Compiled once: filenames ending in "_cell<number>.nd2" carry the cell number
cell_number_pattern = re.compile(r'_cell(\d+)\.nd2$')

plot_rows = []
for sample, files in masked_filtered_images.items():
    for filename, labeled_image, num_features, calibration in files:
        # Extract metadata from the filename
        match = cell_number_pattern.search(filename)
        metadata = int(match.group(1)) if match else "Unknown"

        for region in measure.regionprops(labeled_image):
            # axis_major_length / axis_minor_length are the current regionprops names
            # for the properties formerly spelled major_axis_length / minor_axis_length
            major_px = region.axis_major_length
            minor_px = region.axis_minor_length
            plot_rows.append({
                'Sample': sample,
                # Pixel measurements are scaled by the calibration (the area by its square)
                'Area': region.area * (calibration ** 2),
                'Major Axis Length': major_px * calibration,
                'Minor Axis Length': minor_px * calibration,
                # NaN instead of a division error for a degenerate region
                'Aspect Ratio': major_px / minor_px if minor_px > 0 else np.nan,
                'Eccentricity': region.eccentricity,
                'Solidity': region.solidity,
                'Metadata': metadata,
            })

# Passing `columns` keeps the column order even when no region was found
df_plot = pd.DataFrame(plot_rows, columns=plot_columns)



def _plot_shape_parameter(df, column, *, title=None, ylabel=None, figsize=(6, 8),
                          point_color='black', point_alpha=0.6, box_linewidth=None,
                          ylabel_fontsize=12, tick_fontsize=None, xlabel='Sample',
                          tight_layout=True, save_path=None, annotate=False):
    """Draw one box plot (with mean marker) plus the individual data points per sample.

    Parameters
    ----------
    df : DataFrame with a 'Sample' column and the column to plot
        (and a 'Metadata' column when ``annotate`` is True).
    column : name of the column plotted on the y axis.
    title, ylabel : text for the title / y label; default to ``column``.
    figsize : figure size handed to ``plt.figure``.
    point_color, point_alpha : appearance of the strip-plot points.
    box_linewidth : line width of the boxes; None keeps seaborn's default.
    ylabel_fontsize : font size of the y label.
    tick_fontsize : font size of the tick labels; None leaves them untouched.
    xlabel : x label text; None leaves the label set by seaborn untouched.
    tight_layout : call ``plt.tight_layout()`` before showing.
    save_path : if given, the figure is saved there (300 dpi, tight bounding box).
    annotate : write the 'Metadata' value next to every point and add a legend.
    """
    plt.figure(figsize=figsize)

    # Only pass linewidth when requested so seaborn's own default is used otherwise
    box_kwargs = {} if box_linewidth is None else {'linewidth': box_linewidth}
    sns.boxplot(x='Sample', hue='Sample', y=column, data=df, palette='crest',
                showfliers=False, legend=False, showmeans=True,
                meanprops={"marker": "x", "markeredgecolor": "black", "markersize": "7"},
                **box_kwargs)
    sns.stripplot(x='Sample', y=column, data=df, color=point_color, alpha=point_alpha,
                  jitter=True, dodge=True)

    if annotate:
        from matplotlib.lines import Line2D

        # Metadata annotation next to each data point
        for _, row in df.iterrows():
            plt.text(
                x=row['Sample'], y=row[column], s=str(row['Metadata']),
                color='blue', fontsize=8, ha='left', va='center'
            )
        # Custom legend explaining the markers
        legend_elements = [
            Line2D([0], [0], marker='o', color='w', label='Datapoint', markerfacecolor=point_color, markersize=8),
            Line2D([0], [0], marker='x', color='w', label='Mean', markeredgecolor='black', markersize=8),
            Line2D([0], [0], color='blue', label='Cell numbers', markersize=0),
        ]
        plt.legend(handles=legend_elements, loc='upper right', fontsize=10)

    plt.title(column if title is None else title, fontsize=16, weight='bold')
    plt.ylabel(column if ylabel is None else ylabel, fontsize=ylabel_fontsize)
    if tick_fontsize is not None:
        plt.xticks(fontsize=tick_fontsize)
        plt.yticks(fontsize=tick_fontsize)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize=12)
    if tight_layout:
        plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()


# One theme for all plots of this cell
sns.set_theme(style="white")

# Plot for Area (the only plot that is written to disk; it uses red points, thicker
# boxes, larger fonts and keeps seaborn's own x label)
_plot_shape_parameter(
    df_plot, 'Area',
    title='Cell Area', ylabel=r'Area [$\mu m^2$]',
    point_color='red', point_alpha=0.5, box_linewidth=2,
    ylabel_fontsize=14, tick_fontsize=12,
    xlabel=None, tight_layout=False,
    save_path=os.path.join(folder_path, 'cell_area_boxplot.png'),
)

# Plot for Major Axis Length
_plot_shape_parameter(df_plot, 'Major Axis Length', ylabel=r'Major Axis Length [$\mu $m]')

# Plot for Minor Axis Length
_plot_shape_parameter(df_plot, 'Minor Axis Length', ylabel=r'Minor Axis Length [$\mu $m]')

# Plot for Aspect Ratio
_plot_shape_parameter(df_plot, 'Aspect Ratio')

# Plot for Eccentricity
_plot_shape_parameter(df_plot, 'Eccentricity')

# Plot for Solidity (wider figure)
_plot_shape_parameter(df_plot, 'Solidity', figsize=(12, 6))

In [1]:
# Summary statistics per shape parameter: mean, standard deviation and
# standard error of the mean (SEM), computed over the combined results table.
import pandas as pd
import numpy as np

# Combined CSV with the region properties of all experiments
csv_path = r'W:\Users\Daphne\WT_RESULTS\WT_IF\region_properties_IF_allcombined.csv'
df = pd.read_csv(csv_path)

# Parameters to summarise
columns = ['Area', 'Major Axis Length', 'Minor Axis Length', 'Aspect Ratio']

# results[column] -> {'mean': ..., 'std': ..., 'sem': ...}
results = {
    col: {
        'mean': df[col].mean(),
        'std': df[col].std(),
        'sem': df[col].sem(),
    }
    for col in columns
}

# Print one small report per parameter, three decimals each
for col in results:
    stats = results[col]
    print(f"{col}:")
    print(f"  Mean: {stats['mean']:.3f}")
    print(f"  Std: {stats['std']:.3f}")
    print(f"  SEM: {stats['sem']:.3f}")

Area:
  Mean: 3616.437
  Std: 547.488
  SEM: 61.597
Major Axis Length:
  Mean: 114.300
  Std: 9.620
  SEM: 1.082
Minor Axis Length:
  Mean: 40.342
  Std: 3.540
  SEM: 0.398
Aspect Ratio:
  Mean: 2.845
  Std: 0.247
  SEM: 0.028
