This notebook documents the analysis of intercellular spaces in live cerebral organoids, as described in the manuscript.

## Analysis Workflow
1. **Segmentation, via 'otsu.py'**
   - High signal-to-background enabled robust global thresholding.  
   - Otsu’s method + empirically defined offset applied frame by frame.  
   - Binary masks generated and overlaid with raw images for QC in Napari.  
   - Segmentation errors corrected manually with Napari’s interactive labeling tools.
   - Pixel statistics exported as .csv files.

2. **Quantification**  
   - Space ratio = (segmented intercellular space area ÷ total image area).  
   - Temporal variability assessed by fitting space-ratio time series with linear regression and computing RMSE of residuals.  

## Outputs  
- Binary masks of intercellular spaces (QC’ed manually).  
- Overlays of masks with raw images.  
- Summaries of space ratio values.  
- Temporal variability metrics (linear regression slope, RMSE).  


In [None]:
# imports#
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from pathlib import Path
import openpyxl
from statistics import mean 
import seaborn as sns
import scipy.stats as stats
import itertools
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multitest as multitest

### Step 1: read and process .csv data

In [None]:
# Folder containing the .csv files generated by the segmentation pipeline.
base_folder = "input_folder_name"

# Output folder for the per-file fit plots; created here if it is missing.
plot_dir_deg1 = os.path.join(base_folder, "polyfit_deg1_plots")
os.makedirs(plot_dir_deg1, exist_ok=True)

In [None]:
# === FUNCTION TO ANALYZE A CSV ===
## Computes: space rate per time point, mean space rate per track, and RMSE of the linear-regression residuals

def analyze_csv(file_path, *, plot_dir=None, ylim=(0.05, 0.40)):
    """Compute space-rate metrics for one segmentation CSV and save a fit plot.

    The CSV must provide the columns "Slice Index", "Pixel Count" and
    "Total Pixels". The space rate of each row is Pixel Count / Total Pixels.
    A straight line is fitted to space rate versus slice index, and the
    root-mean-square of the residuals is reported as the variability metric.

    Parameters
    ----------
    file_path : str or path-like
        Path of the CSV file to analyze.
    plot_dir : str or path-like, optional
        Folder the PNG plot is written to. Defaults to the module-level
        ``plot_dir_deg1``.
    ylim : tuple of float or None, optional
        y-axis limits of the plot. ``None`` lets matplotlib autoscale.

    Returns
    -------
    dict
        Keys: "file" (path as string), "condition" (name of the parent
        folder), "mean_rate", "std_rate", "time" (last slice index in the
        file) and "residual_rmsd_deg1" (RMS residual of the linear fit).
    """
    df = pd.read_csv(file_path)

    # Get time and space rate
    time = df["Slice Index"].values
    space_rate = df["Pixel Count"].values / df["Total Pixels"].values
    filename = Path(file_path).stem
    mean_rate = np.mean(space_rate)
    std_rate = np.std(space_rate)

    # RMSE of the constant-mean model, shown in the plot legend. This name
    # must be defined before the legend label is formatted; without it the
    # function raises NameError for every file.
    rmse_mean = np.sqrt(np.mean((space_rate - mean_rate) ** 2))

    metrics = {
        "file": str(file_path),
        "condition": Path(file_path).parent.name,
        "mean_rate": mean_rate,
        "std_rate": std_rate,
        "time": time[-1]
    }

    # === DEGREE 1 POLYNOMIAL FIT ===
    coeffs1 = np.polyfit(time, space_rate, deg=1)
    predicted1 = np.polyval(coeffs1, time)
    residuals1 = space_rate - predicted1
    rmsd1 = np.sqrt(np.mean(residuals1 ** 2))
    metrics["residual_rmsd_deg1"] = rmsd1

    # === PLOT ===
    ## visualize the space rate over time and the linear fit, one plot per CSV file
    if plot_dir is None:
        plot_dir = plot_dir_deg1

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.plot(time, space_rate, 'o-', label="Space Rate")
        ax.plot(time, predicted1, 'r--', label=f"Poly Deg 1 (RMSD={rmsd1:.4f})")
        ax.axhline(mean_rate, color='gray', linestyle=':', label=f"Mean = {mean_rate:.3f} (RMSE={rmse_mean:.4f})")
        ax.set_xlabel("Slice Index")
        ax.set_ylabel("Space Rate")
        if ylim is not None:
            ax.set_ylim(*ylim)
        ax.set_title(f"PolyFit (1°) + Mean RMSE: {filename}")
        ax.legend()
        fig.tight_layout()
        # NOTE: the plot name is built from the file stem only, so CSVs with
        # the same name in different sub-folders write to the same PNG.
        fig.savefig(os.path.join(plot_dir, f"{filename}_fit-deg1_mean.png"))
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    return metrics

In [None]:
# === Main loop: walk base_folder recursively and analyze every CSV found ===
results = []

for root, dirs, files in os.walk(base_folder):
    csv_names = [name for name in files if name.lower().endswith(".csv")]
    for csv_name in csv_names:
        file_path = os.path.join(root, csv_name)
        try:
            metrics = analyze_csv(file_path)
        except Exception as e:
            # Report the failure and move on so one bad file does not stop the batch.
            print(f"❌ Error processing {file_path}: {e}")
        else:
            results.append(metrics)
            print(f"Processed: {file_path}")

# === Save results to Excel: one row per successfully analyzed CSV ===
results_df = pd.DataFrame(results)
output_excel_path = os.path.join(base_folder, "space_rate_metrics.xlsx")
results_df.to_excel(output_excel_path, index=False)

### Step 2: visualize all data (space ratio at each time point)

In [None]:
# ==== Section 1: prepare data for plotting by concatenating all CSV data into one Excel sheet

input_folder_path = "input_folder_name"  # folder where the CSV files are located

figure_save = 'output_folder_name'  # output folder for the generated plots
# Output Excel file for the concatenated CSV data. pandas picks the writer
# engine from the file extension, so the real path should end in ".xlsx".
output_excel = 'output_excel_name'

frame_interval_min = 5  # acquisition interval between slices, in minutes

all_data = []

# Sort the listing so the row order of the combined sheet is reproducible;
# os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(input_folder_path)):
    # Case-insensitive extension match, consistent with the Step 1 loop.
    if not file.lower().endswith(".csv"):
        continue

    file_path = os.path.join(input_folder_path, file)
    df = pd.read_csv(file_path)

    df['time'] = df['Slice Index'] * frame_interval_min
    df['space_rate'] = df['Pixel Count'] / df['Total Pixels']
    df['index'] = os.path.splitext(file)[0]  # filename without extension

    all_data.append(df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the folder instead.
if not all_data:
    raise FileNotFoundError(f"No .csv files found in {input_folder_path!r}")

combined_df = pd.concat(all_data, ignore_index=True)

combined_df.to_excel(output_excel, index=False)
print(f"✅ Combined data saved to: {output_excel}")

In [None]:
# ==== Section 2: draw trajectory plots

## read data
file_path = output_excel
df = pd.read_excel(file_path)
print(df.head())

figure_name = os.path.splitext(os.path.basename(file_path))[0]

# savefig does not create missing folders, so make sure the target exists.
os.makedirs(figure_save, exist_ok=True)

# 'index' holds the source CSV file name (without extension), so each group
# is the trajectory of one file.
grouped = df.groupby('index')

# Keep only trajectories with at least n_timepoints rows, truncated to the
# first n_timepoints rows so all trajectories have the same length.
n_timepoints = 8
filtered_groups = {
    name: group.iloc[:n_timepoints]
    for name, group in grouped
    if len(group) >= n_timepoints
}
if not filtered_groups:
    raise ValueError(
        f"No trajectory in {file_path!r} has at least {n_timepoints} time points"
    )
filtered_df = pd.concat(filtered_groups.values(), ignore_index=True)


## Plot All Trajectories
plt.figure(figsize=(6, 4))
for name, group in filtered_df.groupby('index'):
    plt.scatter(group['time'], group['space_rate'], s=40, alpha=0.4)
    plt.plot(group['time'], group['space_rate'], alpha=0.5, linewidth=1)

plt.xlabel('time', fontsize=12)
plt.ylabel('space_rate', fontsize=12)  # stray ")" removed from the label
plt.title(f'{figure_name} - All Trajectories', fontsize=12)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(5))
plt.xlim(0, 35)

plt.tight_layout()
plt.savefig(os.path.join(figure_save, f"{figure_name}_all_trajs.pdf"))
plt.show()

# Not used in this cell; kept in case a later cell writes a group summary.
summary_save_path = os.path.join(figure_save, f"{figure_name}_group_summary.txt")


# Plot Mean+std Trajectories ---
# Rows are time points, columns are trajectories; mean/SD are taken across
# trajectories at each time point.
pivot_table = filtered_df.pivot_table(index='time', columns='index', values='space_rate')

mean_values = pivot_table.mean(axis=1)
std_values = pivot_table.std(axis=1)

plt.figure(figsize=(6, 4))
plt.plot(mean_values.index, mean_values.values, color='black', linewidth=2, label='Mean')
plt.fill_between(mean_values.index, mean_values - std_values, mean_values + std_values, color='gray', alpha=0.5, label='Mean ± SD')

# Same axis labels as the trajectory plot above.
plt.xlabel('time', fontsize=12)
plt.ylabel('space_rate', fontsize=12)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(5))

plt.xlim(0, 35)

# NOTE(review): the title says "Normalized", but no normalization is applied
# to space_rate in this cell — confirm the intended wording.
plt.title(f"{figure_name} - Normalized Mean ± SD", fontsize=12)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(figure_save, f"{figure_name}_mean+sd.pdf"))
plt.show()