## Computing the orientation of the effective stroke of the cilia beat

This code takes as input a folder containing one subfolder per cell, each holding csv files with the trajectories of cilia tips during the effective stroke of the cilia beat (obtained using ImageJ). All the cells were oriented correctly. Here we use 3 different methods to compute the orientation of the effective stroke.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.linear_model import RANSACRegressor, LinearRegression


# Base folder that contains one subfolder per cell; each subfolder holds the csv files with
# that cell's cilia trajectories. When save_plots is True, a plot subdirectory is created
# inside every cell subfolder.
# base_folder = Path("W:/Users/Daphne/WT_RESULTS/WT_CB_direction/Ventral/zone2/2/")
base_folder = Path("W:/Users/Daphne/WT_RESULTS/WT_OA/PM_CBdirection/")
save_plots = True  # save one "<csv stem>_fits.png" per trajectory into the plot subdirectory
show_plots = False  # if True, plt.show() is called for each figure instead of closing it
plots_subdir = "plots"  # name of the plot subdirectory inside each cell subfolder


# Help functions
def _angle_from_vector(dx: float, dy: float) -> float:
    """Return the angle (in degrees) of the vector (dx, dy) measured from the y-axis.

    The arguments are passed to ``arctan2`` as ``(dx, dy)``, so a vector along
    +y gives 0 and a vector along +x gives +90.
    """
    angle_rad = np.arctan2(dx, dy)
    return np.rad2deg(angle_rad)

def _normalize_angle(angle_deg: float) -> float:
    """Fold an angle in degrees into [-90, +90] so that it describes an undirected line.

    Angles that differ by 180 degrees describe the same orientation and map to
    the same value.
    """
    # First wrap into [-180, 180), then fold the two outer quadrants inwards.
    wrapped = (angle_deg + 180) % 360 - 180
    if wrapped > 90:
        return wrapped - 180
    if wrapped < -90:
        return wrapped + 180
    return wrapped

# Fit functions
def _fit_line_standard(x: np.ndarray, y: np.ndarray):
    """Least-squares fit of the line ``y = a*x + b`` through the points (x, y).

    Points with a non-finite x or y coordinate are ignored, so that the
    vertical check and the fit itself see the same set of points
    (``np.polyfit`` cannot handle NaN values).

    Returns:
        ``None`` if fewer than two finite points remain,
        ``{"model": "vertical", "x0": <mean x>}`` if all x values coincide
        (the slope of y on x is undefined), otherwise
        ``{"model": "yx", "a": <slope>, "b": <intercept>}``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    if x.size < 2:
        return None
    if np.isclose(np.std(x), 0.0, atol=1e-12):
        return {"model": "vertical", "x0": np.mean(x)}
    a, b = np.polyfit(x, y, 1)
    return {"model": "yx", "a": a, "b": b}

def _fit_line_ransac(x: np.ndarray, y: np.ndarray, random_state=0):
    """Robust fit of the line ``y = a*x + b`` with RANSAC.

    Points with a non-finite x or y coordinate are ignored. Each RANSAC trial
    draws ``max(2, half of the points)`` samples.

    Args:
        x, y: coordinates of the trajectory points.
        random_state: seed passed to ``RANSACRegressor`` so that repeated runs
            on the same trajectory give the same line. Pass ``None`` for an
            unseeded (non-reproducible) fit.

    Returns:
        ``{"model": "ransac", "a": <slope>, "b": <intercept>}``, or ``None`` if
        fewer than two finite points remain or the fit raises ``ValueError``.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]
    if x.size < 2:
        return None

    ransac = RANSACRegressor(
        estimator=LinearRegression(),
        min_samples=max(2, int(0.5 * x.size)),
        random_state=random_state,
    )
    try:
        ransac.fit(x.reshape(-1, 1), y)
    except ValueError:
        # Only a failed fit is treated as "no result"; other exceptions
        # (e.g. a wrong keyword for the installed scikit-learn version) should
        # surface instead of silently turning every angle into NaN.
        return None

    a = float(np.ravel(ransac.estimator_.coef_)[0])
    b = float(np.ravel(ransac.estimator_.intercept_)[0])
    return {"model": "ransac", "a": a, "b": b}

# Orientation calculator
def compute_orientations_for_df(df: pd.DataFrame):
    """Compute the orientation of one trajectory with three methods.

    Args:
        df: table with the columns "X" and "Y" holding the trajectory points in
            order. Rows with a non-finite X or Y are ignored.

    Returns:
        dict with
        - "endpoints_angle": angle of the vector from the first to the last point,
        - "std_angle": angle of the least-squares line,
        - "ransac_angle": angle of the RANSAC line,
        - "line_points_std" / "line_points_ransac": ``(x, y)`` arrays for drawing
          the fitted lines (the data points themselves when no line was fitted).
        Angles are in degrees relative to the y-axis, folded into [-90, +90];
        an angle that cannot be determined is NaN.
    """
    x = df["X"].to_numpy(dtype=float)
    y = df["Y"].to_numpy(dtype=float)

    # Drop incomplete points so that all three methods use the same data.
    keep = np.isfinite(x) & np.isfinite(y)
    x, y = x[keep], y[keep]

    # Endpoints method: undefined without two points or with zero displacement.
    angle_end = np.nan
    if x.size >= 2:
        dx = float(x[-1] - x[0])
        dy = float(y[-1] - y[0])
        if dx != 0.0 or dy != 0.0:
            angle_end = _normalize_angle(_angle_from_vector(dx, dy))

    # Standard fit
    fit_std = _fit_line_standard(x, y)
    is_vertical = bool(fit_std) and fit_std["model"] == "vertical"
    if fit_std and fit_std["model"] == "yx":
        angle_std = _normalize_angle(_angle_from_vector(1, fit_std["a"]))
        x_fit = np.linspace(np.nanmin(x), np.nanmax(x), 200)
        y_fit = fit_std["a"] * x_fit + fit_std["b"]
    elif is_vertical:
        angle_std = 0.0
        y_fit = np.linspace(np.nanmin(y), np.nanmax(y), 200)
        x_fit = np.full_like(y_fit, fit_std["x0"])
    else:
        angle_std = np.nan
        x_fit, y_fit = x, y

    # RANSAC robust fit
    if is_vertical:
        # With all x equal, a regression of y on x has no defined slope; report
        # the same vertical line as the standard fit instead of a degenerate one.
        angle_ransac = 0.0
        x_fit_r, y_fit_r = x_fit, y_fit
    else:
        fit_ransac = _fit_line_ransac(x, y)
        if fit_ransac:
            angle_ransac = _normalize_angle(_angle_from_vector(1, fit_ransac["a"]))
            x_fit_r = np.linspace(np.nanmin(x), np.nanmax(x), 200)
            y_fit_r = fit_ransac["a"] * x_fit_r + fit_ransac["b"]
        else:
            angle_ransac = np.nan
            x_fit_r, y_fit_r = x, y

    return {
        "endpoints_angle": angle_end,
        "std_angle": angle_std,
        "ransac_angle": angle_ransac,
        "line_points_std": (x_fit, y_fit),
        "line_points_ransac": (x_fit_r, y_fit_r),
    }



# Main loop
# Each cell has a subfolder (inside base_folder) with its csv files of the cilia trajectories.
# For every cell: compute the three orientation angles per trajectory, save one diagnostic
# plot per trajectory, and write "<cell folder name>_orientations.csv" into the cell folder.

# for cell_folder in base_folder.glob("cell*/"):
for cell_folder in sorted(base_folder.glob("*/")):
    # Before Python 3.11 a glob pattern ending in "/" also matched plain files, e.g. the
    # summary csv files that this notebook writes into base_folder.
    if not cell_folder.is_dir():
        continue

    orientations_file = cell_folder / f"{cell_folder.name}_orientations.csv"
    # Never treat this loop's own output as a trajectory (possible on a rerun when the
    # cell folder name itself contains "ptetwt").
    csv_files = [
        p for p in sorted(cell_folder.glob("*ptetwt*.csv"))
        if p.name != orientations_file.name
    ]
    if not csv_files:
        # Nothing to analyse: do not create a plot folder or an empty results file.
        continue

    plots_dir = cell_folder / plots_subdir
    if save_plots:
        plots_dir.mkdir(parents=True, exist_ok=True)

    results = []
    for csv_file in csv_files:
        df = pd.read_csv(csv_file)
        metrics = compute_orientations_for_df(df)
        results.append({
            "file": csv_file.name,
            "endpoints_angle": metrics["endpoints_angle"],
            "std_angle": metrics["std_angle"],
            "ransac_angle": metrics["ransac_angle"],
        })

        # Plot the raw points together with both fitted lines
        fig, ax = plt.subplots()
        ax.plot(df["X"], df["Y"], 'o', label='points')
        ax.plot(metrics["line_points_std"][0], metrics["line_points_std"][1], '-', label='std fit')
        ax.plot(metrics["line_points_ransac"][0], metrics["line_points_ransac"][1], '--', label='RANSAC fit')
        ax.invert_yaxis()
        ax.set_title(f"{csv_file.name}\nend={metrics['endpoints_angle']:.1f}°, std={metrics['std_angle']:.1f}°, ransac={metrics['ransac_angle']:.1f}°")
        ax.axis('equal')
        ax.legend()
        if save_plots:
            fig.savefig(plots_dir / f"{csv_file.stem}_fits.png", dpi=200)
        if show_plots:
            plt.show()
        # Always release the figure, also after showing it, so figures do not accumulate.
        plt.close(fig)

    pd.DataFrame(results).to_csv(orientations_file, index=False)





# Export the orientations to csv files:
# - "all_cells_orientation_summary.csv": mean, standard deviation and SEM per cell and method
# - "grand_summary.csv": mean and SEM of the per-cell means, per method
# NOTE(review): the angles are averaged linearly. Because orientations are folded into
# [-90, +90], values close to +90 and -90 describe almost the same line but average to ~0;
# consider axial (circular) statistics if the data come near those limits.

all_rows = []
for cell_folder in sorted(base_folder.glob("*/")):
    # Before Python 3.11 a glob pattern ending in "/" also matched plain files.
    if not cell_folder.is_dir():
        continue
    f = cell_folder / f"{cell_folder.name}_orientations.csv"
    if not f.exists():
        continue
    try:
        df = pd.read_csv(f)
    except pd.errors.EmptyDataError:
        # A results file written from an empty table may contain no columns to parse.
        continue
    if df.empty:
        continue

    row = {"cell": cell_folder.name, "n": len(df)}
    for col, suffix in (("endpoints_angle", "end"), ("std_angle", "std"), ("ransac_angle", "ransac")):
        row[f"mean_{suffix}"] = df[col].mean()
        row[f"std_{suffix}"] = df[col].std(ddof=1)
        # Series.sem divides by the number of non-NaN angles, matching mean() and std(),
        # which also skip NaN (e.g. a trajectory for which a fit failed).
        row[f"sem_{suffix}"] = df[col].sem(ddof=1)
    all_rows.append(row)

per_cell = pd.DataFrame(all_rows)
per_cell.to_csv(base_folder / "all_cells_orientation_summary.csv", index=False)

# Grand mean across cells
if not per_cell.empty:
    grand = [
        {
            "metric": label,
            "grand_mean": per_cell[col].mean(),
            "grand_sem": per_cell[col].sem(ddof=1),
            "n_cells": len(per_cell),
        }
        for col, label in [("mean_end", "endpoints"), ("mean_std", "std"), ("mean_ransac", "ransac")]
    ]
    pd.DataFrame(grand).to_csv(base_folder / "grand_summary.csv", index=False)


Explanation of the 3 methods to compute orientation:
1. Endpoints method:
    - Calculates the vector from the first point to the last point of the trajectory.
    - Computes the angle of this vector relative to the y-axis using arctan2(dx, dy).
    - This gives a quick estimate of the overall direction of the trajectory, but it ignores the detailed shape of the path and is sensitive to a misplaced first or last point.
2. Standard linear fit:
    - Performs a least-squares linear regression of y on x, i.e. fits a straight line through all points of the trajectory by minimizing the squared error in y.
    - The slope of this line is used to compute the angle relative to the y-axis.
    - This method considers all points and is less sensitive to noise than the endpoints method, but can be affected by outliers.
3. Robust RANSAC fit:
    - Uses the RANSAC algorithm to fit a line that is robust to outliers.
    - Iteratively selects random subsets of points to fit a line and identifies inliers that fit this model well.
    - The slope of the best RANSAC line is used to compute the angle relative to the y-axis.
    - Finds the dominant orientation even in noisy trajectories, but can sometimes discard too many points if the trajectory is short.




Endpoints = good “quick orientation.”

Standard Fit = best when you trust your clicks and the track is clean.

RANSAC Fit = best when you expect occasional mis-clicks or noisy paths.

In [None]:
# # plot the trajectories of all cilia next to each other as lines
# plt.figure()
# for cell_folder in base_folder.glob("*/"):
#     for csv_file in sorted(cell_folder.glob("ptetwt*.csv")):
#         df = pd.read_csv(csv_file)
#         x, y = df["X"], df["Y"]
#         plt.plot(x - np.nanmin(x) + 10*int(cell_folder.name.split("cell")[-1]), y - np.nanmin(y), '-o', markersize=2)
# plt.gca().invert_yaxis()
# plt.axis('equal')
# plt.title("Cilia trajectories")
# plt.savefig(base_folder/"all_cilia_trajectories.png", dpi=200)
# plt.show()


# plot the same trajectories but all around the center (0,0) and in different subplots per trajectory
# One row per cell folder, one column per trajectory of that cell.
# NOTE(review): this cell iterates "cell*/" folders while the main loop uses "*/" — confirm
# which folder pattern is intended.
cell_tracks = [
    (cell_folder, sorted(cell_folder.glob("*ptetwt*.csv")))
    for cell_folder in sorted(base_folder.glob("cell*/"))
    if cell_folder.is_dir()
]
# Size the grid from the folders and files that are actually plotted. At least 5 columns
# are kept; more are added when a cell has more than 5 trajectories, so that a subplot
# index can never run into the next row or past the end of the grid.
n_rows = max(1, len(cell_tracks))
n_cols = max([5] + [len(csv_files) for _, csv_files in cell_tracks])

plt.figure(figsize=(25, 20))
for i, (cell_folder, csv_files) in enumerate(cell_tracks):
    for j, csv_file in enumerate(csv_files):
        df = pd.read_csv(csv_file)
        x, y = df["X"], df["Y"]
        x_centered = x - np.nanmean(x)
        y_centered = y - np.nanmean(y)
        plt.subplot(n_rows, n_cols, i * n_cols + j + 1)
        plt.plot(x_centered, y_centered, '-o', markersize=2)
        plt.gca().invert_yaxis()
        plt.axis('equal')
        plt.xlim(-10, 10)
        plt.ylim(-10, 10)
        plt.title(f"{cell_folder.name} - {csv_file.stem}")
plt.tight_layout()
plt.savefig(base_folder / "all_cilia_trajectories_centered.png", dpi=200)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import tifffile

# Path to image of cell (one frame from the video)
# path_image = Path("W:\\Users\\Daphne\\WT_RESULTS\\WT_CB_direction\\Dorsal\\plot_trajectories_singlets\\ptetwt_14_frame300-1.tif")
path_image = Path("W:\\Users\\Daphne\\WT_RESULTS\\WT_CB_direction\\Ventral\\plot_trajectories\\260825_ptetwt_05_DZ_beat_frame1000-1.tif")

# Path to the folder containing all csv files of the cilia trajectories belonging to the video of path_image
folder_csv = path_image.parent

# Load image
image = tifffile.imread(path_image)

# Combine all trajectories into one table with the columns "X_<csv stem>" and "Y_<csv stem>".
# Not all trajectories have the same length; concatenating along the columns pads the
# shorter ones with NaN.
trajectory_frames = []
for csv_file in sorted(folder_csv.glob("*ptetwt*.csv")):
    track = pd.read_csv(csv_file)
    trajectory_frames.append(
        track[["X", "Y"]].rename(columns={"X": f"X_{csv_file.stem}", "Y": f"Y_{csv_file.stem}"})
    )

# Concatenate once instead of growing the table inside the loop (which copies all
# previously added columns on every iteration).
df = pd.concat(trajectory_frames, axis=1) if trajectory_frames else pd.DataFrame()

df.to_csv(folder_csv / "all_trajectories_combined.csv", index=False)


In [None]:
import matplotlib as mpl
# Keep text as editable text in SVG output and use Arial as the global font
mpl.rcParams.update({
    'svg.fonttype': 'none',
    'font.family': 'Arial',
})

# Overlay every trajectory stored in df on the image, one legend entry per trajectory
fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(image, cmap='gray')
x_columns = [name for name in df.columns if name.startswith("X_")]
for x_name in x_columns:
    y_name = x_name.replace("X_", "Y_")
    track_label = x_name.replace("X_", "")
    ax.plot(df[x_name], df[y_name], '-o', markersize=2, label=track_label)
ax.axis('off')
ax.grid(False)
ax.set_title(f"Cilia trajectories overlaid on {path_image.name}")
ax.legend(fontsize='small')
for file_name, save_options in (
    ("cilia_trajectories_on_image_combined.png", {"dpi": 300}),
    ("cilia_trajectories_on_image_combined.svg", {}),
):
    fig.savefig(folder_csv / file_name, **save_options)
plt.show()

# create a plot with all trajectories in the same color palette, with a colorbar indicating the sequence of the points
from matplotlib.collections import LineCollection

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(image, cmap='gray')
lc = None  # last collection drawn; used as the mappable for the colorbar
for col in df.columns:
    if not col.startswith("X_"):
        continue
    x = df[col]
    # Swap only the prefix; str.replace would also rewrite an "X_" inside the file stem.
    y = df["Y_" + col[len("X_"):]]
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap='viridis', norm=plt.Normalize(0, len(x)))
    # One color value per segment (there is one segment fewer than points).
    lc.set_array(np.arange(len(segments)))
    lc.set_linewidth(2)
    ax.add_collection(lc)
ax.axis('off')
ax.grid(False)
ax.set_title(f"Cilia trajectories with color gradient on {path_image.name}")
if lc is not None:
    # Without any trajectory there is nothing to attach a colorbar to.
    fig.colorbar(lc, ax=ax, label='Point sequence')
fig.savefig(folder_csv / "cilia_trajectories_color_gradient_combined.png", dpi=300)
fig.savefig(folder_csv / "cilia_trajectories_color_gradient_combined.svg")
plt.show()

# smooth the trajectories using savitzky-golay filter and plot the smoothed trajectories with colorbar
from scipy.signal import savgol_filter
from matplotlib.collections import LineCollection

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(image, cmap='gray')

x_columns = [col for col in df.columns if col.startswith("X_")]

# Length of the longest trajectory (0 when there are none); shared upper limit of the color scale
max_length = max((df[col].dropna().size for col in x_columns), default=0)

lc = None  # last collection drawn; used as the mappable for the colorbar
for col in x_columns:
    x = df[col].to_numpy(dtype=float)
    # Swap only the prefix; str.replace would also rewrite an "X_" inside the file stem.
    y = df["Y_" + col[len("X_"):]].to_numpy(dtype=float)

    # Remove NaN or Inf values (shorter trajectories are padded with NaN)
    valid_mask = np.isfinite(x) & np.isfinite(y)
    x, y = x[valid_mask], y[valid_mask]
    if len(x) < 2:
        continue  # a single point gives no line segment to draw

    if len(x) >= 5:  # savgol_filter needs at least as many points as its window length
        x_smooth = savgol_filter(x, 5, 2)  # window size 5, polynomial order 2
        y_smooth = savgol_filter(y, 5, 2)
    else:
        x_smooth, y_smooth = x, y

    points = np.array([x_smooth, y_smooth]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap='viridis', norm=plt.Normalize(0, max_length))
    # One color value per segment (there is one segment fewer than points).
    lc.set_array(np.arange(len(segments)))
    lc.set_linewidth(2)
    ax.add_collection(lc)

ax.axis('off')
ax.grid(False)
ax.set_title(f"Smoothed cilia trajectories on {path_image.name}")
if lc is not None:
    # Without any drawn trajectory there is nothing to attach a colorbar to.
    fig.colorbar(lc, ax=ax, label='Point sequence')
fig.savefig(folder_csv / "cilia_trajectories_smoothed_combined.png", dpi=300)
fig.savefig(folder_csv / "cilia_trajectories_smoothed_combined.svg")
plt.show()


In [None]:
# # Plot image with cilia trajectories overlaid
# plt.figure(figsize=(8,8))
# plt.imshow(image, cmap='gray')
# for csv_file in sorted(folder_csv.glob("*ptetwt*.csv")):
#     df = pd.read_csv(csv_file)
#     x, y = df["X"], df["Y"]
#     plt.plot(x, y, '-o', markersize=2, label=csv_file.stem)
# plt.axis('off')
# plt.grid(False)
# plt.title(f"Cilia trajectories overlaid on {path_image.name}")
# plt.legend(fontsize='small')
# plt.savefig(folder_csv/"cilia_trajectories_on_image.png", dpi=200)
# plt.show()
#
# # create a plot with all the trajectories in the same color palette, with a colorbar indicating the sequence of the points
# plt.figure(figsize=(8,8))
# plt.imshow(image, cmap='gray')
# for csv_file in sorted(folder_csv.glob("*ptetwt*.csv")):
#     df = pd.read_csv(csv_file)
#     x, y = df["X"], df["Y"]
#     points = np.array([x, y]).T.reshape(-1, 1, 2)
#     segments = np.concatenate([points[:-1], points[1:]], axis=1)
#     from matplotlib.collections import LineCollection
#     lc = LineCollection(segments, cmap='viridis', norm=plt.Normalize(0, len(x)))
#     lc.set_array(np.arange(len(x)))
#     lc.set_linewidth(2)
#     plt.gca().add_collection(lc)
# plt.axis('off')
# plt.grid(False)
# plt.title(f"Cilia trajectories with color gradient on {path_image.name}")
# plt.colorbar(lc, label='Point sequence')
# plt.savefig(folder_csv/"cilia_trajectories_color_gradient.png", dpi=200)
# plt.show()
#
# # smooth the trajectories using savitzky-golay filter and plot the smoothed trajectories with colorbar
# from scipy.signal import savgol_filter
# plt.figure(figsize=(8,8))
# plt.imshow(image, cmap='gray')
# for csv_file in sorted(folder_csv.glob("*ptetwt*.csv")):
#     df = pd.read_csv(csv_file)
#     x, y = df["X"], df["Y"]
#     if len(x) >= 5:  # savgol_filter requires window length < len(x)
#         x_smooth = savgol_filter(x, 5, 2)  # window size 5, polynomial order 2
#         y_smooth = savgol_filter(y, 5, 2)
#     else:
#         x_smooth, y_smooth = x, y
#     points = np.array([x_smooth, y_smooth]).T.reshape(-1, 1, 2)
#     segments = np.concatenate([points[:-1], points[1:]], axis=1)
#     from matplotlib.collections import LineCollection
#     lc = LineCollection(segments, cmap='plasma', norm=plt.Normalize(0, len(x)))
#     lc.set_array(np.arange(len(x)))
#     lc.set_linewidth(2)
#     plt.gca().add_collection(lc)
# plt.axis('off')
# plt.grid(False)
# plt.title(f"Smoothed cilia trajectories on {path_image.name}")
# plt.colorbar(lc, label='Point sequence')
# plt.savefig(folder_csv/"cilia_trajectories_smoothed.png", dpi=200)
# plt.show()
#
#
# # # smooth the trajectories using a moving average and plot the smoothed trajectories with colorbar --> not good, makes the trajectories smaller
# # from scipy.ndimage import uniform_filter1d
# # plt.figure(figsize=(8,8))
# # plt.imshow(image, cmap='gray')
# # for csv_file in sorted(folder_csv.glob("*ptetwt*.csv")):
# #     df = pd.read_csv(csv_file)
# #     x, y = df["X"], df["Y"]
# #     if len(x) >= 5:  # uniform_filter1d requires window length < len(x)
# #         x_smooth = uniform_filter1d(x, size=5)  # window size 5
# #         y_smooth = uniform_filter1d(y, size=5)
# #     else:
# #         x_smooth, y_smooth = x, y
# #     points = np.array([x_smooth, y_smooth]).T.reshape(-1, 1, 2)
# #     segments = np.concatenate([points[:-1], points[1:]], axis=1)
# #     from matplotlib.collections import LineCollection
# #     lc = LineCollection(segments, cmap='cool', norm=plt.Normalize(0, len(x)))
# #     lc.set_array(np.arange(len(x)))
# #     lc.set_linewidth(2)
# #     plt.gca().add_collection(lc)
# # plt.axis('off')
# # plt.grid(False)
# # plt.title(f"Smoothed cilia trajectories (moving average) on {path_image.name}")
# # plt.colorbar(lc, label='Point sequence')
# # plt.savefig(folder_csv/"cilia_trajectories_smoothed_moving_average.png", dpi=200)
# # plt.show()
#


In [1]:
import numpy as np

# Load a saved NumPy array (.npy) of filtered trajectories from a hard-coded local path.
# NOTE(review): `data` is not used anywhere in the visible part of this notebook.
data = np.load('C:\\Users\\laan\\Downloads\\filtered_trajectories_271025_swimming_allcilia_1_0.npy')