This notebook reproduces the **instantaneous speed** analysis described in the manuscript.  

## Analysis Workflow
- **Preprocessing**  
  - Walk through subfolders of TrackMate outputs (CSV files containing “edge”).
        [assuming the CSV files for each condition (stage × species) have already been concatenated into one file]
  - Clean headers and sort trajectories by time.
  - Keep only trajectories with ≥ 31 time points (i.e., 150 min duration at 5 min per frame).
  - Truncate to first 31 time points for consistency  
  - Add derived columns:
    - `time` (row index × frame interval, in minutes)
    - `index` (track identifier combining TRACK_ID and file name)
  - Plot SPEED vs. time for each trajectory and save figures.

- **Plotting**  
  - Merge trajectories per subfolder and save as Excel files.

- **Outputs**  
  - Merged and filtered Excel files of trajectories.
  - SPEED vs. time plots (all trajectories, and mean ± SD) for each merged Excel file.
---


In [None]:
# === Imports ===
import os
import itertools
from pathlib import Path
from statistics import mean

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

from scipy import stats
from scipy.optimize import curve_fit
from scipy.stats import gaussian_kde

from matplotlib import cm
from matplotlib.colors import Normalize

import openpyxl

### Step 1: Preprocessing and CSV merging

In [None]:
base_folder = 'base_folder_name' ## structure: base_folder >> subfolder >> csv. files
output_folder_merged_data = "outpout_folder_name"

FRAME_INTERVAL_MIN = 5  # time between consecutive frames, in minutes


def load_edge_table(file_path, frame_interval=FRAME_INTERVAL_MIN):
    """Load one TrackMate "edge" CSV and add the derived ``time``/``index`` columns.

    Parameters
    ----------
    file_path : str
        Path to the CSV file. Rows 1-3 (the lines directly below the header
        row) are skipped as redundant header lines.
    frame_interval : int or float, optional
        Spacing between consecutive rows of one track, in minutes.

    Returns
    -------
    pandas.DataFrame
        The table sorted by ``EDGE_TIME`` with two extra columns:
        ``time``  - position of the row within its track x ``frame_interval``;
        ``index`` - ``"<TRACK_ID>_<file name without extension>"``.

    Raises
    ------
    KeyError
        If the file has no ``EDGE_TIME`` or ``TRACK_ID`` column.
    """
    df = pd.read_csv(file_path, skiprows=[1, 2, 3])  # remove redundant headers
    df.columns = df.columns.str.strip()

    # Stable sort: rows sharing an EDGE_TIME keep their order from the file,
    # so the merged output is reproducible from run to run.
    df = df.sort_values(by="EDGE_TIME", kind="mergesort").reset_index(drop=True)

    # Count rows *within each track*. Using the file-wide row index would
    # interleave the time axis across tracks as soon as a file holds more than
    # one TRACK_ID; for a single-track file both approaches give the same values.
    df["time"] = df.groupby("TRACK_ID").cumcount() * frame_interval

    base_name = os.path.splitext(os.path.basename(file_path))[0]
    df["index"] = df["TRACK_ID"].astype(str) + "_" + base_name
    return df


# Walk every subfolder; each one that contains edge CSVs yields one merged workbook.
for root, dirs, files in os.walk(base_folder):
    # Sorted so the row order of the merged workbook does not depend on the
    # (arbitrary) directory listing order.
    edge_files = sorted(
        f for f in files if f.endswith(".csv") and "edge" in f.lower()
    )
    if not edge_files:
        continue

    print(f"\n📂 Processing subfolder: {root}")
    subfolder_name = os.path.basename(root.rstrip("/\\"))
    combined_dfs = [load_edge_table(os.path.join(root, file)) for file in edge_files]

    # Join folder and file name explicitly: plain string concatenation silently
    # writes "<folder><subfolder>.xlsx" next to the folder when the folder name
    # has no trailing separator.
    os.makedirs(output_folder_merged_data, exist_ok=True)
    merged_path = os.path.join(output_folder_merged_data, f"{subfolder_name}.xlsx")
    pd.concat(combined_dfs, ignore_index=True).to_excel(merged_path, index=False)


### Step 2: Trajectory plotting

In [None]:

figure_save = 'outout_figure_pathway'
file_path = "file_name.xlsx"
figure_name = os.path.splitext(os.path.basename(file_path))[0]

MIN_TIME_POINTS = 31          # trajectories shorter than this are dropped; longer ones are truncated
TICK_SPACING = 25             # major tick spacing on the time axis
TIME_AXIS_LIMITS = (5, 150)   # visible range of the time axis

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(figure_save, exist_ok=True)

# Set the font before any figure is created so every text element uses it.
plt.rcParams['font.family'] = 'Arial'


# --- Plot All Trajectories ---
df = pd.read_excel(file_path)
df['SPEED'] = df['SPEED'] * 60  # convert speed unit from micron/second into micron/minutes
grouped = df.groupby('index')

# Keep only trajectories with enough points and cut each one to the same length.
filtered_groups = {
    name: group.iloc[:MIN_TIME_POINTS]
    for name, group in grouped
    if len(group) >= MIN_TIME_POINTS
}
if not filtered_groups:
    # pd.concat on an empty collection fails with an unhelpful
    # "No objects to concatenate"; report the actual cause instead.
    raise ValueError(
        f"No trajectory in {file_path!r} has at least {MIN_TIME_POINTS} time points."
    )
filtered_df = pd.concat(filtered_groups.values(), ignore_index=True)

plt.figure(figsize=(5, 4))
for name, group in filtered_df.groupby('index'):
    # One colour per trajectory: markers plus a thin connecting line.
    plt.scatter(group['time'], group['SPEED'], s=20, alpha=0.8)
    plt.plot(group['time'], group['SPEED'], alpha=0.7, linewidth=0.7)

plt.xlabel('time', fontsize=10)
plt.ylabel('real-time-speed per displacement', fontsize=10)
plt.title(f'{figure_name} - All Trajectories', fontsize=12)
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(TICK_SPACING))
plt.xlim(*TIME_AXIS_LIMITS)
plt.tight_layout()
plt.savefig(os.path.join(figure_save, f"{figure_name}.pdf"))
plt.show()

summary_save_path = os.path.join(figure_save, f"{figure_name}_group_summary.txt")

# --- mean+std Trajectories ---

# Rows = time points, columns = trajectories, so row-wise statistics
# summarise all trajectories at each time point.
pivot_table = filtered_df.pivot_table(index='time', columns='index', values='SPEED')

mean_values = pivot_table.mean(axis=1)
# Sample standard deviation (pandas default); NaN when only one trajectory
# contributes to a time point, in which case no band is drawn there.
std_values = pivot_table.std(axis=1)

plt.figure(figsize=(5, 4))
plt.plot(mean_values.index, mean_values.values, color='black', linewidth=2, label='Mean')
plt.fill_between(
    mean_values.index,
    mean_values - std_values,
    mean_values + std_values,
    color='gray',
    alpha=0.5,
    label='Mean ± SD',
)

plt.xlabel('time', fontsize=10)
plt.ylabel('real-time-speed per displacement', fontsize=10)
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(TICK_SPACING))

plt.xlim(*TIME_AXIS_LIMITS)
plt.ylim(-0.2, 1.6)

# NOTE(review): the title says "Normalized", but no normalization is applied to
# SPEED in this cell - confirm whether the label or the processing is intended.
plt.title(f"{figure_name} - Normalized Mean ± SD", fontsize=12)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(figure_save, f"{figure_name}_mean+sd.pdf"))
plt.show()
