# Volume Extraction from JLD2 Files

This function scans a folder of `.jld2` files representing simulation steps and calculates total spheroid volume over time,
by counting cell-occupied voxels and multiplying by a user-defined voxel volume. Output is saved as a CSV for downstream analysis.


In [2]:
import os
import h5py
import numpy as np
import pandas as pd

def extract_volume_metrics(input_folder, output_csv, voxel_volume_um3=8.0):
    """
    Extracts 3D volume (μm³) from .jld2 simulation files in the given folder.

    Every file whose name ends in ".jld2" and contains "step_" is opened with
    h5py. The lengths of all datasets in its "cell_voxels" group are summed and
    multiplied by voxel_volume_um3. One row per readable file is written to
    output_csv with the columns "step" and "volume_um3", sorted by step.

    Parameters:
    - input_folder (str): path to folder with .jld2 files
    - output_csv (str): path to output .csv file; its parent folder is created
      when missing (a bare file name is written to the current directory)
    - voxel_volume_um3 (float): volume of one voxel (default 2x2x2 μm = 8.0)

    Returns:
    - None. Files that cannot be parsed or read are reported with print() and
      skipped; no CSV is written when no file yields a record.
    """

    def load_cell_voxels_from_jld2(file_path):
        # Datasets are read fully ([()]) while the file is still open.
        with h5py.File(file_path, "r") as f:
            group = f["cell_voxels"]
            return {int(k): group[k][()] for k in group}

    def compute_volume(cell_voxels):
        # len() counts entries along the first axis of each dataset.
        # NOTE(review): assumes one entry per voxel — confirm against the
        # layout the simulation writes.
        return sum(len(voxels) for voxels in cell_voxels.values()) * voxel_volume_um3

    # os.makedirs("") raises FileNotFoundError, so only create a folder when
    # the output path actually names one.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    records = []
    for fname in sorted(os.listdir(input_folder)):
        if not (fname.endswith(".jld2") and "step_" in fname):
            continue
        file_path = os.path.join(input_folder, fname)

        # The step number is whatever follows the last "_" in the file name.
        # A name that does not end in an integer is skipped instead of
        # aborting the whole scan.
        try:
            step_num = int(fname.split("_")[-1].replace(".jld2", ""))
        except ValueError:
            print(f"Skipping {file_path}: cannot parse step number from file name")
            continue

        # Best effort: one unreadable file must not lose the other steps.
        try:
            cell_voxels = load_cell_voxels_from_jld2(file_path)
            volume_um3 = compute_volume(cell_voxels)
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            continue
        records.append({"step": step_num, "volume_um3": volume_um3})

    if not records:
        print("No valid volume data extracted.")
        return

    # File names sort lexicographically (step_10 before step_2), so order the
    # rows by the numeric step before writing.
    df = pd.DataFrame(records)
    df.sort_values("step", inplace=True)
    df.to_csv(output_csv, index=False)
    print(f"Saved volume metrics to: {output_csv}")


# Plotting Volume Over Time for All Conditions

This function reads all `*_volume.csv` files in a folder and plots the total 3D volume over simulation steps.
It supports filtering by specific conditions and by a maximum step; legend labels and line colors come from lookup tables defined inside the function (conditions without a color entry are drawn in black).


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def plot_all_volume_metrics(folder, max_step=None, ratios_to_plot=None, title="3D Cell Volume Over Time"):
    """
    Draws one volume-vs-step curve per *_volume.csv file found in a folder.

    Parameters:
    - folder (str): path to folder with volume CSV files
    - max_step (int or None): if set, only plot steps <= max_step
    - ratios_to_plot (list of str or None): if set, only plot files whose prefix
      (the file name without "_volume.csv") is in this list
    - title (str): figure title

    Each CSV is read for its "step" and "volume_um3" columns. Prints a message
    and returns without plotting when the folder has no matching file.
    """
    # Legend text per file prefix; unknown prefixes are shown verbatim.
    display_names = {
        "10_1": "GBM + MSC 10:1",
        "3_1": "GBM + MSC 3:1",
        "1_1": "GBM + MSC 1:1",
        "1_3": "GBM + MSC 1:3",
        "2_1": "GBM + MSC 2:1",
        "5_1": "GBM + MSC 5:1",
        "20_1": "GBM + MSC 20:1",
        "GBM": "GBM Alone",
        "MSC": "MSC Alone",
    }
    # Line color per file prefix; anything not listed falls back to black.
    line_colors = {
        "2_1": "#1f77b4",   # blue
        "10_1": "#2ca02c",  # green
        "5_1": "#ff7f0e",   # orange
        "20_1": "#d62728",  # red
    }

    csv_names = [name for name in os.listdir(folder) if name.endswith("_volume.csv")]
    csv_names.sort()
    if not csv_names:
        print("No *_volume.csv files found in folder.")
        return

    plt.figure(figsize=(10, 6))

    for csv_name in csv_names:
        # The condition key is the file name with the "_volume.csv" part removed
        condition = csv_name.replace("_volume.csv", "")
        wanted = (not ratios_to_plot) or (condition in ratios_to_plot)
        if not wanted:
            continue

        table = pd.read_csv(os.path.join(folder, csv_name))
        if max_step is not None:
            table = table[table["step"] <= max_step]

        plt.plot(
            table["step"],
            table["volume_um3"],
            label=display_names.get(condition, condition),
            color=line_colors.get(condition, "black"),
        )

    plt.xlabel("Step")
    plt.ylabel("Total Volume (μm³)")
    plt.title(title)
    plt.legend()
    plt.grid(False)
    plt.tight_layout()
    plt.show()


# Spline-based Growth Rate Computation

This function uses cubic spline interpolation to compute smooth growth rates from volume data.
Returns both volume and its first derivative (growth rate) as a function of time, with optional plotting.


In [5]:
import os
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt

def compute_growth_rates_from_folder(folder, volume_column="avg_volume", step_column="step", step_interval=1.0, plot=True):
    """
    Computes spline-based growth rates from volume CSV files in a folder.

    For every *_volume.csv file, a cubic spline is fitted through
    (step * step_interval, volume) and its first derivative is evaluated at the
    same time points.

    Parameters:
    - folder (str): Folder path containing *_volume.csv files.
    - volume_column (str): Name of the volume column (e.g., 'avg_volume' or 'total_volume').
      Files written by extract_volume_metrics use 'volume_um3', so pass that
      name explicitly for them.
    - step_column (str): Step/time column name (default: 'step').
    - step_interval (float): Time per simulation step (e.g., in hours or days).
    - plot (bool): Whether to show plots of volume and growth rate.

    Returns:
    - dict of DataFrames keyed by filename prefix (e.g., '1_1'), each containing:
      ['step', 'time', 'volume', 'growth_rate']

    Raises:
    - KeyError: if a CSV lacks step_column or volume_column.

    Files the spline cannot be fitted to (for example fewer than two rows or
    repeated step values) are reported with print() and left out of the result
    instead of aborting the whole run.
    """

    results = {}
    # Sorted so the result dict and the figures come out in a reproducible
    # order; os.listdir() order is arbitrary.
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith("_volume.csv"):
            continue

        prefix = fname.replace("_volume.csv", "")
        path = os.path.join(folder, fname)
        df_sorted = pd.read_csv(path).sort_values(by=step_column)

        steps = df_sorted[step_column].values
        volume = df_sorted[volume_column].values
        times = steps * step_interval

        # CubicSpline raises ValueError on input it cannot interpolate
        # (too few points, x not strictly increasing, non-finite values).
        try:
            spline = CubicSpline(times, volume)
        except ValueError as e:
            print(f"Skipping {path}: cannot fit spline ({e})")
            continue
        growth_rate = spline.derivative()(times)

        results[prefix] = pd.DataFrame({
            "step": steps,
            "time": times,
            "volume": volume,
            "growth_rate": growth_rate,
        })

        if plot:
            # Volume on the left axis, growth rate on a twin right axis.
            fig, ax1 = plt.subplots()
            ax1.plot(times, volume, label="Volume", color='blue')
            ax1.set_xlabel("Time")
            ax1.set_ylabel("Volume", color='blue')
            ax1.tick_params(axis='y', labelcolor='blue')

            ax2 = ax1.twinx()
            ax2.plot(times, growth_rate, label="Growth rate", color='red', linestyle='--')
            ax2.set_ylabel("Growth Rate", color='red')
            ax2.tick_params(axis='y', labelcolor='red')

            plt.title(f"Volume and Growth Rate: {prefix}")
            plt.tight_layout()
            plt.show()

    return results


# Finite-Difference Growth Rate Estimation and Comparison

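An alternative to spline-based estimation. `compute_volume_growth_rate` calculates the growth rate of spheroid volume from smoothed finite differences, and `plot_combined_volume_growth_rates` plots the result for each selected condition.
Useful for visualizing trends across experimental conditions.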


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def compute_volume_growth_rate(stats_df, step_column="step", volume_column="avg_volume", smooth_window=5):
    """
    Compute smoothed volume growth rate (Δvolume/Δstep) from simulation stats DataFrame.

    Parameters:
    - stats_df (DataFrame): rows ordered by step, holding the two columns below
    - step_column (str): name of the step column
    - volume_column (str): name of the volume column
    - smooth_window (int): number of consecutive finite differences averaged
      together; values <= 1 disable smoothing

    Returns:
    - (mid_steps, growth_rate): two arrays of equal length. Without smoothing,
      each rate sits at the midpoint of the two steps it was computed from.
      With smoothing, each averaged rate sits at the mean of the midpoints in
      its window, i.e. at the window centre.

    If there are fewer finite differences than smooth_window, the window is
    shrunk to the number available so the two arrays stay the same length.
    Fewer than two rows yields two empty arrays.
    """
    steps = stats_df[step_column].values
    volumes = stats_df[volume_column].values

    # NOTE: repeated step values give a zero Δstep and therefore inf/nan rates.
    growth_rate = np.diff(volumes) / np.diff(steps)
    mid_steps = (steps[:-1] + steps[1:]) / 2

    # np.convolve(mode='valid') swaps its operands when the kernel is longer
    # than the signal, which would return values that are not moving averages.
    window = min(smooth_window, len(growth_rate))
    if window > 1:
        kernel = np.ones(window) / window
        growth_rate = np.convolve(growth_rate, kernel, mode='valid')
        # Average the positions with the same kernel so every smoothed rate is
        # placed at the centre of its window rather than at the window start.
        mid_steps = np.convolve(mid_steps, kernel, mode='valid')

    return mid_steps, growth_rate

def plot_combined_volume_growth_rates(
    folder,
    volume_column="volume_um3",
    step_column="step",
    smooth_window=5,
    max_step=None,
    step_interval=1.0,
    unit="μm³/day",
    ratios_to_plot=None
):
    """
    Draw the smoothed finite-difference growth rate of every selected
    *_volume.csv file on one shared figure.

    Parameters:
    - folder: path to folder with *_volume.csv files
    - volume_column: column name with volume values
    - step_column: column name for simulation steps
    - smooth_window: smoothing window passed to compute_volume_growth_rate
    - max_step: if set, rows with a step above this value are dropped
    - step_interval: time per step (e.g., 1.0 = 1 day); x values are multiplied
      by it and rates are divided by it
    - unit: label for growth rate unit on the y axis
    - ratios_to_plot: list of file prefixes (e.g., ["1_1", "GBM", "MSC"]) to include;
      None or an empty list includes every file

    Files left with fewer than smooth_window + 2 rows are skipped silently.
    """
    plt.figure(figsize=(10, 6))

    # Line color per file prefix; prefixes not listed are drawn in black.
    colors_by_condition = {
        "GBM": "#1f77b4",   # blue
        "1_1": "#2ca02c",   # green
        "MSC": "#ff7f0e",   # orange
        "1_3": "#d62728",   # red
        "3_1": "#9467bd",   # purple
        "2_1": "#8c564b",   # brown
        "5_1": "#e377c2",   # pink
        "10_1": "#7f7f7f",  # gray
        "20_1": "#bcbd22",  # yellow-green
    }
    # Legend text per file prefix; prefixes not listed are shown verbatim.
    names_by_condition = {
        "1_1": "GBM + MSC 1:1",
        "1_3": "GBM + MSC 1:3",
        "3_1": "GBM + MSC 3:1",
        "GBM": "GBM Alone",
        "MSC": "MSC Alone",
        "10_1": "GBM + MSC 10:1",
        "2_1": "GBM + MSC 2:1",
        "5_1": "GBM + MSC 5:1",
        "20_1": "GBM + MSC 20:1",
    }

    min_rows = smooth_window + 2
    for csv_name in sorted(os.listdir(folder)):
        if not csv_name.endswith("_volume.csv"):
            continue

        condition = csv_name.replace("_volume.csv", "")
        wanted = (not ratios_to_plot) or (condition in ratios_to_plot)
        if not wanted:
            continue

        table = pd.read_csv(os.path.join(folder, csv_name)).sort_values(by=step_column)
        if max_step is not None:
            table = table[table[step_column] <= max_step]

        # Too few rows to smooth: leave this condition off the figure.
        if len(table) < min_rows:
            continue

        centers, rates = compute_volume_growth_rate(
            table,
            step_column=step_column,
            volume_column=volume_column,
            smooth_window=smooth_window
        )

        # Rescale from per-step to per-time-unit on both axes.
        plt.plot(
            centers * step_interval,
            rates / step_interval,
            label=names_by_condition.get(condition, condition),
            color=colors_by_condition.get(condition, "black"),
        )

    if step_interval >= 0.5:
        x_label = "Time (days)"
    else:
        x_label = "Time (steps)"
    plt.xlabel(x_label)
    plt.ylabel(f"Growth Rate ({unit})")
    plt.title("Growth Rates from Simulated Volume Data")
    plt.legend()
    plt.grid(False)
    plt.tight_layout()
    plt.show()
