In [None]:
import glob
import json
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from fucciphase import process_trackmate
from fucciphase.phase import (
    estimate_cell_cycle_percentage,
    estimate_percentage_by_subsequence_alignment,
)
from fucciphase.plot import plot_normalized_intensities
from fucciphase.sensor import FUCCISASensor
from fucciphase.utils import normalize_channels, postprocess_estimated_percentages

In [None]:
# Output folder for every figure saved below. `exist_ok=True` makes this a
# no-op when the folder is already there and avoids the check-then-create race.
os.makedirs("figures", exist_ok=True)

## Read all TrackMate outputs

* Multiple videos were processed; the resulting tracks are saved as XML files
* The XML files should follow a common naming template so that they can be identified

In [None]:
# File name shared by all TrackMate exports; each one is looked up one
# folder level below the working directory.
template_file = "merged_linked.ome.xml"
# glob returns matches in arbitrary order. Sorting keeps the processing order,
# and with it the track indices used in the figure file names, stable
# between runs.
track_files = sorted(glob.glob(f"*/{template_file}"))
print(track_files)

# General information

In [None]:
# --- Input files ------------------------------------------------------------
reference_file = "example_data/hacat_fucciphase_reference.csv"
sensorfile = "example_data/fuccisa_hacat.json"

# --- Column names of the two intensity channels -----------------------------
magenta_channel = "MEAN_INTENSITY_CH1"
cyan_channel = "MEAN_INTENSITY_CH2"

# --- Track selection and timing ---------------------------------------------
# Pattern used to recognise sub-track names: "Track_<digits>.<lowercase letters>"
regex = r"Track_[0-9]+\.[a-z]+"
timestep = 0.25  # in hours

In [None]:
# Load the reference data and rename its "cyan" / "magenta" columns to the
# channel column names used for the tracks.
reference_df = pd.read_csv(reference_file).rename(
    columns={"cyan": cyan_channel, "magenta": magenta_channel}
)

In [None]:
# Read the sensor description. JSON is UTF-8 by specification, so the encoding
# is given explicitly instead of relying on the platform default.
with open(sensorfile, encoding="utf-8") as fp:
    sensor_properties = json.load(fp)

In [None]:
# Build the sensor object; every entry of the JSON file is passed to the
# constructor as a keyword argument.
sensor = FUCCISASensor(**sensor_properties)

# Get all tracks with full cycle

In [None]:
# Collect, from every TrackMate file, the sub-tracks that end before the last
# frame of their recording and sort them into two groups.
# tracks followed by another branch
dfs_save_tracks = []
# tracks that are not followed by another branch
dfs_candidate_tracks = []
# a track without a follow-up branch is only kept if it has more rows than this
min_candidate_length = 40

for track_file in track_files:
    print(track_file)
    df = process_trackmate(
        track_file,
        channels=[cyan_channel, magenta_channel],
        sensor=sensor,
        thresholds=[0.1, 0.1],
        generate_unique_tracks=True,
    )
    # last frame present in this file; tracks that reach it are skipped
    final_frame = df["FRAME"].max()
    for track_id in df["UNIQUE_TRACK_ID"].unique():
        track = df[df["UNIQUE_TRACK_ID"] == track_id]
        name = track["name"].iloc[0]
        # is the track a subtrack
        is_subtrack = re.match(regex, name) is not None
        if not is_subtrack or track["FRAME"].max() >= final_frame:
            continue
        print("Track ID: ", track_id)
        # is there a subtrack, i.e. a name that continues this one with more
        # letters. The name is escaped because its "." would otherwise match
        # any character; `na=False` keeps missing names from counting as a hit.
        has_followup = bool(
            df["name"].str.match(re.escape(name) + "[a-z]+", na=False).any()
        )
        # Store copies: later cells add columns to these frames, which must
        # not write into a slice of `df`.
        if has_followup:
            dfs_save_tracks.append(track.copy())
        elif len(track) > min_candidate_length:
            dfs_candidate_tracks.append(track.copy())

Use all tracks

In [None]:
# Append the candidate tracks to the saved tracks so that the cells below
# iterate over both groups. The list is extended in place: re-running this
# cell without rebuilding the lists appends the candidates a second time.
dfs_save_tracks.extend(dfs_candidate_tracks)

## Process the DataFrame

In [None]:
# Compare two percentage estimates of every track with a frame-based ground
# truth and save the figures as figures/global_normalization_*.
#
# Each entry: (tag used in the file names, column holding the raw estimate,
# file-name stem of the expected-vs-reconstructed figure). The post-processed
# estimate lives in "<raw column>_POST".
estimators = (
    ("static", "CELL_CYCLE_PERC", "percentage"),
    ("dynamic", "CELL_CYCLE_PERC_DTW", "percentages"),
)

for idx, df in enumerate(dfs_save_tracks):
    # insert ground truth: 0 % at the first frame of the track, 100 % at the last
    df["percentage"] = (
        100.0
        * (df["FRAME"] - df["FRAME"].min())
        / (df["FRAME"].max() - df["FRAME"].min())
    )
    postprocess_estimated_percentages(
        df, percentage_column="CELL_CYCLE_PERC", track_id_name="UNIQUE_TRACK_ID"
    )
    estimate_percentage_by_subsequence_alignment(
        df,
        dt=timestep,
        channels=[cyan_channel, magenta_channel],
        reference_data=reference_df,
        track_id_name="UNIQUE_TRACK_ID",
    )
    postprocess_estimated_percentages(
        df, percentage_column="CELL_CYCLE_PERC_DTW", track_id_name="UNIQUE_TRACK_ID"
    )

    # frame index relative to the start of the track (x axis of the error plots)
    elapsed_frames = df["FRAME"] - df["FRAME"].min()

    for method, raw_column, percentage_stem in estimators:
        post_column = f"{raw_column}_POST"
        figure_prefix = f"figures/global_normalization_{method}"

        # Figure 1: intensities against the ground truth (default style) and
        # against the post-processed estimate (markers only).
        plot_normalized_intensities(
            df,
            cyan_channel,
            magenta_channel,
            "tab:cyan",
            "m",
            time_column="percentage",
            time_label="Percentage w.r.t. total time",
        )
        plot_normalized_intensities(
            df,
            cyan_channel,
            magenta_channel,
            "tab:cyan",
            "m",
            time_column=post_column,
            time_label="Percentage w.r.t. total time",
            marker="o",
            linestyle="",
        )
        plt.savefig(f"{figure_prefix}_intensities_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_intensities_{idx}.svg")
        plt.show()

        # Figure 2: expected versus reconstructed percentage.
        plt.plot(df["percentage"], df["percentage"], color="black", label="Ideal")
        plt.plot(df["percentage"], df[raw_column], label="Reconstructed")
        plt.plot(df["percentage"], df[post_column], "o", label="Postprocessed")
        plt.xlabel("Expected percentage")
        plt.ylabel("Reconstructed percentage")
        plt.legend()
        plt.savefig(f"{figure_prefix}_{percentage_stem}_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_{percentage_stem}_{idx}.svg")
        plt.show()

        # Figure 3: absolute error per frame.
        plt.plot(
            elapsed_frames,
            np.abs(df["percentage"] - df[raw_column]),
            label="Reconstructed",
        )
        plt.plot(
            elapsed_frames,
            np.abs(df["percentage"] - df[post_column]),
            "o",
            label="Reconstructed + post",
        )
        plt.legend(framealpha=1.0)
        plt.ylabel("Percentage error")
        plt.xlabel("Frame")
        plt.savefig(f"{figure_prefix}_error_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_error_{idx}.svg")
        # Finish the figure here; otherwise these error curves stay on the
        # current axes and end up in the next intensity figure.
        plt.show()

In [None]:
# Repeat the comparison after normalizing the channels of every track and
# re-estimating the percentage from the "_NORM" columns. Figures are saved as
# figures/trackwise_normalization_*.
#
# One Series per track with the absolute error of the raw "CELL_CYCLE_PERC_DTW"
# estimate; the subsequence analysis reads it as the full-sequence error.
percentage_errors = []

# Each entry: (tag used in the file names, column holding the raw estimate,
# file-name stem of the expected-vs-reconstructed figure). The post-processed
# estimate lives in "<raw column>_POST".
estimators = (
    ("static", "CELL_CYCLE_PERC", "percentage"),
    ("dynamic", "CELL_CYCLE_PERC_DTW", "percentages"),
)

for idx, df in enumerate(dfs_save_tracks):
    normalize_channels(
        df, channels=[cyan_channel, magenta_channel], track_id_name="UNIQUE_TRACK_ID"
    )
    # insert ground truth: 0 % at the first frame of the track, 100 % at the last
    df["percentage"] = (
        100.0
        * (df["FRAME"] - df["FRAME"].min())
        / (df["FRAME"].max() - df["FRAME"].min())
    )
    estimate_cell_cycle_percentage(
        df,
        sensor=sensor,
        channels=[cyan_channel + "_NORM", magenta_channel + "_NORM"],
        phase_column="DISCRETE_PHASE_MAX",
    )
    postprocess_estimated_percentages(
        df, percentage_column="CELL_CYCLE_PERC", track_id_name="UNIQUE_TRACK_ID"
    )

    estimate_percentage_by_subsequence_alignment(
        df,
        dt=timestep,
        channels=[cyan_channel, magenta_channel],
        reference_data=reference_df,
        track_id_name="UNIQUE_TRACK_ID",
    )
    postprocess_estimated_percentages(
        df, percentage_column="CELL_CYCLE_PERC_DTW", track_id_name="UNIQUE_TRACK_ID"
    )

    # frame index relative to the start of the track (x axis of the error plots)
    elapsed_frames = df["FRAME"] - df["FRAME"].min()

    for method, raw_column, percentage_stem in estimators:
        post_column = f"{raw_column}_POST"
        figure_prefix = f"figures/trackwise_normalization_{method}"

        # Figure 1: intensities against the ground truth (default style) and
        # against the post-processed estimate (markers only).
        plot_normalized_intensities(
            df,
            cyan_channel,
            magenta_channel,
            "tab:cyan",
            "m",
            time_column="percentage",
            time_label="Percentage w.r.t. total time",
        )
        plot_normalized_intensities(
            df,
            cyan_channel,
            magenta_channel,
            "tab:cyan",
            "m",
            time_column=post_column,
            time_label="Percentage w.r.t. total time",
            marker="o",
            linestyle="",
        )
        plt.savefig(f"{figure_prefix}_intensities_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_intensities_{idx}.svg")
        plt.show()

        # Figure 2: expected versus reconstructed percentage.
        plt.plot(df["percentage"], df["percentage"], color="black", label="Ideal")
        plt.plot(df["percentage"], df[raw_column], label="Reconstructed")
        plt.plot(df["percentage"], df[post_column], "o", label="Postprocessed")
        plt.xlabel("Expected percentage")
        plt.ylabel("Reconstructed percentage")
        plt.legend()
        plt.savefig(f"{figure_prefix}_{percentage_stem}_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_{percentage_stem}_{idx}.svg")
        plt.show()

        # Figure 3: absolute error per frame.
        plt.plot(
            elapsed_frames,
            np.abs(df["percentage"] - df[raw_column]),
            label="Reconstructed",
        )
        plt.plot(
            elapsed_frames,
            np.abs(df["percentage"] - df[post_column]),
            "o",
            label="Reconstructed + post",
        )
        plt.legend(framealpha=1.0)
        plt.ylabel("Percentage error")
        plt.xlabel("Frame")
        plt.savefig(f"{figure_prefix}_error_{idx}.pdf")
        plt.savefig(f"{figure_prefix}_error_{idx}.svg")
        plt.show()

    percentage_errors.append(np.abs(df["percentage"] - df["CELL_CYCLE_PERC_DTW"]))

In [None]:
def get_windows_start_points(len_track, subsequence_length, n_windows, seed=42):
    """Draw random start indices for subsequence windows inside a track.

    Parameters
    ----------
    len_track : int
        Number of rows of the track.
    subsequence_length : int
        Number of rows of each window.
    n_windows : int
        Number of start indices to draw.
    seed : int, optional
        Seed of the random generator. A new generator is created from it on
        every call, so identical arguments always give identical start indices.

    Returns
    -------
    numpy.ndarray
        ``n_windows`` integers drawn from
        ``[0, len_track - subsequence_length - 1)``; the upper bound is
        exclusive and values may repeat.

    Raises
    ------
    ValueError
        If the track is too short to offer at least one start index.
    """
    max_start_point = len_track - subsequence_length - 1
    if max_start_point <= 0:
        # numpy would raise a ValueError here as well, but without saying why
        raise ValueError(
            f"Track with {len_track} rows is too short for windows of "
            f"length {subsequence_length}."
        )
    rng = np.random.default_rng(seed=seed)
    return rng.integers(low=0, high=max_start_point, size=n_windows)


def run_subsequence_analysis(
    dfs_save_tracks, subsequence_length, show=False, n_windows=30
):
    """Analyse impact of subsequence length on accuracy.

    For every track, ``n_windows`` random windows of ``subsequence_length``
    rows are cut out, the percentage is re-estimated on each window alone and
    the resulting error is compared with the error of the full-track estimate
    over the same rows.

    Besides its arguments the function reads the notebook-level names
    ``percentage_errors`` (one error Series per track, in the order of
    ``dfs_save_tracks``), ``cyan_channel``, ``magenta_channel``,
    ``reference_df`` and ``timestep``.

    Parameters
    ----------
    dfs_save_tracks : list of pandas.DataFrame
        One frame per track; each needs the "percentage" ground-truth column.
    subsequence_length : int
        Number of rows per window.
    show : bool, optional
        If True, plot every window and a final summary.
    n_windows : int, optional
        Number of windows drawn per track.

    Returns
    -------
    tuple of list
        ``(expected_mean_errors, realized_mean_errors)`` with one entry per
        analysed window: the mean full-track error and the mean error of the
        window estimate.
    """
    expected_mean_errors = []
    realized_mean_errors = []

    for idx, df in enumerate(dfs_save_tracks):
        # get_windows_start_points cannot draw a start index for such tracks
        if len(df) - subsequence_length - 1 <= 0:
            print(
                f"Skipping track {idx}: {len(df)} rows are too few for "
                f"windows of length {subsequence_length}"
            )
            continue

        full_track_errors = percentage_errors[idx]
        windows_start_points = get_windows_start_points(
            len(df), subsequence_length, n_windows
        )
        for start_point in windows_start_points:
            window = slice(start_point, start_point + subsequence_length)
            # copy: the estimators add columns, which must not touch `df`
            subdf = df.iloc[window].copy()
            estimate_percentage_by_subsequence_alignment(
                subdf,
                dt=timestep,
                channels=[cyan_channel, magenta_channel],
                reference_data=reference_df,
                track_id_name="UNIQUE_TRACK_ID",
                minimum_track_length=0,
            )
            postprocess_estimated_percentages(
                subdf,
                percentage_column="CELL_CYCLE_PERC_DTW",
                track_id_name="UNIQUE_TRACK_ID",
            )
            expected_mean_errors.append(np.mean(full_track_errors.iloc[window]))
            realized_mean_errors.append(
                np.mean(np.abs(subdf["percentage"] - subdf["CELL_CYCLE_PERC_DTW_POST"]))
            )

            if show:
                plot_normalized_intensities(
                    subdf,
                    cyan_channel,
                    magenta_channel,
                    "tab:cyan",
                    "m",
                    time_column="percentage",
                    time_label="Percentage w.r.t. total time",
                )
                plot_normalized_intensities(
                    subdf,
                    cyan_channel,
                    magenta_channel,
                    "tab:cyan",
                    "m",
                    time_column="CELL_CYCLE_PERC_DTW_POST",
                    time_label="Percentage w.r.t. total time",
                    marker="o",
                    linestyle="",
                )

                plt.show()

                plt.plot(subdf["percentage"], subdf["percentage"], color="black")
                plt.plot(subdf["percentage"], subdf["CELL_CYCLE_PERC_DTW"])
                plt.plot(subdf["percentage"], subdf["CELL_CYCLE_PERC_DTW_POST"], "o")
                plt.xlabel("Expected percentage")
                plt.ylabel("Reconstructed percentage")
                plt.show()

                window_frames = subdf["FRAME"] - subdf["FRAME"].min()
                plt.plot(
                    window_frames,
                    np.abs(subdf["percentage"] - subdf["CELL_CYCLE_PERC_DTW"]),
                )
                plt.plot(
                    window_frames,
                    np.abs(subdf["percentage"] - subdf["CELL_CYCLE_PERC_DTW_POST"]),
                    "o",
                )
                # full-track error over the rows of this window (the window
                # starts at `start_point`, not at a multiple of its length)
                plt.plot(
                    window_frames,
                    full_track_errors.iloc[window],
                    label="Full sequence",
                )
                plt.legend(framealpha=1.0)
                plt.ylabel("Percentage error")
                plt.xlabel("Frame")
                plt.show()

    if show:
        plt.plot(expected_mean_errors, label="Expected")
        plt.plot(realized_mean_errors, label="Realized")
        plt.legend()
        plt.show()
    print(
        f"Expected: {np.mean(expected_mean_errors)} +- {np.std(expected_mean_errors)}"
    )
    print(
        f"Realized: {np.mean(realized_mean_errors)} +- {np.std(realized_mean_errors)}"
    )
    return expected_mean_errors, realized_mean_errors

In [None]:
# Run the analysis once per window length; both result lists are stored
# under the window length as key.
expected_error, realized_error = {}, {}
for subsequence_length in (5, 10, 20, 30):
    (
        expected_error[subsequence_length],
        realized_error[subsequence_length],
    ) = run_subsequence_analysis(
        dfs_save_tracks, subsequence_length=subsequence_length
    )

In [None]:
# Mean and standard deviation of the window errors per subsequence length.
# Both series use the NaN-aware statistics so that they are treated alike.
for idx, subsequence_length in enumerate(expected_error):
    expected_mean_errors = expected_error[subsequence_length]
    realized_mean_errors = realized_error[subsequence_length]
    plt.errorbar(
        subsequence_length,
        np.nanmean(realized_mean_errors),
        np.nanstd(realized_mean_errors),
        fmt="v",
        capsize=5,
        color="blue",
        label="Subsequence error",
    )
    plt.errorbar(
        subsequence_length,
        np.nanmean(expected_mean_errors),
        np.nanstd(expected_mean_errors),
        fmt="o",
        capsize=5,
        color="orange",
        label="Reference error",
    )
    # Build the legend after the first pair only; later markers carry the
    # same labels and would otherwise show up as duplicate entries.
    if idx == 0:
        plt.legend()

plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence.svg")
plt.show()

In [None]:
# Show the realized window errors (dict: subsequence length -> list of mean
# errors) as the cell output.
realized_error

In [None]:
# Box plot of the realized window errors, one box per subsequence length.
subsequence_lengths = list(expected_error)
realized_mean_errors = [realized_error[length] for length in subsequence_lengths]
plt.boxplot(realized_mean_errors)
# Boxes sit at positions 1..N. The ticks are labelled explicitly because the
# `labels` keyword of boxplot is deprecated in recent Matplotlib releases.
plt.xticks(range(1, len(subsequence_lengths) + 1), subsequence_lengths)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_boxplot_no_dots.svg")
plt.show()

In [None]:
# Box plot of the realized window errors with the individual values drawn on
# top as jittered dots.
subsequence_lengths = list(expected_error)
realized_mean_errors = [realized_error[length] for length in subsequence_lengths]
plt.boxplot(realized_mean_errors)
# Boxes sit at positions 1..N. The ticks are labelled explicitly because the
# `labels` keyword of boxplot is deprecated in recent Matplotlib releases.
plt.xticks(range(1, len(subsequence_lengths) + 1), subsequence_lengths)
# Seeded generator so that the horizontal jitter, and thus the saved figure,
# is the same on every run.
jitter_rng = np.random.default_rng(42)
for idx, errors in enumerate(realized_mean_errors):
    x = jitter_rng.normal(idx + 1, 0.08, size=len(errors))
    plt.plot(x, errors, ".", color="grey", alpha=0.2)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_boxplot.svg")
plt.show()

In [None]:
# Violin plot (with mean markers) of the realized window errors, one violin
# per subsequence length.
subsequence_lengths = list(expected_error)
realized_mean_errors = [realized_error[length] for length in subsequence_lengths]
violin_parts = plt.violinplot(realized_mean_errors, showmeans=True)
tick_positions = range(1, len(subsequence_lengths) + 1)
plt.xticks(tick_positions, subsequence_lengths)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_violinplot.svg")
plt.show()

In [None]:
# Box plot of the reference (full-sequence) errors, one box per subsequence
# length.
subsequence_lengths = list(expected_error)
reference_mean_errors = [expected_error[length] for length in subsequence_lengths]
plt.boxplot(reference_mean_errors)
# Boxes sit at positions 1..N. The ticks are labelled explicitly because the
# `labels` keyword of boxplot is deprecated in recent Matplotlib releases.
plt.xticks(range(1, len(subsequence_lengths) + 1), subsequence_lengths)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_reference_boxplot_no_dots.svg")
plt.show()

In [None]:
# Box plot of the reference (full-sequence) errors with the individual values
# drawn on top as jittered dots.
subsequence_lengths = list(expected_error)
reference_mean_errors = [expected_error[length] for length in subsequence_lengths]
plt.boxplot(reference_mean_errors)
# Boxes sit at positions 1..N. The ticks are labelled explicitly because the
# `labels` keyword of boxplot is deprecated in recent Matplotlib releases.
plt.xticks(range(1, len(subsequence_lengths) + 1), subsequence_lengths)
# Seeded generator so that the horizontal jitter, and thus the saved figure,
# is the same on every run.
jitter_rng = np.random.default_rng(42)
for idx, errors in enumerate(reference_mean_errors):
    x = jitter_rng.normal(idx + 1, 0.08, size=len(errors))
    plt.plot(x, errors, ".", color="grey", alpha=0.2)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_reference_boxplot.svg")
plt.show()

In [None]:
# Violin plot of the reference (full-sequence) errors, one violin per
# subsequence length.
subsequence_lengths = list(expected_error)
reference_mean_errors = [expected_error[length] for length in subsequence_lengths]
violin_parts = plt.violinplot(reference_mean_errors)
tick_positions = range(1, len(subsequence_lengths) + 1)
plt.xticks(tick_positions, subsequence_lengths)
plt.ylabel("Percentage error")
plt.xlabel("Subsequence length")
plt.savefig("error_subsequence_reference_violinplot.svg")
plt.show()