# CMJ Pipeline (Clean)

This notebook refactors the original pipeline into reusable functions so you can run the same workflow for any metric and generate plots consistently.


In [1]:
import glob
import os
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib.lines import Line2D


## Configuration


In [None]:
# --- Input / output locations ---------------------------------------------
# Directory scanned for "*.csv" exports.
SRC_PATH = "data/00_src" # Remember to have this data generated from Hawkin Dynamics SDK
# CSV written by load_and_combine() with every source file merged.
COMBINED_PATH = "data/01_raw/01_combined.csv"
# CSV written by filter_cmj() with only the rows of CMJ_TEST_TYPE.
PURE_CMJ_PATH = "data/01_raw/02_combined_cmj.csv"
# Root directory for figures; one sub-directory per metric is created below it.
PLOTS_DIR = "data/plots"
# Directory for the per-metric "best_rep_<metric>.csv" tables.
STATS_DIR = "data/metric_stats"
# Text log that receives the per-stage dataset summaries.
LOG_PATH = "data/pipeline_stats.log"

# Value of the "testType_name" column that identifies the rows to keep.
CMJ_TEST_TYPE = "Countermovement Jump"

# Rows whose "athlete_name" is listed here are removed in load_and_combine().
ATHLETES_TO_DROP = [
    "test test",
    "test walidacja imu",
    "test 1",
    "test 2",
    "test 3",
    "test 4",
    "test 5",
    "test 6",
    "Zawodnik 1",
    "Zawodnik 2",
    "Matt Jordan",
    "Matt Jordan 2",
    "Matt Jordan 4",
    "Matt Jordan 5",
]

# Columns removed from the combined frame in load_and_combine().
COLUMNS_TO_DROP = [
    "athlete_id",
    "external_kl6xI7wHgSTNtCNyjJpYWHYOmmn1",
    "testType_id",
    "testType_canonicalId",
    "tag_ids",
    "athlete_teams",
    "athlete_groups",
    "active",
    "athlete_active",
]

# --- Session filtering thresholds ------------------------------------------
# Sessions with session_number <= this value are discarded as familiarization.
MIN_FAMILIARIZATION_DAYS = 1
# Sessions with fewer repetitions than this are discarded before the
# familiarization cut-off is applied.
MIN_REPS_IN_FAM_SESSION = 3
# Remaining sessions need at least this many repetitions to be kept.
MIN_REPS_IN_VALID_SESSION = 6
# Only repetitions with rep_number <= this value are kept in each session.
KEEP_FIRST_N = 6
# Minimum "rest_before_rep_seconds" required before repetition 4 of a session.
REST_THRESHOLD_SEC = 30

# (column name, axis/title label) pairs; plots are generated for each entry.
METRICS_CONFIG = [
    ("jump_height_m", "Jump Height [m]"),
    ("mrsi", "mRSI"),
    ("stiffness_n_m", "Stiffness [N/m]"),
    ("avg_relative_braking_force", "Avg. Rel. Braking Force [N/kg]"),
    ("avg_relative_braking_power_w_kg", "Avg. Rel. Braking Power [W/kg]"),
    ("avg_relative_propulsive_force", "Avg. Rel. Propulsive Force [N/kg]"),
    ("avg_relative_propulsive_power_w_kg", "Avg. Rel. Propulsive Power [W/kg]"),
]

# Divisor used to convert "system_weight_n" into "system_weight_kg"
# (presumably standard gravity in m/s^2).
G_ACC = 9.80665

## Pipeline technical functions

In [12]:
def ensure_dir(path: str) -> None:
    """Create directory *path*, including missing parents, if it does not exist.

    An empty *path* is treated as "nothing to create": ``os.path.dirname``
    returns ``""`` for a bare file name, and ``os.makedirs("")`` would raise
    ``FileNotFoundError``. Accepting it here keeps callers safe even when they
    forget the ``or "."`` fallback.

    Args:
        path: Directory to create. May be empty.
    """
    if not path:
        return
    os.makedirs(path, exist_ok=True)

def log_stats(stage: str, df: pl.DataFrame, log_path: str) -> None:
    """Append a short summary of *df* to the log file under the heading *stage*.

    The summary holds the earliest and latest ``test_date``, the number of
    rows (counted on ``id``), the number of distinct ``athlete_name`` values
    and the number of distinct (``athlete_name``, ``test_date``) pairs.

    Args:
        stage: Heading written above the summary.
        df: Frame to summarise.
        log_path: File the summary is appended to.
    """
    # Every expression aggregates to a single value, so the result is one row.
    summary = df.select(
        pl.col("test_date").min().alias("first_date"),
        pl.col("test_date").max().alias("last_date"),
        pl.col("id").len().alias("rep_count"),
        pl.col("athlete_name").n_unique().alias("athlete_count"),
        pl.struct(["athlete_name", "test_date"]).n_unique().alias("session_count"),
    )
    first_date, last_date, rep_count, athlete_count, session_count = summary.row(0)

    report_lines = [
        "",
        f"{stage}:",
        f"    First CMJ recorded date: {first_date}",
        f"    Last CMJ recorded date: {last_date}",
        f"    Count of CMJ repetitions: {rep_count}",
        f"    Count of athletes: {athlete_count}",
        f"    Count of sessions: {session_count}",
        "------------------------------------------------------",
    ]
    log_message("\n".join(report_lines), log_path)


def log_message(message: str, log_path: str) -> None:
    """Append *message* verbatim to the UTF-8 text file at *log_path*.

    The parent directory of *log_path* is created first when it is missing.
    """
    parent_dir = os.path.dirname(log_path)
    ensure_dir(parent_dir if parent_dir else ".")
    with open(log_path, mode="a", encoding="utf-8") as log_file:
        log_file.write(message)



def reset_log(log_path: str) -> None:
    """Create *log_path* as an empty file, discarding any previous content.

    The parent directory of *log_path* is created first when it is missing.
    """
    parent_dir = os.path.dirname(log_path)
    ensure_dir(parent_dir if parent_dir else ".")
    # Opening in "w" mode already truncates the file; nothing has to be written.
    with open(log_path, mode="w", encoding="utf-8"):
        pass

## Data pipeline functions


In [13]:
def load_and_combine(src_path: str, combined_path: str) -> pl.DataFrame:
    """Read every ``*.csv`` in *src_path*, merge, clean and save the result.

    Steps, in order:
      1. concatenate all files (``diagonal_relaxed``, so differing column
         sets and dtypes are tolerated);
      2. drop rows whose ``athlete_name`` is in ``ATHLETES_TO_DROP``;
      3. keep one row per ``id``;
      4. drop ``COLUMNS_TO_DROP``;
      5. derive ``test_datetime`` / ``test_date`` from ``timestamp``, convert
         ``last_sync_time`` from epoch seconds and add ``system_weight_kg``;
      6. reorder columns, sort by ``test_datetime`` and write *combined_path*.

    Args:
        src_path: Directory containing the source CSV files.
        combined_path: Destination CSV for the combined frame.

    Returns:
        The combined, cleaned frame sorted by ``test_datetime``.

    Raises:
        ValueError: If *src_path* contains no ``*.csv`` file.
    """
    # glob returns files in a filesystem-dependent order; sorting makes the
    # concatenation order (and therefore the de-duplication below) reproducible.
    files = sorted(glob.glob(os.path.join(src_path, "*.csv")))
    if not files:
        raise ValueError(f"No CSV files found in {src_path!r}")
    df_all = pl.concat([pl.read_csv(f) for f in files], how="diagonal_relaxed")

    df_all = df_all.filter(~pl.col("athlete_name").is_in(ATHLETES_TO_DROP))
    # keep="first" + maintain_order=True: when the same id appears in several
    # files, always keep the row from the first file instead of an arbitrary one.
    df_all = df_all.unique(subset="id", keep="first", maintain_order=True)
    df_all = df_all.drop(COLUMNS_TO_DROP)

    df_all = df_all.with_columns(
        pl.from_epoch(pl.col("timestamp"), time_unit="s").alias("test_datetime"),
        pl.from_epoch(pl.col("last_sync_time"), time_unit="s").alias("last_sync_time"),
        (pl.col("system_weight_n") / G_ACC).round(2).alias("system_weight_kg"),
    )
    df_all = df_all.with_columns(
        pl.col("test_datetime").dt.date().alias("test_date")
    )

    # Column order: timestamp, test_datetime, test_date adjacent; last_sync_time last.
    columns = df_all.columns.copy()
    columns.append(columns.pop(columns.index("last_sync_time")))
    columns.insert(columns.index("timestamp") + 1, columns.pop(columns.index("test_datetime")))
    columns.insert(columns.index("test_datetime") + 1, columns.pop(columns.index("test_date")))
    df_all = df_all.select(columns)

    df_all = df_all.sort("test_datetime")

    ensure_dir(os.path.dirname(combined_path) or ".")
    df_all.write_csv(combined_path)
    return df_all


def filter_cmj(df_all: pl.DataFrame, cmj_test_type: str, pure_cmj_path: str) -> pl.DataFrame:
    """Keep only rows of one test type, drop ``tag_names`` and save the result.

    Args:
        df_all: Combined frame containing a ``testType_name`` column.
        cmj_test_type: Value of ``testType_name`` to keep.
        pure_cmj_path: Destination CSV for the filtered frame.

    Returns:
        The filtered frame without the ``tag_names`` column.
    """
    is_requested_type = pl.col("testType_name") == cmj_test_type
    df_cmj = df_all.filter(is_requested_type).drop("tag_names")

    target_dir = os.path.dirname(pure_cmj_path)
    ensure_dir(target_dir or ".")
    df_cmj.write_csv(pure_cmj_path)
    return df_cmj


def add_session_rep_columns(df_cmj: pl.DataFrame) -> pl.DataFrame:
    """Add session / repetition numbering and the rest time before each rep.

    Added columns:
      * ``session_number`` - dense rank of ``test_date`` within an athlete;
      * ``rep_number`` - dense rank of ``timestamp`` within an athlete's session;
      * ``rest_before_rep_seconds`` - ``timestamp`` minus the previous row's
        ``timestamp`` in the same session (null for the first row).

    The frame is sorted by ``timestamp`` then ``athlete_name``, and the new
    columns are placed right after ``test_date``, with ``athlete_name`` first.
    """
    ts = pl.col("timestamp")
    session_keys = ["athlete_name", "session_number"]

    ordered = df_cmj.sort(["timestamp", "athlete_name"])
    with_sessions = ordered.with_columns(
        pl.col("test_date").rank(method="dense").over("athlete_name").alias("session_number")
    )
    with_reps = with_sessions.with_columns(
        ts.rank(method="dense").over(session_keys).alias("rep_number")
    )
    # The shift relies on the timestamp sort above to find the preceding rep.
    with_prev = with_reps.with_columns(
        ts.shift(1).over(session_keys).alias("prev_rep_timestamp")
    )
    result = with_prev.with_columns(
        (ts - pl.col("prev_rep_timestamp")).alias("rest_before_rep_seconds")
    ).drop("prev_rep_timestamp")

    order = list(result.columns)
    order.remove("athlete_name")
    order.insert(0, "athlete_name")
    placements = (
        ("session_number", "test_date"),
        ("rep_number", "session_number"),
        ("rest_before_rep_seconds", "rep_number"),
    )
    for name, anchor in placements:
        # Resolve the target slot before removing the column being moved.
        target = order.index(anchor) + 1
        order.remove(name)
        order.insert(target, name)
    return result.select(order)


def filter_sessions(
    df: pl.DataFrame,
    min_fam_days: int,
    min_reps_in_fam: int,
    min_reps_valid: int,
    keep_first_n: int,
    rest_threshold_sec: float,
    rest_rep_number: int = 4,
) -> pl.DataFrame:
    """Apply the session-level inclusion rules and return the surviving rows.

    Rules, applied in this order on (``athlete_name``, ``session_number``):
      1. drop sessions with fewer than *min_reps_in_fam* repetitions;
      2. drop sessions whose ``session_number`` is <= *min_fam_days*;
      3. drop sessions with fewer than *min_reps_valid* repetitions;
      4. keep only repetitions with ``rep_number`` <= *keep_first_n*;
      5. keep only sessions in which repetition *rest_rep_number* was preceded
         by at least *rest_threshold_sec* of rest.

    Args:
        df: Frame with ``athlete_name``, ``session_number``, ``rep_number``
            and ``rest_before_rep_seconds`` columns.
        min_fam_days: Highest session number treated as familiarization.
        min_reps_in_fam: Minimum repetitions for a session to count at all.
        min_reps_valid: Minimum repetitions for a session to be analysed.
        keep_first_n: Number of leading repetitions kept per session.
        rest_threshold_sec: Minimum rest required before *rest_rep_number*.
        rest_rep_number: Repetition whose preceding rest is checked. Defaults
            to 4, the value that was previously hard-coded.

    Returns:
        The filtered frame, with an added ``rep_numbers_in_session`` column
        holding the repetition count of each session before filtering.
    """
    session_keys = ["athlete_name", "session_number"]
    df = df.with_columns(
        pl.col("rep_number").count().over(session_keys).alias("rep_numbers_in_session")
    )
    df = df.filter(pl.col("rep_numbers_in_session") >= min_reps_in_fam)
    df = df.filter(pl.col("session_number") > min_fam_days)
    df = df.filter(pl.col("rep_numbers_in_session") >= min_reps_valid)
    df = df.filter(pl.col("rep_number") <= keep_first_n)

    # Sessions that satisfy the rest requirement; the semi join keeps every
    # row of those sessions without adding columns.
    rested_sessions = (
        df.filter(
            (pl.col("rep_number") == rest_rep_number) &
            (pl.col("rest_before_rep_seconds") >= rest_threshold_sec)
        )
        .select(session_keys)
        .unique()
    )
    return df.join(rested_sessions, on=session_keys, how="semi")


def add_mrsi_session_stats(df: pl.DataFrame) -> pl.DataFrame:
    """Add per-session mRSI summaries, each rounded to 4 decimals.

    Added columns, computed over (``athlete_name``, ``session_number``) and
    placed directly after ``mrsi``:
      * ``max_mrsi_in_session`` - session maximum;
      * ``avg_mrsi_in_session`` - session mean;
      * ``avg_3_best_mrsi_in_session`` - mean of the three highest values.
    """
    session_keys = ["athlete_name", "session_number"]
    # Nulls are dropped before ranking so a missing value can never occupy one
    # of the three "best" slots, wherever the sort would place nulls.
    top_three = pl.col("mrsi").drop_nulls().sort(descending=True).head(3)
    df = df.with_columns([
        pl.col("mrsi").max().round(4).over(session_keys).alias("max_mrsi_in_session"),
        pl.col("mrsi").mean().round(4).over(session_keys).alias("avg_mrsi_in_session"),
        top_three.mean().round(4).over(session_keys).alias("avg_3_best_mrsi_in_session"),
    ])

    columns = df.columns.copy()
    columns.insert(columns.index("mrsi") + 1, columns.pop(columns.index("max_mrsi_in_session")))
    columns.insert(columns.index("mrsi") + 2, columns.pop(columns.index("avg_mrsi_in_session")))
    columns.insert(columns.index("mrsi") + 3, columns.pop(columns.index("avg_3_best_mrsi_in_session")))
    return df.select(columns)


def add_within_athlete_zscore_abs(df: pl.DataFrame, metric_col: str, ddof: int = 1) -> pl.DataFrame:
    """Add a ``z_score`` column: *metric_col* standardised within each athlete.

    The metric is first replaced by its absolute value (the returned frame
    carries the absolute values in *metric_col*). Mean and standard deviation
    are then computed per ``athlete_name``. Where the z-score is undefined
    (zero or missing standard deviation) it is set to 0.0.

    Args:
        df: Frame with ``athlete_name`` and *metric_col*.
        metric_col: Name of the metric column to standardise.
        ddof: Delta degrees of freedom passed to the standard deviation.
    """
    metric = pl.col(metric_col)
    df_abs = df.with_columns(metric.abs())

    per_athlete = df_abs.group_by("athlete_name").agg(
        metric.mean().alias("m_mean"),
        metric.std(ddof=ddof).alias("m_sd"),
    )
    # A zero SD becomes null so the division yields null instead of inf/NaN.
    per_athlete = per_athlete.with_columns(
        pl.col("m_sd").replace(0, None).alias("m_sd_safe")
    )

    joined = df_abs.join(per_athlete, on="athlete_name", how="left")
    raw_z = (metric - pl.col("m_mean")) / pl.col("m_sd_safe")
    with_z = joined.with_columns(raw_z.fill_null(0.0).fill_nan(0.0).alias("z_score"))
    return with_z.drop(["m_mean", "m_sd", "m_sd_safe"])


def best_session_ever(df: pl.DataFrame, kpi_col: str) -> pl.DataFrame:
    """Return, for every athlete, the rows of the session with the largest |kpi|.

    The session is the ``session_number`` of the row whose absolute *kpi_col*
    value is highest within the athlete; all rows of that session are kept.
    """
    kpi_magnitude = pl.col(kpi_col).abs()
    top_session = (
        pl.col("session_number")
        .sort_by(kpi_magnitude, descending=True)
        .first()
        .over("athlete_name")
    )
    tagged = df.with_columns(top_session.alias("best_session_number"))
    selected = tagged.filter(pl.col("session_number") == pl.col("best_session_number"))
    return selected.drop("best_session_number")


def get_best_reps(df: pl.DataFrame, selector: str) -> pl.DataFrame:
    """Return, for every athlete, the rows whose rep number is the athlete's best.

    The best rep is the ``rep_number`` of the row with the highest absolute
    *selector* value within the athlete. The grouping is by ``athlete_name``
    only, so every row of that athlete with this rep number is returned.
    """
    selector_magnitude = pl.col(selector).abs()
    top_rep = (
        pl.col("rep_number")
        .sort_by(selector_magnitude, descending=True)
        .first()
        .over("athlete_name")
    )
    tagged = df.with_columns(top_rep.alias("best_rep_number"))
    selected = tagged.filter(pl.col("rep_number") == pl.col("best_rep_number"))
    return selected.drop("best_rep_number")

def write_best_rep_stats(best_rep_df: pl.DataFrame, out_path: str) -> None:
    """Write a ``rep_number`` / ``count`` table of *best_rep_df* to *out_path*.

    Rows are counted per ``rep_number`` and the table is sorted by it. The
    parent directory of *out_path* is created when missing.
    """
    per_rep = best_rep_df.group_by("rep_number").len()
    stats_df = per_rep.rename({"len": "count"}).sort("rep_number")

    target_dir = os.path.dirname(out_path)
    ensure_dir(target_dir or ".")
    stats_df.write_csv(out_path)

def add_sex_height_from_csv(df: pl.DataFrame, src_file: str) -> pl.DataFrame:
    """Left-join the athlete lookup CSV onto *df* and validate sex / height.

    The lookup file is joined on ``athlete_name``. After the join, ``sex``
    and ``height_cm`` are moved to directly follow ``athlete_name``.

    Args:
        df: Frame with an ``athlete_name`` column.
        src_file: CSV with ``athlete_name``, ``sex`` and ``height_cm`` columns.

    Returns:
        *df* with the lookup columns added.

    Raises:
        ValueError: If any row ends up with a null/empty ``sex`` or a
            null/zero ``height_cm``. The message lists the affected athletes.
    """
    df_sex_height = pl.read_csv(src_file)
    df = df.join(
        df_sex_height,
        on="athlete_name",
        how="left"
    )

    missing_sex = df.filter(pl.col("sex").is_null() | (pl.col("sex") == ""))
    if missing_sex.height > 0:
        names = missing_sex.get_column("athlete_name").unique().sort().to_list()
        raise ValueError(f"There are athletes without assigned sex! Affected athletes: {names}")

    missing_height = df.filter(pl.col("height_cm").is_null() | (pl.col("height_cm") == 0))
    if missing_height.height > 0:
        names = missing_height.get_column("athlete_name").unique().sort().to_list()
        raise ValueError(f"There are athletes without height_cm! Affected athletes: {names}")

    columns = df.columns.copy()

    columns.insert(columns.index("athlete_name") + 1, columns.pop(columns.index("sex")))
    columns.insert(columns.index("athlete_name") + 2, columns.pop(columns.index("height_cm")))

    return df.select(columns)

## Plotting functions


In [14]:
# Colour of the vertical lines marking the bootstrap CI of the median.
CI_COLOR = "#ff8c00"
# Colour of the median line inside each box.
MEDIAN_COLOR = "#b35900"
# Colour of the mean markers (plot and legend).
MEAN_COLOR = "#d62728"
# Width of each box in the raincloud plot, in x-axis units.
BOX_WIDTH = 0.25
# Horizontal shift of the mean marker: half a box width to the right of the trial.
MEAN_X_OFFSET = BOX_WIDTH / 2.0
# Seed of the random generator used for the bootstrap resampling.
SEED_BOOT = 42
# Seed of the random generator used for the horizontal jitter of the points.
SEED_JITTER = 123
# Trial (repetition) numbers shown on the x-axis of every plot.
REP_COLS = [1, 2, 3, 4, 5, 6]


def set_manuscript_font() -> None:
    """Set matplotlib's global font family and font sizes for the figures.

    The family is the first of "Times New Roman" and "Nimbus Roman" known to
    matplotlib's font manager, falling back to the generic "serif" family.
    """
    installed = {font.name for font in fm.fontManager.ttflist}
    preferred = ("Times New Roman", "Nimbus Roman")
    family = next((name for name in preferred if name in installed), "serif")

    font_settings = {
        "font.family": family,
        "font.size": 12,
        "axes.titlesize": 11,
        "axes.labelsize": 12,
        "xtick.labelsize": 11,
        "ytick.labelsize": 11,
    }
    plt.rcParams.update(font_settings)


def bootstrap_median_ci(values: np.ndarray, B: int = 5000, rng: np.random.Generator | None = None):
    values = np.asarray(values, dtype=float)
    n = len(values)
    rng = rng or np.random.default_rng(SEED_BOOT)

    boot = np.empty(B, dtype=float)
    for b in range(B):
        boot[b] = np.median(rng.choice(values, size=n, replace=True))

    med = float(np.median(values))
    lo = float(np.percentile(boot, 2.5))
    hi = float(np.percentile(boot, 97.5))
    return med, lo, hi


def add_n_annotation(ax: plt.Axes, n: int) -> None:
    """Write ``n=<n>`` in the bottom-right corner of *ax* (axes coordinates)."""
    label = f"n={n}"
    ax.text(
        0.99,
        0.02,
        label,
        ha="right",
        va="bottom",
        fontsize=9,
        transform=ax.transAxes,
    )


def plot_raincloud(
    df_bestday,
    title: str,
    out_png: str,
    rep_col: str = "rep_number",
    z_col: str = "z_score",
    athlete_col: str = "athlete_name",
    y_label: str = "Within-athlete z-score",
) -> None:
    """Draw a per-trial raincloud plot of *z_col* and save it to *out_png*.

    For each trial in ``REP_COLS`` the figure shows a violin, jittered raw
    points, a box plot without fliers, a vertical line for the bootstrap 95%
    CI of the median and a mean marker shifted by ``MEAN_X_OFFSET``. A dashed
    line marks zero and the number of distinct athletes is annotated.

    Args:
        df_bestday: Frame holding *rep_col*, *z_col* and *athlete_col*.
        title: Figure title.
        out_png: Output file; written at 600 dpi.
        rep_col: Column with the trial number (rows outside 1-6 are ignored).
        z_col: Column with the plotted values (null rows are ignored).
        athlete_col: Column used to count distinct athletes.
        y_label: Label of the y-axis.
    """
    rep_as_int = pl.col(rep_col).cast(pl.Int64, strict=False).alias(rep_col)
    z_as_float = pl.col(z_col).cast(pl.Float64, strict=False).alias(z_col)
    df_plot = df_bestday.with_columns([rep_as_int, z_as_float])
    df_plot = df_plot.filter(pl.col(rep_col).is_between(1, 6))
    df_plot = df_plot.filter(pl.col(z_col).is_not_null())

    # One array of z-scores per trial, in REP_COLS order.
    data_by_rep = []
    for rep in REP_COLS:
        rep_rows = df_plot.filter(pl.col(rep_col) == rep)
        data_by_rep.append(rep_rows.get_column(z_col).to_numpy())

    means = np.array(
        [np.mean(np.asarray(rep_values, dtype=float)) for rep_values in data_by_rep],
        dtype=float,
    )

    # A single generator is shared across trials, so the trial order matters.
    rng_ci = np.random.default_rng(SEED_BOOT)
    ci_bounds = [
        bootstrap_median_ci(rep_values, B=5000, rng=rng_ci)[1:]
        for rep_values in data_by_rep
    ]
    ci_low = np.array([bounds[0] for bounds in ci_bounds], dtype=float)
    ci_high = np.array([bounds[1] for bounds in ci_bounds], dtype=float)

    fig, ax = plt.subplots(figsize=(7.2, 4.8))

    violins = ax.violinplot(
        data_by_rep,
        positions=REP_COLS,
        widths=0.85,
        showmeans=False,
        showmedians=False,
        showextrema=False,
    )
    for violin_body in violins["bodies"]:
        violin_body.set_alpha(0.30)
        violin_body.set_zorder(1)

    rng_points = np.random.default_rng(SEED_JITTER)
    x_vals = df_plot.get_column(rep_col).to_numpy().astype(float)
    y_vals = df_plot.get_column(z_col).to_numpy().astype(float)
    jitter = rng_points.uniform(-0.12, 0.12, size=len(x_vals))
    ax.scatter(x_vals + jitter, y_vals, alpha=0.25, s=10, zorder=2)

    ax.boxplot(
        data_by_rep,
        positions=REP_COLS,
        widths=BOX_WIDTH,
        showfliers=False,
        medianprops={"linewidth": 1.2, "color": MEDIAN_COLOR},
        whiskerprops={"linewidth": 1.0},
        capprops={"linewidth": 1.0},
        boxprops={"linewidth": 1.0},
    )

    for position, lower, upper in zip(REP_COLS, ci_low, ci_high):
        ax.vlines(position, lower, upper, linewidth=0.8, color=CI_COLOR, zorder=4)

    mean_positions = np.array(REP_COLS, dtype=float) + MEAN_X_OFFSET
    ax.scatter(mean_positions, means, s=8, color=MEAN_COLOR, zorder=5)

    ax.set_title(title)
    ax.set_xlabel("Trial number")
    ax.set_ylabel(y_label)
    ax.set_xticks(REP_COLS)
    ax.set_xticklabels([str(rep) for rep in REP_COLS])
    ax.axhline(0, linestyle="--", linewidth=1.0, zorder=0)

    athlete_total = int(df_plot.select(pl.col(athlete_col).n_unique()).item())
    add_n_annotation(ax, athlete_total)

    mean_handle = Line2D(
        [0], [0], marker="o", linestyle="None", markersize=4,
        markerfacecolor=MEAN_COLOR, markeredgecolor=MEAN_COLOR, label="Mean",
    )
    ci_handle = Line2D([0], [0], color=CI_COLOR, linewidth=1.0, label="Median (95% CI)")
    ax.legend(
        handles=[mean_handle, ci_handle],
        frameon=False,
        loc="upper right",
        bbox_to_anchor=(0.985, 0.98),
        fontsize=9,
        handlelength=1.0,
        handletextpad=0.4,
        borderaxespad=0.0,
    )

    fig.tight_layout()
    fig.savefig(out_png, dpi=600)
    plt.close(fig)


def plot_bestrep_distribution(
    best_info,
    title: str,
    out_png: str,
    rep_col: str = "rep_number",
    athlete_col: str = "athlete_name",
) -> None:
    """Save a bar chart of the share of rows per trial in *best_info*.

    Bars show, for each trial in ``REP_COLS``, the percentage of rows whose
    *rep_col* equals that trial; each bar is labelled with its raw count and
    the number of distinct athletes is annotated. Written at 600 dpi.
    """
    counts_df = best_info.group_by(rep_col).len().rename({"len": "count"})
    count_by_rep = dict(counts_df.select([rep_col, "count"]).rows())
    counts = np.array([int(count_by_rep.get(rep, 0)) for rep in REP_COLS], dtype=int)

    total = counts.sum()
    if total != 0:
        pct = counts / total * 100
    else:
        # No rows at all: every bar stays at zero.
        pct = np.zeros_like(counts, dtype=float) * 100

    fig, ax = plt.subplots(figsize=(7.2, 4.8))
    trials = np.array(REP_COLS)

    ax.bar(trials, pct)
    ax.set_xlabel("Trial number")
    ax.set_ylabel("Athletes (%)")
    ax.set_title(title)
    ax.set_xticks(trials)
    # Leave 25% head-room above the tallest bar for the count labels.
    ax.set_ylim(0, max(pct.max() * 1.25, 1))

    for trial, share, count in zip(trials, pct, counts):
        ax.text(trial, share, f"n={int(count)}", ha="center", va="bottom", fontsize=9)

    athlete_total = int(best_info.select(pl.col(athlete_col).n_unique()).item())
    add_n_annotation(ax, athlete_total)

    fig.tight_layout()
    fig.savefig(out_png, dpi=600)
    plt.close(fig)


def plot_bestrep_cumulative(
    best_info,
    title: str,
    out_png: str,
    rep_col: str = "rep_number",
    athlete_col: str = "athlete_name",
) -> None:
    """Save a step plot of the cumulative share of rows per trial in *best_info*.

    The y-axis is the running sum, over the trials in ``REP_COLS``, of the
    percentage of rows whose *rep_col* equals each trial. The number of
    distinct athletes is annotated and the figure is written at 600 dpi.
    """
    counts_df = best_info.group_by(rep_col).len().rename({"len": "count"})
    count_by_rep = dict(counts_df.select([rep_col, "count"]).rows())
    counts = np.array([int(count_by_rep.get(rep, 0)) for rep in REP_COLS], dtype=int)

    total = counts.sum()
    if total != 0:
        pct = counts / total * 100
    else:
        # No rows at all: the curve stays at zero.
        pct = np.zeros_like(counts, dtype=float) * 100
    cumulative = np.cumsum(pct)

    fig, ax = plt.subplots(figsize=(7.2, 4.8))
    trials = np.array(REP_COLS)

    ax.step(trials, cumulative, where="post")
    ax.set_xlabel("Trial number")
    ax.set_ylabel("Cumulative probability (%)")
    ax.set_title(title)
    ax.set_xticks(trials)
    ax.set_yticks(range(0, 101, 10))
    ax.set_ylim(0, 100)

    athlete_total = int(best_info.select(pl.col(athlete_col).n_unique()).item())
    add_n_annotation(ax, athlete_total)

    fig.tight_layout()
    fig.savefig(out_png, dpi=600)
    plt.close(fig)


## Generic plot runner


In [15]:
def generate_metric_plots(
    df_all_sessions: pl.DataFrame,
    df_best_session: pl.DataFrame,
    metric_col: str,
    metric_label: str,
    out_dir: str,
    out_prefix: str,
    z_ddof: int = 1,
) -> pl.DataFrame:
    """Create the three figures for one metric and return its best-rep rows.

    Z-scores are computed within athlete over *df_all_sessions*, then only
    the sessions present in *df_best_session* are kept for plotting.

    Args:
        df_all_sessions: All retained repetitions (basis for the z-scores).
        df_best_session: Rows identifying each athlete's chosen session.
        metric_col: Column of the metric to analyse.
        metric_label: Human-readable metric name used in the titles.
        out_dir: Directory receiving the PNG files.
        out_prefix: File-name prefix of the PNG files.
        z_ddof: Delta degrees of freedom for the within-athlete SD.

    Returns:
        The rows selected by ``get_best_reps`` for *metric_col*.
    """
    session_keys = ["athlete_name", "session_number"]

    df_all_z = add_within_athlete_zscore_abs(df_all_sessions, metric_col, ddof=z_ddof)
    chosen_sessions = df_best_session.select(session_keys)
    # Semi join: keep the z-scored rows of the chosen sessions, no new columns.
    df_best_z = df_all_z.join(chosen_sessions, on=session_keys, how="semi")

    best_rep_df = get_best_reps(df_best_z, metric_col)

    raincloud_png = f"{out_dir}/{out_prefix}_raincloud.png"
    distribution_png = f"{out_dir}/{out_prefix}_bestrep_distribution.png"
    cumulative_png = f"{out_dir}/{out_prefix}_bestrep_cumulative.png"

    plot_raincloud(
        df_best_z,
        title=f"CMJ ({metric_label}) – within-athlete z-score distribution by trial",
        out_png=raincloud_png,
        rep_col="rep_number",
        z_col="z_score",
        athlete_col="athlete_name",
        y_label="Within-athlete z-score",
    )
    plot_bestrep_distribution(
        best_rep_df,
        title=f"CMJ ({metric_label}) – distribution of best repetition by trial",
        out_png=distribution_png,
        rep_col="rep_number",
        athlete_col="athlete_name",
    )
    plot_bestrep_cumulative(
        best_rep_df,
        title=f"CMJ ({metric_label}) – cumulative probability of best repetition by trial",
        out_png=cumulative_png,
        rep_col="rep_number",
        athlete_col="athlete_name",
    )

    return best_rep_df


## Run pipeline


In [16]:
# Prepare the output locations and start from an empty log so that repeated
# runs of this cell do not accumulate summaries.
ensure_dir(PLOTS_DIR)
ensure_dir(STATS_DIR)
set_manuscript_font()
reset_log(LOG_PATH)

df_all = load_and_combine(SRC_PATH, COMBINED_PATH)
df_cmj = filter_cmj(df_all, CMJ_TEST_TYPE, PURE_CMJ_PATH)
df_cmj_sessions = add_session_rep_columns(df_cmj)

# Identification / bookkeeping columns carried through the analysis.
TECHNICAL_COLUMNS = [
    "id", "athlete_name", "timestamp", "test_datetime",
    "test_date", "session_number", "rep_number",
    "rest_before_rep_seconds", "testType_name",
    "system_weight_n", "system_weight_kg",
]

# Derived from METRICS_CONFIG so the selected columns and the plotted metrics
# cannot drift apart when a metric is added or removed.
METRICS_COLUMNS = [column for column, _label in METRICS_CONFIG]

df_filtered = df_cmj_sessions.select(TECHNICAL_COLUMNS + METRICS_COLUMNS)

log_stats("Initially", df_filtered, LOG_PATH)

# The filters below mirror filter_sessions(), but are applied one at a time
# so the dataset size can be logged after every step. The stage labels are
# built from the configured thresholds so the log always states the values
# that were actually applied.
df_filtered = (
    df_filtered
    .with_columns(
        pl.col("rep_number").count().over(["athlete_name", "session_number"]).alias("rep_numbers_in_session")
    )
    .filter(pl.col("rep_numbers_in_session") >= MIN_REPS_IN_FAM_SESSION)
)

log_stats(f"Sessions with at least {MIN_REPS_IN_FAM_SESSION} repetitions", df_filtered, LOG_PATH)

df_filtered = df_filtered.filter(pl.col("session_number") > MIN_FAMILIARIZATION_DAYS)

log_stats("One familiarization session removed from dataset", df_filtered, LOG_PATH)

df_filtered = df_filtered.filter(pl.col("rep_numbers_in_session") >= MIN_REPS_IN_VALID_SESSION)

log_stats(f"Correct sessions with at least {MIN_REPS_IN_VALID_SESSION} repetitions", df_filtered, LOG_PATH)

df_filtered = df_filtered.filter(pl.col("rep_number") <= KEEP_FIRST_N)

log_stats(f"Correct tests protocols - removed repetitions above {KEEP_FIRST_N}th", df_filtered, LOG_PATH)

# Keep only sessions in which repetition 4 was preceded by enough rest.
df_filtered = df_filtered.join(
    df_filtered
    .filter(
        (pl.col("rep_number") == 4) &
        (pl.col("rest_before_rep_seconds") >= REST_THRESHOLD_SEC)
    )
    .select(["athlete_name", "session_number"])
    .unique(),
    on=["athlete_name", "session_number"],
    how="semi",
)

log_stats(f"Correct tests protocols with at least {REST_THRESHOLD_SEC} seconds of rest", df_filtered, LOG_PATH)

df_filtered = add_mrsi_session_stats(df_filtered)

df_filtered = add_sex_height_from_csv(df_filtered, "data/athletes.csv")

# NOTE(review): the means / SDs below are taken over repetition rows, so an
# athlete with more retained repetitions weighs more - confirm this is the
# intended definition for the demographics. The sex counts are per athlete.
mean_n = df_filtered.select(pl.col("system_weight_n").mean().round(2)).item()
std_n = df_filtered.select(pl.col("system_weight_n").std(ddof=0).round(2)).item()
mean_kg = df_filtered.select(pl.col("system_weight_kg").mean().round(2)).item()
std_kg = df_filtered.select(pl.col("system_weight_kg").std(ddof=0).round(2)).item()
mean_height = df_filtered.select(pl.col("height_cm").mean().round(2)).item()
std_height = df_filtered.select(pl.col("height_cm").std(ddof=0).round(2)).item()
males_cnt = df_filtered.filter(pl.col("sex") == "M").select("athlete_name").unique().height
females_cnt = df_filtered.filter(pl.col("sex") == "F").select("athlete_name").unique().height

log_message(f"""
Demographics of filtered dataset:
    Average weight in N: {mean_n},
    STD of system weight in N: {std_n},

    Average weight in kg: {mean_kg},
    STD of weight in kg: {std_kg},
    
    Average height in cm: {mean_height},
    STD of height in cm: {std_height},
    
    Males count: {males_cnt},
    Females count: {females_cnt}
""",
LOG_PATH)



## Choose each athlete's best session and generate plots for every metric


In [17]:
# Choose best session based on max mRSI and generate plots for each metric.
# The same best session (per athlete) is reused for every metric below.
best_session = best_session_ever(df_filtered, "max_mrsi_in_session")

for metric_col, metric_label in METRICS_CONFIG:
    # Each metric gets its own sub-directory under PLOTS_DIR.
    metric_dir = f"{PLOTS_DIR}/{metric_col}"
    ensure_dir(metric_dir)

    # Z-scores are computed over all retained sessions (df_filtered); only the
    # best session's rows are plotted.
    best_rep_df = generate_metric_plots(
        df_all_sessions=df_filtered,
        df_best_session=best_session,
        metric_col=metric_col,
        metric_label=metric_label,
        out_dir=metric_dir,
        out_prefix=f"CMJ_{metric_col}",
        z_ddof=1,
    )
    # Persist how often each trial number was the best repetition.
    write_best_rep_stats(best_rep_df, f"{STATS_DIR}/best_rep_{metric_col}.csv")
