In [None]:
import numpy as np
import pandas as pd

from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import biopsykit as bp
from biopsykit.stats import StatsPipeline
from biopsykit.utils.dataframe_handling import multi_xs

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from fau_colors import register_cmaps
# Called before set_theme below, which requests the "faculties_light" palette
# (presumably that palette is registered by fau_colors -- TODO confirm).
register_cmaps()

# Global seaborn styling applied to every figure created in this notebook.
sns.set_theme(context="talk", style="white", palette="faculties_light", font_scale=1.2)

# Interactive matplotlib backend (requires ipympl in the kernel environment).
%matplotlib widget

In [None]:
# Load the input frame; the path is relative to the current working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load "full_df.pkl"
# from a trusted source.
df = pd.read_pickle("full_df.pkl")

# 5 stage

In [None]:
# Keep the rows whose "stage" level is "5stage" and whose "metric" level is one
# of accuracy / f1 / mcc, then reduce to the "data" column cast to float.
df_5 = multi_xs(
    multi_xs(df, ["5stage"], level="stage"),
    ["accuracy", "f1", "mcc"],
    level="metric",
)
df_5 = df_5[["data"]].astype(float)

In [None]:
# NOTE: despite the "_mean" suffix, the aggregation computed here is the median.
group_levels_5 = ["algorithm type", "algorithm", "modality", "stage", "metric"]
modality_columns_5 = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + EDR"]

df_5_mean = df_5.groupby(level=group_levels_5).agg(["median"])

# Displayed table: one column per modality, in a fixed order.
df_5_mean.unstack("modality")["data"]["median"][modality_columns_5]

In [None]:
# MCC rows of the 5-stage results, used for the box plots.
# Modality labels are kept as stored ("ACT + HRV + EDR"). `modality` is an index
# level (the frame's only column is "data"), so any display relabelling has to go
# through `rename(index=...)`; a `rename(columns=...)` call matches nothing.
df_5_plot = multi_xs(df_5, ["mcc"], level="metric")


In [None]:
# Display label used in the figure for the "ACT + HRV + EDR" modality.
modality_labels_5 = {"ACT + HRV + EDR": "ACT + HRV + ED-RRV"}
algorithms_5 = ["MLP", "AdaBoost", "SVM", "Random Forest", "XGBoost", "LSTM", "TCN"]

# `modality` is an index level, so it is relabelled via `index=` before
# `reset_index()` turns it into the column used for `hue`. Without this the
# "ACT + HRV + ED-RRV" entry of `hue_order` matches no rows and its box is empty.
# The rename is a no-op if the labels were already changed upstream.
df_5_box = (
    multi_xs(df_5_plot, algorithms_5, level="algorithm")
    .rename(index=modality_labels_5, level="modality")
    .reset_index()
)

fig, ax = plt.subplots(figsize=(16, 5))
sns.boxplot(
    data=df_5_box,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + ED-RRV"],
    ax=ax,
)
ax.legend(bbox_to_anchor=(1, 1))

ax.set_title("5 stage - MCC - overall performance")

# 3 stage

In [None]:
# Keep the rows whose "stage" level is "3stage" and whose "metric" level is one
# of accuracy / f1 / mcc, then reduce to the "data" column cast to float.
df_3 = multi_xs(
    multi_xs(df, ["3stage"], level="stage"),
    ["accuracy", "f1", "mcc"],
    level="metric",
)
df_3 = df_3[["data"]].astype(float)

In [None]:
# NOTE: despite the "_mean" suffix, the aggregation computed here is the median.
group_levels_3 = ["algorithm type", "algorithm", "modality", "stage", "metric"]
modality_columns_3 = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + EDR"]

df_3_mean = df_3.groupby(level=group_levels_3).agg(["median"])

# Displayed table: one column per modality, in a fixed order.
df_3_mean.unstack("modality")["data"]["median"][modality_columns_3]

In [None]:
# MCC rows of the 3-stage results, used for the box plot.
df_3_plot = multi_xs(
    df_3,
    ["mcc"],
    level="metric",
)

In [None]:
# Box plot of MCC per algorithm, split by input modality (3-stage results).
algorithms_3 = ["MLP", "AdaBoost", "SVM", "Random Forest", "XGBoost", "LSTM", "TCN"]
df_3_box = multi_xs(df_3_plot, algorithms_3, level="algorithm").reset_index()

fig, ax = plt.subplots(figsize=(14, 5))
sns.boxplot(
    data=df_3_box,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + EDR"],
    ax=ax,
)
ax.legend(bbox_to_anchor=(1, 1))

ax.set_title("3 stage - MCC - overall performance")

# Binary

In [None]:
# Keep the rows whose "stage" level is "Binary" and whose "metric" level is one
# of accuracy / f1 / mcc, then reduce to the "data" column cast to float.
df_2 = multi_xs(
    multi_xs(df, ["Binary"], level="stage"),
    ["accuracy", "f1", "mcc"],
    level="metric",
)
df_2 = df_2[["data"]].astype(float)

In [None]:
# NOTE: despite the "_mean" suffix, the aggregation computed here is the median.
group_levels_2 = ["algorithm type", "algorithm", "modality", "stage", "metric"]
modality_columns_2 = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + EDR"]

df_2_mean = df_2.groupby(level=group_levels_2).agg(["median"])

# Displayed table: one column per modality, in a fixed order.
df_2_mean.unstack("modality")["data"]["median"][modality_columns_2]

In [None]:
# MCC rows of the binary results, used for the box plot.
df_2_plot = multi_xs(
    df_2,
    ["mcc"],
    level="metric",
)

In [None]:
# Box plot of MCC per algorithm, split by input modality (binary results).
# The algorithm list matches the 5-stage and 3-stage overview plots; "XGBoost"
# was previously missing here although it is analysed for the binary stage
# elsewhere in this notebook.
algorithms_2 = ["MLP", "AdaBoost", "SVM", "Random Forest", "XGBoost", "LSTM", "TCN"]
df_2_box = multi_xs(df_2_plot, algorithms_2, level="algorithm").reset_index()

fig, ax = plt.subplots(figsize=(14, 5))
sns.boxplot(
    data=df_2_box,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + EDR"],
    ax=ax,
)
ax.legend(bbox_to_anchor=(1, 1))

# Title follows the "<stage> - MCC - overall performance" pattern of the other plots.
ax.set_title("Binary - MCC - overall performance")

# Poster plots

In [None]:
# Poster figure: 5-stage MCC for XGBoost and LSTM only.
# The modality is relabelled locally so the legend uses "ACT + HRV + ED-RRV" and
# the plot does not depend on whether `df_5_plot` was already relabelled
# (the rename is a no-op if it was).
df_5_poster = (
    multi_xs(df_5_plot, ["XGBoost", "LSTM"], level="algorithm")
    .rename(index={"ACT + HRV + EDR": "ACT + HRV + ED-RRV"}, level="modality")
    .reset_index()
)

fig, ax = plt.subplots(figsize=(15, 5))
sns.boxplot(
    data=df_5_poster,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + ED-RRV"],
    ax=ax,
)
ax.legend(bbox_to_anchor=(1, 1))

ax.set_title("5 stage - MCC - overall performance")

# Plot best-performing ML + DL

# 5 stage

In [None]:
# Parameters for this section: the stage to analyse and the algorithms compared.
stage = "5stage"
algorithm = ["LSTM", "XGBoost"]

In [None]:
# Select the rows of the chosen stage and relabel the EDR modality for display.
df_plot = multi_xs(df, [stage], level="stage").rename(
    index={"ACT + HRV + EDR": "ACT + HRV + ED-RRV"}
)

In [None]:
# Keep only the three metrics that are tested and plotted in this section.
# (`bp` and `StatsPipeline` are imported once in the import cells at the top of
# the notebook instead of being re-imported in every section.)
df_plot = multi_xs(df_plot, ["accuracy", "mcc", "f1"], level="metric")

In [None]:
# Restrict the data to the algorithms selected for this section.
df_plot = multi_xs(df_plot, algorithm, level="algorithm")

In [None]:
# One statistics pipeline per metric: a "kruskal" test step followed by
# non-parametric "pairwise_tests" as post-hoc step, with "modality" as the
# between factor and the data grouped by "algorithm". No "subject" or
# "multicomp" setting is passed.
dict_stats = {}

for metric in ("accuracy", "mcc", "f1"):
    # Fresh step and parameter objects are built for every pipeline.
    steps = [("test", "kruskal"), ("posthoc", "pairwise_tests")]
    params = dict(
        dv="data",
        between="modality",
        groupby=["algorithm"],
        parametric=False,
    )

    df_slice = df_plot.xs(metric, level="metric")
    pipeline = StatsPipeline(steps, params, round=4)
    pipeline.apply(df_slice)
    dict_stats[metric] = pipeline

dict_stats

In [None]:
# Collect the significance-bracket information of the post-hoc step per metric.
bracket_kwargs = dict(
    stats_type="between",
    plot_type="multi",
    x="algorithm",
    stats_effect_type="between",
)

dict_brackets = {}
for key in ("accuracy", "mcc", "f1"):
    dict_brackets[key] = dict_stats[key].sig_brackets("posthoc", **bracket_kwargs)

In [None]:
# Split each bracket result into its first element (box pairs) and second
# element (p-values), keyed by metric.
box_pairs, pvalues = {}, {}
for bracket_key, bracket_val in dict_brackets.items():
    box_pairs[bracket_key] = bracket_val[0]
    pvalues[bracket_key] = bracket_val[1]

In [None]:
# Three-panel box plot (one panel per metric) with significance brackets,
# saved as a PDF in the current working directory.
features = ["accuracy", "mcc", "f1"]
modality_order = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + ED-RRV"]
metric_ylabels = {
    "accuracy": "Accuracy [%]",
    "mcc": "Matthews's correlation coefficient",
    "f1": "F1-score [%]",
}
bracket_style = {
    "box_pairs": box_pairs,
    "pvalues": pvalues,
    "fontsize": "medium",
    "text_offset": -6,
    "verbose": False,
}

fig, axs = plt.subplots(figsize=(17, 7), ncols=3)
fig, axs = bp.plotting.multi_feature_boxplot(
    data=df_plot,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=modality_order,
    group="metric",
    features=features,
    stats_kwargs=bracket_style,
    ylabels=metric_ylabels,
    axs=axs,
)
fig.tight_layout(rect=(0, 0, 1, 0.95), pad=1.5)
handles, labels = axs[0].get_legend_handles_labels()

# Drop the per-axes legends and normalise axis label and ticks on every panel.
for ax, metric in zip(axs, features):
    ax.legend().remove()
    ax.set_xlabel("Algorithm")
    ax.tick_params(reset=True, right=False, top=False)

# Replace the figure-level legend by a single four-column legend.
fig.legends[0].remove()
plt.legend(ncol=4, bbox_to_anchor=(0.7, 1.15))

axs[0].set_yticks(list(np.arange(20, 110, 10)))
axs[1].set_yticks(list(np.arange(0, 1.1, 0.2)))
axs[2].set_yticks(list(np.arange(0, 110, 10)))

# Upper limit extends beyond the last tick of the accuracy panel.
axs[0].set_ylim(15, 115)

plt.savefig(
    Path.cwd() / f"best_performing_{stage}_.pdf",
    format="pdf",
    bbox_inches="tight",
)

# 3 stage

In [None]:
# Parameters for this section: the stage to analyse and the algorithms compared.
stage = "3stage"
algorithm = ["LSTM", "XGBoost"]

In [None]:
# Select the rows of the chosen stage and relabel the EDR modality for display.
df_plot = multi_xs(df, [stage], level="stage").rename(
    index={"ACT + HRV + EDR": "ACT + HRV + ED-RRV"}
)

In [None]:
# Keep only the three metrics that are tested and plotted in this section.
# (`bp` and `StatsPipeline` are imported once in the import cells at the top of
# the notebook instead of being re-imported in every section.)
df_plot = multi_xs(df_plot, ["accuracy", "mcc", "f1"], level="metric")

In [None]:
# Restrict the data to the algorithms selected for this section.
df_plot = multi_xs(df_plot, algorithm, level="algorithm")

In [None]:
# One statistics pipeline per metric: a "kruskal" test step followed by
# non-parametric "pairwise_tests" as post-hoc step, with "modality" as the
# between factor and the data grouped by "algorithm". No "subject" or
# "multicomp" setting is passed.
dict_stats = {}

for metric in ("accuracy", "mcc", "f1"):
    # Fresh step and parameter objects are built for every pipeline.
    steps = [("test", "kruskal"), ("posthoc", "pairwise_tests")]
    params = dict(
        dv="data",
        between="modality",
        groupby=["algorithm"],
        parametric=False,
    )

    df_slice = df_plot.xs(metric, level="metric")
    pipeline = StatsPipeline(steps, params, round=4)
    pipeline.apply(df_slice)
    dict_stats[metric] = pipeline

dict_stats

In [None]:
# Collect the significance-bracket information of the post-hoc step per metric.
bracket_kwargs = dict(
    stats_type="between",
    plot_type="multi",
    x="algorithm",
    stats_effect_type="between",
)

dict_brackets = {}
for key in ("accuracy", "mcc", "f1"):
    dict_brackets[key] = dict_stats[key].sig_brackets("posthoc", **bracket_kwargs)

In [None]:
# Split each bracket result into its first element (box pairs) and second
# element (p-values), keyed by metric.
box_pairs, pvalues = {}, {}
for bracket_key, bracket_val in dict_brackets.items():
    box_pairs[bracket_key] = bracket_val[0]
    pvalues[bracket_key] = bracket_val[1]

In [None]:
# Three-panel box plot (one panel per metric) with significance brackets,
# saved as a PDF in the current working directory.
features = ["accuracy", "mcc", "f1"]
modality_order = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + ED-RRV"]
metric_ylabels = {
    "accuracy": "Accuracy [%]",
    "mcc": "Matthews's correlation coefficient",
    "f1": "F1-score [%]",
}
bracket_style = {
    "box_pairs": box_pairs,
    "pvalues": pvalues,
    "fontsize": "medium",
    "text_offset": -6,
    "verbose": False,
}

fig, axs = plt.subplots(figsize=(17, 7), ncols=3)
fig, axs = bp.plotting.multi_feature_boxplot(
    data=df_plot,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=modality_order,
    group="metric",
    features=features,
    stats_kwargs=bracket_style,
    ylabels=metric_ylabels,
    axs=axs,
)
fig.tight_layout(rect=(0, 0, 1, 0.95), pad=1.5)
handles, labels = axs[0].get_legend_handles_labels()

# Drop the per-axes legends and normalise axis label and ticks on every panel.
for ax, metric in zip(axs, features):
    ax.legend().remove()
    ax.set_xlabel("Algorithm")
    ax.tick_params(reset=True, right=False, top=False)

# Replace the figure-level legend by a single four-column legend.
fig.legends[0].remove()
plt.legend(ncol=4, bbox_to_anchor=(0.7, 1.15))

axs[0].set_yticks(list(np.arange(20, 110, 10)))
axs[1].set_yticks(list(np.arange(0, 1.1, 0.2)))
axs[2].set_yticks(list(np.arange(0, 110, 10)))

# Upper limit extends beyond the last tick of the accuracy panel.
axs[0].set_ylim(15, 135)

plt.savefig(
    Path.cwd() / f"best_performing_{stage}_.pdf",
    format="pdf",
    bbox_inches="tight",
)

# Binary

In [None]:
# Parameters for this section: the stage to analyse and the algorithms compared.
stage = "Binary"
algorithm = ["LSTM", "XGBoost"]

In [None]:
# Select the rows of the chosen stage and relabel the EDR modality for display.
df_plot = multi_xs(df, [stage], level="stage").rename(
    index={"ACT + HRV + EDR": "ACT + HRV + ED-RRV"}
)

In [None]:
# Keep only the three metrics that are tested and plotted in this section.
# (`bp` and `StatsPipeline` are imported once in the import cells at the top of
# the notebook instead of being re-imported in every section.)
df_plot = multi_xs(df_plot, ["accuracy", "mcc", "f1"], level="metric")

In [None]:
# Restrict the data to the algorithms selected for this section.
df_plot = multi_xs(df_plot, algorithm, level="algorithm")

In [None]:
# One statistics pipeline per metric: a "kruskal" test step followed by
# non-parametric "pairwise_tests" as post-hoc step, with "modality" as the
# between factor and the data grouped by "algorithm". No "subject" or
# "multicomp" setting is passed.
dict_stats = {}

for metric in ("accuracy", "mcc", "f1"):
    # Fresh step and parameter objects are built for every pipeline.
    steps = [("test", "kruskal"), ("posthoc", "pairwise_tests")]
    params = dict(
        dv="data",
        between="modality",
        groupby=["algorithm"],
        parametric=False,
    )

    df_slice = df_plot.xs(metric, level="metric")
    pipeline = StatsPipeline(steps, params, round=4)
    pipeline.apply(df_slice)
    dict_stats[metric] = pipeline

dict_stats

In [None]:
# Collect the significance-bracket information of the post-hoc step per metric.
bracket_kwargs = dict(
    stats_type="between",
    plot_type="multi",
    x="algorithm",
    stats_effect_type="between",
)

dict_brackets = {}
for key in ("accuracy", "mcc", "f1"):
    dict_brackets[key] = dict_stats[key].sig_brackets("posthoc", **bracket_kwargs)

In [None]:
# Split each bracket result into its first element (box pairs) and second
# element (p-values), keyed by metric, and show both for inspection.
box_pairs, pvalues = {}, {}
for bracket_key, bracket_val in dict_brackets.items():
    box_pairs[bracket_key] = bracket_val[0]
    pvalues[bracket_key] = bracket_val[1]

display(box_pairs, pvalues)

In [None]:
# Three-panel box plot (one panel per metric) with significance brackets,
# saved as a PDF in the current working directory.
features = ["accuracy", "mcc", "f1"]
modality_order = ["ACT", "ACT + HRV", "ACT + HRV + RRV", "ACT + HRV + ED-RRV"]
metric_ylabels = {
    "accuracy": "Accuracy [%]",
    "mcc": "Matthews's correlation coefficient",
    "f1": "F1-score [%]",
}
bracket_style = {
    "box_pairs": box_pairs,
    "pvalues": pvalues,
    "fontsize": "medium",
    "text_offset": -6,
    "verbose": False,
}

fig, axs = plt.subplots(figsize=(17, 7), ncols=3)
fig, axs = bp.plotting.multi_feature_boxplot(
    data=df_plot,
    x="algorithm",
    y="data",
    hue="modality",
    hue_order=modality_order,
    group="metric",
    features=features,
    stats_kwargs=bracket_style,
    ylabels=metric_ylabels,
    axs=axs,
)
fig.tight_layout(rect=(0, 0, 1, 0.95), pad=1.5)
handles, labels = axs[0].get_legend_handles_labels()

# Drop the per-axes legends and normalise axis label and ticks on every panel.
for ax, metric in zip(axs, features):
    ax.legend().remove()
    ax.set_xlabel("Algorithm")
    ax.tick_params(reset=True, right=False, top=False)

# Replace the figure-level legend by a single four-column legend.
fig.legends[0].remove()
plt.legend(ncol=4, bbox_to_anchor=(0.7, 1.15))

axs[0].set_yticks(list(np.arange(20, 110, 10)))
axs[1].set_yticks(list(np.arange(0, 1.1, 0.2)))
axs[2].set_yticks(list(np.arange(0, 110, 10)))

# Upper limit extends beyond the last tick of the accuracy panel.
axs[0].set_ylim(15, 135)

plt.savefig(
    Path.cwd() / f"best_performing_{stage}_.pdf",
    format="pdf",
    bbox_inches="tight",
)