## Distance traveled report plot

In [4]:
import os
import toml
import scipy.stats

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src import settings
from src.utils import fileio

# Load the project configuration. The keys read in this section are
# "TREATMENTS", "FPS", "TIME_WINDOW" and "EXPERIMENT_DURATION".
config = fileio.get_config(settings.CONFIG_NAME)

INPUT_PATH = os.path.join(settings.RESULTS_DIR, "distances_traveled")

# Keep only the treatment folders that are listed in the configuration.
all_treatments = fileio.load_multiple_folders(INPUT_PATH)
all_treatments = {key: value for key, value in all_treatments.items() if key in config["TREATMENTS"]}

# Build one frame per treatment, indexed by (Treatment, Group, Snapshot); the
# remaining columns are whatever the per-group CSV files contain
# (presumably one column per fly -- confirm against the files).
treatment_dataframes = []
for treatment_name, treatment_path in all_treatments.items():
    all_groups = fileio.load_files_from_folder(treatment_path)
    group_dataframes = []
    for group_name, df_path in all_groups.items():
        df = pd.read_csv(df_path, index_col=0)
        # 1-based time-window number derived from the row index
        # (assumes the index is a frame number -- TODO confirm).
        df["Snapshot"] = (df.index / config["FPS"] / config["TIME_WINDOW"]).astype(int) + 1
        df["Group"] = group_name.replace(".csv", "")
        group_dataframes.append(df)

    df = pd.concat(group_dataframes)
    df["Treatment"] = treatment_name
    # One call builds the three-level index in its final order.
    df = df.set_index(["Treatment", "Group", "Snapshot"])

    treatment_dataframes.append(df)

if not treatment_dataframes:
    # pd.concat would raise an opaque "No objects to concatenate" here.
    raise ValueError(
        f"No treatment folders under {INPUT_PATH!r} match config['TREATMENTS']; nothing to plot."
    )

combined_data = pd.concat(treatment_dataframes)

# Total per column within each (Treatment, Group).
# min_count=1 matters when groups do not share the same columns: pd.concat
# fills the missing columns with NaN, and a plain sum() would turn an all-NaN
# column into 0, i.e. a phantom entry with zero distance that biases the
# treatment statistics. With min_count=1 such cells stay NaN and are dropped.
combined_data_reset = combined_data.groupby(["Treatment", "Group"]).sum(min_count=1)
combined_data_reset = (
    combined_data_reset.stack()
    .reset_index()
    .rename(columns={"level_2": "Fly", 0: "Distance"})
    # Explicit dropna so the result does not depend on whether stack()
    # itself discards NaN cells.
    .dropna(subset=["Distance"])
    .reset_index(drop=True)
)
# Normalise the total by the experiment duration (the plots label the result
# as mm/s; units of the inputs are not visible here -- confirm).
combined_data_reset["Distance"] = combined_data_reset["Distance"] / config["EXPERIMENT_DURATION"]

# Per-treatment "Distance" values, keyed as "sum_<treatment>" in config order.
treatment_sums = {
    f"sum_{name}": combined_data_reset.loc[combined_data_reset["Treatment"] == name, "Distance"]
    for name in config["TREATMENTS"]
}

# Report mean and standard deviation for every treatment.
# NOTE(review): np.std defaults to ddof=0 (population SD) -- confirm that is
# the intended estimator for this report.
for label, distances in treatment_sums.items():
    mean_value = np.mean(distances)
    std_value = np.std(distances)
    print(f"{label} mean value: {mean_value} STD value: {std_value}")

# Optional one-way ANOVA across treatments (currently disabled):
# if len(all_treatments) > 1:
#     anova_result = scipy.stats.f_oneway(*treatment_sums.values())
#     print(f"\n {anova_result}")

# Flat array of all values plus a parallel list holding the treatment name of
# each value, both in config order.
all_data = np.concatenate(list(treatment_sums.values()))
group_labels = [
    name
    for name in config["TREATMENTS"]
    for _ in range(len(treatment_sums[f"sum_{name}"]))
]


# 2x2 figure; this section fills the first three panels, which share the same
# axes styling and differ only in the seaborn function and error-bar setting.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Upper y-limit: 10% headroom above the largest plotted value.
speed_axis_top = combined_data_reset["Distance"].max() * 1.1

# (seaborn function, extra keyword arguments, target axes, panel title)
categorical_panels = (
    (sns.pointplot, {"errorbar": "sd"}, axes[0, 0], "Movement speed per treatment with SD"),
    (sns.pointplot, {"errorbar": "se"}, axes[0, 1], "Movement speed per treatment with SE"),
    (sns.boxplot, {}, axes[1, 0], "Boxplot: Movement speed per treatment"),
)

for draw_panel, extra_kwargs, panel_ax, panel_title in categorical_panels:
    draw_panel(
        data=combined_data_reset,
        x="Treatment",
        y="Distance",
        hue="Treatment",
        dodge=False,
        order=config["TREATMENTS"],
        ax=panel_ax,
        **extra_kwargs,
    )
    panel_ax.set_title(panel_title)
    # No explicit legend call is made for these panels.
    panel_ax.set_xlabel("Treatment name")
    panel_ax.set_ylabel("Movement (mm/s)")
    panel_ax.tick_params(axis="x", rotation=90)
    panel_ax.set_ylim(0, speed_axis_top)

# Bottom-right panel: one marker per row of combined_data_reset, placed on a
# categorical x-axis by its "Group" value and coloured/styled by treatment.
sns.scatterplot(
    data=combined_data_reset,
    x="Group",
    y="Distance",
    hue="Treatment",
    ax=axes[1, 1],
    s=50,
    alpha=0.6,
    markers=True,
    style="Treatment",
)

# Put one x tick per treatment, centred over that treatment's groups.
# The centres and labels are derived from the plotted data rather than from
# folder counts and config order, because:
#   * treatments may have different numbers of groups,
#   * the centre of slots 0..k-1 is (k - 1) / 2, not k / 2,
#   * the data order need not match config["TREATMENTS"], and the config may
#     list treatments that have no data (label/tick count mismatch).
# Assumes string categories are laid out in order of first appearance, which
# is how seaborn/matplotlib assign categorical positions -- confirm if the
# "Group" column is ever given a categorical dtype.
group_positions = {
    group: position for position, group in enumerate(pd.unique(combined_data_reset["Group"]))
}
treatment_centers = (
    combined_data_reset.drop_duplicates(subset=["Treatment", "Group"])
    .assign(Position=lambda frame: frame["Group"].map(group_positions))
    .groupby("Treatment", sort=False)["Position"]
    .mean()
)
locations_x = treatment_centers.tolist()

axes[1, 1].set_title("Scatter plot: Movement speed per treatment")
axes[1, 1].set_xlabel("Treatment name")
axes[1, 1].set_ylabel("Movement (mm/s)")
axes[1, 1].set_xticks(locations_x)
axes[1, 1].set_xticklabels(treatment_centers.index.tolist())
axes[1, 1].tick_params(axis="x", rotation=45)
axes[1, 1].set_ylim(0, combined_data_reset["Distance"].max() * 1.1)

plt.tight_layout()
plt.show()