In [None]:
import analysing_imaging_data as aid
import custom_plotting_v1 as cp
import scipy.stats as ss
import scikit_posthocs as sp
import statsmodels.formula.api as smf
import statsmodels.api as sa

In [None]:
# Read the CSV files of each output folder into one tidy frame per folder.
tidy_df_C1 = aid.read_csv_folder_into_tidy_df("./Output_C1/[a-z]*.csv")
tidy_df_C0 = aid.read_csv_folder_into_tidy_df("./Output_C0/[a-z]*.csv")

# Divide the C0 table by the C1 table, matching rows on the identifier columns;
# the identifiers are moved into the index for the division and restored after.
# NOTE(review): these identifiers appear to repeat across rows (they are grouped
# and averaged further down), so the division presumably relies on both frames
# listing their rows in the same order - confirm.
id_columns = ["Sample_Gut_id", "Sample_id", "Gut_id"]
c0_by_id = tidy_df_C0.set_index(id_columns)
c1_by_id = tidy_df_C1.set_index(id_columns)
tidy_df_C0divC1 = c0_by_id.div(c1_by_id).reset_index()

In [None]:
# Build the palette from the sample ids; it is reused by every plot below.
my_pal = cp.define_pallette_tidy(series_id=tidy_df_C0divC1["Sample_id"])

In [None]:
# Strip + box plot of every row of the ratio table, one category per sample id.
# The figure is only displayed (save_fig=False) and the y-axis is left unbounded.
cp.tidy_create_strip_box_plot(
    data=tidy_df_C0divC1,
    x="Sample_id",
    y="Mean",
    palette=my_pal,
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    y_axis_limit=None,
    save_fig=False,
)

In [None]:
# Kruskal-Wallis test across samples: collect the "Mean" values of each
# Sample_id group into its own array and pass all arrays to the test.
means_per_sample = tidy_df_C0divC1.groupby("Sample_id")["Mean"]
data = [sample_means.values for _, sample_means in means_per_sample]
H, p = ss.kruskal(*data)
f"P.Value is {p}"

In [None]:
# Pairwise Dunn post-hoc comparisons between samples, Bonferroni-adjusted;
# the resulting table is shown rounded to five decimals.
dunn_pairwise = sp.posthoc_dunn(
    tidy_df_C0divC1,
    val_col="Mean",
    group_col="Sample_id",
    p_adjust="bonferroni",
)
dunn_pairwise.round(5)

In [None]:
# Collapse the ratio table to one row per (Sample_Gut_id, Sample_id, Gut_id)
# combination, once using the mean and once using the median of each group.
gut_keys = ["Sample_Gut_id", "Sample_id", "Gut_id"]
rows_per_gut = tidy_df_C0divC1.groupby(gut_keys, as_index=False)

tidy_df_C0divC1_grouped_mean = rows_per_gut.mean()
tidy_df_C0divC1_grouped_median = rows_per_gut.median()

In [None]:
# Swarm + box plot of the group means, one category per sample id.
# The figure is only displayed (save_fig=False) and the y-axis is left unbounded.
cp.tidy_create_swarm_box_plot(
    data=tidy_df_C0divC1_grouped_mean,
    x="Sample_id",
    y="Mean",
    palette=my_pal,
    ExpName=aid.exp_analysis_name() + "mean",
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    y_axis_limit=None,
    save_fig=False,
)

In [None]:
# One-way ANOVA on the group means: fit an OLS model with Sample_id as a
# categorical predictor, then display its ANOVA table.
mean_model = smf.ols(
    formula="Mean ~ C(Sample_id)",
    data=tidy_df_C0divC1_grouped_mean,
)
lm_mean_data = mean_model.fit()
anova_mean_data = sa.stats.anova_lm(lm_mean_data)
anova_mean_data

In [None]:
# Pairwise t-tests between samples on the group means, using a pooled standard
# deviation and Holm-Sidak adjustment of the p-values.
mean_pairwise_ttests = sp.posthoc_ttest(
    tidy_df_C0divC1_grouped_mean,
    group_col="Sample_id",
    val_col="Mean",
    p_adjust="holm-sidak",
    pool_sd=True,
)
mean_pairwise_ttests

In [None]:
# Swarm + box plot of the group medians, one category per sample id.
# ExpName carries a "median" suffix so it no longer duplicates the name used
# for the plot of the group means (it was previously suffixed "mean" too).
# The y column is still called "Mean": the median table keeps the column names
# of the table it was aggregated from.
cp.tidy_create_swarm_box_plot(
    ExpName=aid.exp_analysis_name() + "median",
    save_fig=False,
    data=tidy_df_C0divC1_grouped_median,
    y_axis_limit=None,
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    x="Sample_id",
    y="Mean",
    palette=my_pal,
)

In [None]:
# One-way ANOVA on the group medians: fit an OLS model with Sample_id as a
# categorical predictor, then display its ANOVA table.
median_model = smf.ols(
    formula="Mean ~ C(Sample_id)",
    data=tidy_df_C0divC1_grouped_median,
)
lm_median_data = median_model.fit()
anova_median_data = sa.stats.anova_lm(lm_median_data)
anova_median_data

In [None]:
# Pairwise t-tests between samples on the group medians, using a pooled
# standard deviation and Holm-Sidak adjustment of the p-values.
median_pairwise_ttests = sp.posthoc_ttest(
    tidy_df_C0divC1_grouped_median,
    group_col="Sample_id",
    val_col="Mean",
    p_adjust="holm-sidak",
    pool_sd=True,
)
median_pairwise_ttests

In [None]:
import os

# If the notebook file still carries the template name, rename it after the
# current experiment so the analysis is kept under its own file name.
template_path = "Tidy_Image_Analysis_Template.ipynb"
if os.path.isfile(template_path):
    renamed_path = f"{aid.exp_analysis_name()}_tidy.ipynb"
    # os.rename silently replaces an existing file on POSIX and raises
    # FileExistsError on Windows; never clobber an earlier analysis, and
    # behave the same way on both platforms.
    if os.path.exists(renamed_path):
        print(f"{renamed_path} already exists; leaving {template_path} untouched.")
    else:
        os.rename(template_path, renamed_path)