In [None]:
from analysing_imaging_data import analysing_imaging_data as aid
from custom_plotting import custom_plotting as cp
from custom_stats import custom_stats

import pandas as pd
from scipy import stats
import scikit_posthocs as sp
from statsmodels import formula
from statsmodels import api
import os


# IPython autoreload in mode 2: imported modules are reloaded before each
# cell executes, so edits to imported code are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Read the two measurement tables: Output_C0 supplies the numerator and
# Output_C1 the denominator of the ratio computed below.
tidy_df_num = aid.read_csv_folder_into_tidy_df("./Output_C0/[a-z]*.csv")
tidy_df_denom = aid.read_csv_folder_into_tidy_df("./Output_C1/[a-z]*.csv")

# Index both tables by the identifier columns so that the division pairs rows
# by identity rather than by position, then restore the identifiers as columns.
# NOTE(review): keys present in only one table align to NaN -- confirm both
# folders cover the same samples.
id_cols = ["sample_gut_id", "sample_id", "gut_id"]
num_by_id = tidy_df_num.set_index(id_cols)
denom_by_id = tidy_df_denom.set_index(id_cols)
tidy_df_num_div_denom = num_by_id.div(denom_by_id).reset_index()

In [None]:
# Palette object derived from the sample_id column; it is handed to the
# plotting helpers below as `palette=`.
my_pal = cp.define_pallette_tidy(
    tidy_df_num_div_denom["sample_id"]
)

In [None]:
# Plot the per-row ratio ("Mean") for each sample_id using the project's
# strip/box plot helper.
cp.tidy_create_strip_box_plot(
    # what is plotted
    data=tidy_df_num_div_denom,
    x="sample_id",
    y="Mean",
    # naming, labelling and layout
    ExpName=aid.exp_analysis_name(),
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    y_axis_limit=None,
    palette=my_pal,
    # presumably keeps the figure on screen only -- confirm in custom_plotting
    save_fig=False,
)

In [None]:
# Omnibus comparison of "Mean" across sample_id groups (a Kruskal-Wallis test
# via scipy.stats, judging by the wrapper's name).
custom_stats.kruskal_scipy_stats_tidy_df_wrapper(
    tidy_df=tidy_df_num_div_denom,
    indep_var="sample_id",
    dep_var="Mean",
)

In [None]:
# Dunn's pairwise post hoc test with Bonferroni-adjusted p-values; only the
# first row of the resulting table is displayed.
sp.posthoc_dunn(
    tidy_df_num_div_denom,
    val_col="Mean",
    group_col="sample_id",
    p_adjust="bonferroni",
)[0:1]

In [None]:
# Summarise the ratio table per sample_gut_id with both the mean and the
# median, passing along the category list of the sample_id column.
sample_categories = tidy_df_num_div_denom["sample_id"].cat.categories

tidy_df_num_div_denom_group = aid.grouped_tidy_data_summary_stats(
    tidy_df=tidy_df_num_div_denom,
    group_col="sample_gut_id",
    agg_funcs=["mean", "median"],
    categories=sample_categories,
)

In [None]:
# Per-gut analysis using the "mean" summary rows.
stat_type = "mean"
# Filter expression selecting the rows of the grouped table for this statistic.
stat_filter = f"summary_stat == '{stat_type}'"

# Plot of the selected summary values per sample_id.
cp.tidy_create_swarm_box_plot(
    data=tidy_df_num_div_denom_group.query(stat_filter),
    x="sample_id",
    y="Mean",
    palette=my_pal,
    ExpName=aid.exp_analysis_name() + f"_{stat_type}",
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    save_fig=False,
)

# Pairwise t-tests with pooled SD and Holm-Sidak adjustment; as the last
# expression of the cell, the resulting table is what gets displayed.
sp.posthoc_ttest(
    tidy_df_num_div_denom_group.query(stat_filter),
    val_col="Mean",
    group_col="sample_id",
    pool_sd=True,
    p_adjust="holm-sidak",
)

In [None]:
# Per-gut analysis using the "median" summary rows.
stat_type = "median"
# Filter expression selecting the rows of the grouped table for this statistic.
stat_filter = f"summary_stat == '{stat_type}'"

# Plot of the selected summary values per sample_id.
cp.tidy_create_swarm_box_plot(
    data=tidy_df_num_div_denom_group.query(stat_filter),
    x="sample_id",
    y="Mean",
    palette=my_pal,
    ExpName=aid.exp_analysis_name() + f"_{stat_type}",
    y_label=cp.identify_y_axis_label(aid.exp_analysis_name()),
    x_figSize=cp.determine_fig_width_from_palette(my_pal),
    save_fig=False,
)

# Pairwise t-tests with pooled SD and Holm-Sidak adjustment; as the last
# expression of the cell, the resulting table is what gets displayed.
sp.posthoc_ttest(
    tidy_df_num_div_denom_group.query(stat_filter),
    val_col="Mean",
    group_col="sample_id",
    pool_sd=True,
    p_adjust="holm-sidak",
)

In [None]:
# Write both tables to CSV files in the working directory; each file name is
# the experiment/analysis name followed by a table-specific suffix.
for frame, csv_suffix in (
    (tidy_df_num_div_denom, "_tidy.csv"),
    (tidy_df_num_div_denom_group, "_per_gut_tidy.csv"),
):
    frame.to_csv(f"{aid.exp_analysis_name()}{csv_suffix}")

In [None]:
# If a notebook with the template's file name exists in the working directory
# (presumably this very notebook), rename it after the experiment/analysis.
template_nb = "Tidy_Image_Analysis_Template.ipynb"
if os.path.isfile(template_nb):
    os.rename(template_nb, f"{aid.exp_analysis_name()}_tidy.ipynb")