## Distributions of global measures in snapshots, shown jointly for all treatments

**interaction criteria:**
- ANGLE = 160 degrees
- DISTANCE = 2.5 body lengths
- TIME = 0.6 seconds

In [1]:
import os
import toml
import scipy

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import scipy.stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

from src import settings
from src.utils import fileio

# Read the main TOML configuration; its 'TREATMENTS' entry decides which
# result files are included below.
CONFIG_PATH = os.path.join(settings.CONFIG_DIR, 'main.toml')
with open(CONFIG_PATH, 'r') as file:
    config = toml.load(file)

# Discover the per-treatment result files.
# NOTE(review): presumably a mapping of file name -> file path; confirm
# against fileio.load_files_from_folder.
INPUT_PATH = os.path.join(settings.RESULTS_DIR, 'global_measures')
all_treatments = fileio.load_files_from_folder(INPUT_PATH)

# Load each configured treatment and tag its rows with the treatment name.
dataframes = []
for file_name, treatment_path in all_treatments.items():
    treatment_name = file_name.replace('.csv', '')
    if treatment_name not in config['TREATMENTS']:
        continue
    df = pd.read_csv(treatment_path, index_col=0).assign(Treatment=treatment_name)
    dataframes.append(df)

# Stack all treatments into one table. The reset copy exposes the former
# row index as an ordinary column so it can be used for plotting.
combined_data = pd.concat(dataframes)
combined_data_reset = combined_data.reset_index()
measure_names = list(combined_data.columns)

# Treatments compared in every test; also the left-to-right order on the
# x-axis of each plot. Kept in one place so the group selection, the Tukey
# labels and the plot order cannot drift apart.
order = ['CsCh', 'Cs_5DIZ', 'LDA_5DIZ', 'OCT_5DIZ', 'LDA_OCT_5DIZ']

for measure_name in measure_names:
    # 'Treatment' is the grouping column, not a measure.
    if measure_name == 'Treatment':
        continue

    # One Series of values per treatment, aligned with `order`.
    groups = [
        combined_data_reset.loc[combined_data_reset['Treatment'] == treatment, measure_name]
        for treatment in order
    ]

    # Omnibus one-way ANOVA across all treatments.
    anova_result = scipy.stats.f_oneway(*groups)

    # Tukey HSD expects one flat array of values plus a parallel array of
    # group labels.
    all_data = np.concatenate(groups)
    group_labels = []
    for treatment, values in zip(order, groups):
        group_labels.extend([treatment] * len(values))
    tukey_results = pairwise_tukeyhsd(all_data, group_labels)

    # NOTE(review): this reports only measures for which NO pairwise
    # comparison is rejected. If the intent is to show measures WITH
    # significant differences, the condition should be
    # `any(tukey_results.reject)` -- confirm with the author.
    # NOTE(review): the recorded output contains a RuntimeWarning from the
    # studentized-range division inside statsmodels; possibly NaN values or
    # zero variance in some measure -- such measures would also pass this
    # filter. TODO confirm.
    if not any(tukey_results.reject):
        print('='*90)
        print(measure_name)
        print(anova_result)
        print(tukey_results)

        plt.figure(figsize=(6, 4))
        sns.pointplot(data=combined_data_reset, x='Treatment', y=measure_name, hue='index', errorbar='sd', order=order)
        # sns.boxplot(data=combined_data_reset, x='Treatment', y=measure_name, hue='index', order=order)

        plt.xlabel('Treatment')
        plt.ylabel(measure_name)
        plt.title(f'Distribution of {measure_name}', fontsize=18)
        # plt.legend('')
        # Adjust the layout while the figure is still open; calling
        # tight_layout()/show() after the loop would create and display an
        # empty figure.
        plt.tight_layout()
        plt.show()
        print('='*90)

  st_range = np.abs(meandiffs) / std_pairs #studentized range statistic


<Figure size 640x480 with 0 Axes>