In [1]:
%load_ext autoreload
%autoreload 2
from evaluation_scripts.experiment_class import Experiment
from evaluation_scripts.statistics import  anova, summarize
import matplotlib.pyplot as plt
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from evaluation_scripts.base import dict_to_json
import pandas as pd
import os

In [24]:

class Significance:
    """One-way ANOVA followed by Tukey HSD on the last transfers of an experiment.

    The constructor keeps the rows of the last ``dt`` transfers, marks which of
    them have ``phenotype == crit_column``, aggregates them with the project
    helper ``summarize`` and compares the strategies.

    Attributes set by the constructor:
        crit_column, alpha: the constructor arguments.
        t_max: largest value found in ``Data.transfer_n``.
        cols: column names kept in ``Data_end``.
        Data_end: rows of the last ``dt`` transfers, with a boolean
            ``crit_column`` column.
        summary: result of ``summarize`` (see ``summerize``).
        anova_results: result of ``anova``; only its ``pvalue`` is read here.
        significant: ANOVA p-value below ``alpha``; if Tukey HSD was run it is
            overwritten with "at least one pair was rejected".
        tukey_results: Tukey HSD table as a DataFrame, or None when the ANOVA
            was not significant.
        winner, loser: lists of strategy names, or None when no pairwise
            difference was found.
    """

    def __init__(self, Data, crit_column = "U", alpha = 0.05, dt = 4):
        """Run the whole analysis on ``Data``.

        Args:
            Data: DataFrame with the columns ``strategy``, ``transfer_n``,
                ``rep`` and ``phenotype``. It is not modified.
            crit_column: phenotype label to test for; also the name of the
                boolean column created in ``Data_end``.
            alpha: significance level for both ANOVA and Tukey HSD.
            dt: number of final transfers to keep
                (rows with ``transfer_n > t_max - dt``).
        """
        self.crit_column = crit_column
        self.alpha = alpha
        # Defaults first, so every attribute exists whichever branch runs below.
        self.tukey_results = None
        self.winner = None
        self.loser = None
        self._select_final_transfers(Data, dt)
        self.summerize()
        self.anova()
        if self.significant:
            self.tukey()
            if self.significant:
                self.get_winners_and_losers()

    def _select_final_transfers(self, Data, dt):
        """Store ``t_max``, ``cols`` and ``Data_end`` without touching ``Data``."""
        self.t_max = t_max = Data.transfer_n.max()
        self.cols = cols = ["strategy", "transfer_n", "rep", self.crit_column]
        is_final = Data.transfer_n > t_max - dt
        data_end = Data.loc[is_final, cols[:-1]].copy()
        # Name the flag after crit_column (not a fixed "U") so that non-default
        # criteria work; to_numpy() sidesteps index alignment on duplicate labels.
        data_end[self.crit_column] = (Data.loc[is_final, "phenotype"] == self.crit_column).to_numpy()
        self.Data_end = data_end

    def summerize(self):
        """Aggregate ``Data_end`` per strategy and replicate via ``summarize``.

        The result is expected to carry a ``<crit_column>_mean`` column, which
        ``anova`` and ``tukey`` read. (The misspelt method name is kept for
        backward compatibility.)
        """
        groups = ["strategy", "rep"]
        self.summary = summarize(self.Data_end, groups, self.crit_column)

    def anova(self):
        """Run the project helper ``anova`` across strategies and set ``significant``."""
        self.anova_results = anova(self.summary, "strategy", self.crit_column + "_mean")
        self.significant = bool(self.anova_results.pvalue < self.alpha)

    def tukey(self):
        """Run Tukey HSD across strategies and store its summary table.

        ``significant`` becomes True only if at least one pair is rejected.
        """
        df = self.summary
        tuk = pairwise_tukeyhsd(endog=df[self.crit_column + "_mean"],
                                groups=df["strategy"],
                                alpha=self.alpha)
        rows = tuk.summary().data  # first row is the header
        self.tukey_results = pd.DataFrame(rows[1:], columns=rows[0])
        self.significant = bool(self.tukey_results.reject.any())

    def get_winners_and_losers(self):
        """Set ``winner`` and ``loser`` from ``tukey_results``.

        The pair with the largest absolute ``meandiff`` is the one formed by
        the highest and the lowest group mean. As before, a negative
        ``meandiff`` makes ``group1`` the winner (statsmodels reports
        ``meandiff`` as group2 minus group1 - NOTE(review): confirm against the
        installed statsmodels version). Each list starts with that nominal
        group, followed by every group Tukey HSD could not separate from it,
        whichever side of the pair it appears on.
        """
        df = self.tukey_results
        magnitude = pd.to_numeric(df.meandiff).abs()
        extreme = df.loc[magnitude.idxmax()]
        if extreme["meandiff"] < 0:
            nominal_winner, nominal_loser = extreme["group1"], extreme["group2"]
        else:
            nominal_winner, nominal_loser = extreme["group2"], extreme["group1"]
        self.winner = [nominal_winner] + self._not_different_from(nominal_winner)
        self.loser = [nominal_loser] + self._not_different_from(nominal_loser)

    def _not_different_from(self, group):
        """Return the groups whose comparison with ``group`` was not rejected."""
        df = self.tukey_results
        not_rejected = ~df.reject.astype(bool)
        partners = list(df[not_rejected & (df.group1 == group)].group2)
        partners += list(df[not_rejected & (df.group2 == group)].group1)
        return partners

        
def prep_dict_for_table(input_dict):
    """Return a copy of ``input_dict`` with its values rendered for a table cell.

    Args:
        input_dict: mapping of column name to value.

    Returns:
        A new dict with the same keys, where
        * list values are joined into one ``", "``-separated string,
        * a float stored under the key ``'pValue'`` is formatted in scientific
          notation with two decimals,
        * every other value is passed through unchanged.
    """
    result_dict = {}
    for key, value in input_dict.items():
        if isinstance(value, list):
            # str() each element so a non-string entry cannot make join() raise.
            rendered = ", ".join(map(str, value))
        elif key == 'pValue' and isinstance(value, float):
            rendered = f'{value:.2e}'
        else:
            rendered = value
        result_dict[key] = rendered
    return result_dict

In [46]:
# Run the significance analysis for every experiment date and collect one
# entry per experiment, both raw (`results`) and table-formatted (`table`).
dates = ["20220412", "20220127", "20210417"]

results = []
table_rows = []
for date in dates:
    exp = Experiment(date)
    # Leave the "No treatment" strategy out of the comparison.
    Data = exp.data
    Data = Data[Data.strategy != "No treatment"].copy()
    significance = Significance(Data)
    anova_p = significance.anova_results.pvalue
    if significance.significant:
        entry = {
            "date":date,
            "loser":significance.loser,
            "winner":significance.winner,
            "pValue":anova_p,
            "comment": ""
        }
    else:
        # `significant` is False both when the ANOVA itself fails and when the
        # ANOVA passes but Tukey HSD rejects no pair; report which one it was.
        if anova_p < significance.alpha:
            comment = (f"ANOVA significant $(p<{significance.alpha})$, "
                       "but Tukey HSD found no significant pairwise difference")
        else:
            comment = f"ANOVA not significant at the {significance.alpha} level"
        entry = {
            "date":date,
            "loser":"All",
            "winner":"All",
            "pValue":anova_p,
            "comment": comment
        }
    results.append(entry)
    table_rows.append(prep_dict_for_table(entry))

table = pd.DataFrame.from_records(table_rows)


In [47]:
# Display the per-experiment summary table assembled in the previous cell.
table

Unnamed: 0,date,loser,winner,pValue,comment
0,20220412,Mono B,Combination,3.34e-12,
1,20220127,,,0.034,"ANOVA significant $(p<0.05)$, but tukey reject..."
2,20210417,Mono A,"Combination, Cycling, Mixing",5.3e-14,


In [48]:
# Export the summary table as LaTeX. `exp` is the variable left behind by the
# loop over `dates`, so when the cells run in order the file goes into the
# "tables" directory of the last experiment in that list.
file_path = os.path.join(exp.pathes["tables"], "anova_no_no_treat.tex")
table.to_latex(file_path, index = False, column_format='lcccp{5cm}')

In [18]:
# Store the unformatted result entries next to the table export, in the
# "summary" directory of the experiment currently bound to `exp`.
# NOTE(review): dict_to_json is a project helper; presumably it serialises
# `results` to JSON at `file_path` - confirm.
file_path = os.path.join(exp.pathes["summary"], "exp_anova_no_notreat.json")
dict_to_json(file_path, results)