# Developer Handbook - Statistical_analysis.py
- module to perform statistical analysis on data
    - T-Test (independent)
    - Anova (oneway) + post hoc-Test
- used test is selected by user via GUI input
- additional safety: number of treatments in data is a factor for choosing test 
- list of functions:
    - perform_t_test()
    - perform_anova()
 
## Packages

In [None]:
from scipy.stats import ttest_ind, f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

## Function - perform_t_test()
- 2 parameters: re-structured data ("melted data"), treatment to compare (set via GUI)
1. initialisation of the variable "treatments" to find out how many unique treatments are available in the data set
    - function only calculates the test when the number of unique treatments == 2; otherwise an error message is printed to the console and None is returned for both values
2. assigning group1 and group2 for the test
3. perform t-test (ttest_ind()) with groups
4. printing test values (t_statistic and p-value)
return: t-statistic, p-value

In [None]:
def perform_t_test(melted_data, treatment_to_compare):
    """Run an independent two-sample t-test between the two treatments.

    Parameters
    ----------
    melted_data
        Long-format ("melted") table. It must provide the column named by
        ``treatment_to_compare`` and a ``'Value'`` column with the
        measurements (presumably a pandas DataFrame -- confirm with caller).
    treatment_to_compare
        Name of the column that holds the treatment labels.

    Returns
    -------
    tuple
        ``(t_statistic, p_value)`` as returned by ``scipy.stats.ttest_ind``,
        or ``(None, None)`` when the column does not contain exactly two
        unique treatments.
    """
    treatments = melted_data[treatment_to_compare].unique()

    # A two-sample t-test is only defined for exactly two groups.
    if len(treatments) != 2:
        print("No statistic! The t-test requires exactly two treatments.")
        return None, None

    # Groups follow the order in which the labels first appear in the data,
    # which determines the sign of the t-statistic.
    group1 = melted_data[melted_data[treatment_to_compare] == treatments[0]]['Value']
    group2 = melted_data[melted_data[treatment_to_compare] == treatments[1]]['Value']

    # Perform the t-test (independent samples)
    t_stat, p_value = ttest_ind(group1, group2)
    print(f"t-statistic: {t_stat}, p-value: {p_value}")
    return t_stat, p_value

## Function - perform_anova()
- 2 required parameters: re-structured data ("melted data"), treatment to compare (set via GUI)
1. initialisation of the variable "treatments" (NumPy array) to find out how many unique treatments are available in the data set
    - function only calculates the test when the number of unique treatments > 2; otherwise an error message is printed to the console and None is returned for all values
2. initialisation of the variable "groups": grouping data according to the variable "treatments"
3. use groups to perform one-way ANOVA
4. depending on the p-value, a post hoc test is calculated using pairwise_tukeyhsd() when p < 0.05
return: f-statistic, p-value, optional: tukey result

In [None]:
def perform_anova(melted_data, treatment_to_compare, alpha=0.05):
    """Run a one-way ANOVA across treatments, with a Tukey HSD post-hoc test.

    Parameters
    ----------
    melted_data
        Long-format ("melted") table. It must provide the column named by
        ``treatment_to_compare`` and a ``'Value'`` column with the
        measurements (presumably a pandas DataFrame -- confirm with caller).
    treatment_to_compare
        Name of the column that holds the treatment labels.
    alpha : float, optional
        Significance level. It is used both to decide whether the post-hoc
        test runs and as the ``alpha`` passed to ``pairwise_tukeyhsd``.
        Defaults to 0.05.

    Returns
    -------
    tuple
        ``(f_statistic, p_value, tukey_summary)``. ``tukey_summary`` is the
        result of ``pairwise_tukeyhsd(...).summary()`` when
        ``p_value < alpha`` and ``None`` otherwise. Returns
        ``(None, None, None)`` when there are not more than two unique
        treatments.
    """
    treatments = melted_data[treatment_to_compare].unique()

    # With two or fewer groups this function does not compute anything.
    if len(treatments) <= 2:
        print("ANOVA requires more than two treatments.")
        return None, None, None

    # One series of measurements per treatment, in order of first appearance.
    groups = [
        melted_data[melted_data[treatment_to_compare] == t]['Value']
        for t in treatments
    ]
    f_stat, p_value = f_oneway(*groups)
    print(f"ANOVA F-statistic: {f_stat}, p-value: {p_value}")

    if p_value < alpha:
        # The post-hoc test only runs after a significant ANOVA result.
        tukey_result = pairwise_tukeyhsd(
            melted_data['Value'],
            melted_data[treatment_to_compare],
            alpha=alpha,
        )
        return f_stat, p_value, tukey_result.summary()

    print("No significant difference, therefore no post-hoc test required.")
    return f_stat, p_value, None