# Notebook for creating a table of results for a Google spreadsheet

In [1]:
import numpy as np
import pandas as pd
import analyze_cells_energy as ace
import utils
import utils_PLS as upls

In [4]:
def find_significant_areas(dictionary, experimental_groups, value, test='mannwhitneyu', alpha=0.05):
    """
    List, for every group comparison, the areas whose p-value is below ``alpha``.

    The requested value is computed across the experimental groups with
    ``ace.calculate_value_across_groups``, the groups are compared with
    ``ace.test_across_groups``, and every column of the test output other than
    'area' is treated as a column of p-values.

    Parameters:
    dictionary (dict): Results across mice, forwarded as ``dict_results_across_mice``.
    experimental_groups (dict): Experimental groups; its keys are passed to the test as group names.
    value (str): The value to calculate across groups (e.g., 'n_cells').
    test (str): Statistical test to use (default is 'mannwhitneyu').
    alpha (float): Significance threshold; only areas with p-value strictly below it are kept
        (default is 0.05).

    Returns:
    pd.DataFrame: One column per p-value column of the test output (with 'pval_' removed
        from the column name). Each column lists the significant area names ordered by
        ascending p-value; shorter columns are padded with NaN. An empty DataFrame is
        returned when the test output has no p-value columns.
    """
    # Calculate values across groups
    dfs = ace.calculate_value_across_groups(
        experimental_groups=experimental_groups,
        dict_results_across_mice=dictionary,
        value=value
    )

    # Perform statistical tests across groups
    df_pvalues = ace.test_across_groups(
        dfs,
        test=test,
        groups=list(experimental_groups.keys())
    )

    # Every column except 'area' holds the p-values of one comparison
    pval_columns = [col for col in df_pvalues.columns if col != 'area']
    if not pval_columns:
        return pd.DataFrame()

    # Collect one Series of significant areas per comparison, then concatenate once
    # instead of rebuilding the result frame on every iteration.
    significant = []
    for col in pval_columns:
        ranked = df_pvalues.sort_values(by=col)
        significant.append(ranked.loc[ranked[col] < alpha, 'area'].reset_index(drop=True))

    df_sigareas = pd.concat(significant, axis=1)

    # Rename columns to remove the 'pval_' marker
    df_sigareas.columns = [col.replace('pval_', '') for col in pval_columns]

    return df_sigareas


# Loading data

In [2]:
# Build df_levels from the cleaned volumes database, requesting level 8
cleaned_volumes = ace.clean_volumes_database()
df_levels = upls.create_df_levels(cleaned_volumes, level=8)

In [3]:
# Load the four per-batch result dictionaries (saved as 0-d object arrays, hence .item()).
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files from a trusted source.
c57_wf, c57_wof, wild_wf, wild_wof = (
    np.load(results_path, allow_pickle=True).item()
    for results_path in (
        'dict_results/newvolumes/dict_results_across_mice_c57_merged_hemispheres_F6BL48.npy',
        'dict_results/newvolumes/dict_results_across_mice_BL_woF2000_36.npy',
        'dict_results/newvolumes/dict_results_across_mice_WILD_with_father2500_newwild.npy',
        'dict_results/newvolumes/dict_results_across_mice_WILD_without_father2500.npy',
    )
)

# Make table for areas significant in one experimental group, from the non-parametric test and the PLS test

Nota bene: this table does not distinguish between conditions (Control, Fam, Unfam).

In [6]:
# Binary table: one row per area, one column per experimental batch.
# A cell is 1 when ace.select_significant_areas reports the area for that batch, else 0.

# Column label in the table -> batch name passed to ace.select_significant_areas
labels = {'c57_with_father':'c57',
          'c57_without_father':'BL_woF2000',
          'wild_with_father':'WILD_with_father2500',
          'wild_without_father':'WILD_without_father2500'}

# Keep only areas whose level-5 parent is not one of the excluded regions
areas = df_levels[~df_levels['parent_l5'].isin(['P','MY','CBX', 'CBN'])]['name_area'].to_numpy()

# Start from an all-zero integer table instead of filling an empty object frame with fillna(0)
df = pd.DataFrame(0, index=pd.Index(areas, name='area'), columns=list(labels))

# Iterating over `labels` yields the column labels in the same order as the dictionaries
for dictionary, label in zip([c57_wf, c57_wof, wild_wf, wild_wof], labels):
    list_significant_areas = ace.select_significant_areas(
        dictionary=dictionary,
        experimental_groups=utils.divide_in_exp_groups(list_subjects=dictionary.keys()),
        batch=labels[label],
        value_test='n_cells',
        value_pls='relative_density',
        test='mannwhitneyu',
        threshold_test=0.05,
        threshold_pls=2.56)
    # Mark only areas that are rows of the table; a plain df.loc[area, label] = 1 would
    # silently append a new NaN-filled row for any area outside the index.
    df.loc[df.index.isin(list(list_significant_areas)), label] = 1

# Optional: save table as csv
# df.to_csv('significant_areas_tests.csv')

In [7]:
# Display the per-batch table of significant areas (1 = significant, 0 = not)
df

Unnamed: 0_level_0,c57_with_father,c57_without_father,wild_with_father,wild_without_father
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Frontal pole cerebral cortex,0,0,0,1
Primary motor area,0,1,0,1
Secondary motor area,0,0,0,1
Primary somatosensory area,0,0,0,0
Supplemental somatosensory area,0,0,0,0
...,...,...,...,...
Interfascicular nucleus raphe,0,0,0,0
Interpeduncular nucleus,1,0,0,0
Rostral linear nucleus raphe,0,0,0,0
Central linear nucleus raphe,1,0,0,0


# Make binary table for areas significant in one experimental group, from the non-parametric test, distinguishing across groups

Nota bene: this table does not consider the PLS test.

In [8]:
# Three-level column index: strain x rearing condition x group comparison.
# The Cartesian product enumerates the 12 combinations with the last level varying fastest.
cols = pd.MultiIndex.from_product([
    ["c57", "wild"],
    ["with father", "without father"],
    ['Control_vs_Fam', 'Control_vs_Unfam', 'Fam_vs_Unfam'],
])

In [9]:
# Binary table: one row per area, one column per (strain, rearing condition, comparison).
# A cell is 1 when find_significant_areas lists the area for that comparison, else 0.

# Keep only areas whose level-5 parent is not one of the excluded regions
areas = df_levels[~df_levels['parent_l5'].isin(['P','MY','CBX', 'CBN'])]['name_area'].to_numpy()

# Start from an all-zero integer table instead of filling an empty object frame with fillna(0)
df1 = pd.DataFrame(0, index=areas, columns=cols)

dictionary = {'c57':{'with father':c57_wf,
                     'without father':c57_wof},
             'wild':{'with father':wild_wf,
                     'without father':wild_wof}}

for mouse_group, results_by_condition in dictionary.items():
    for condition, results in results_by_condition.items():
        df_significant_areas = find_significant_areas(
            dictionary=results,
            experimental_groups=utils.divide_in_exp_groups(list_subjects=results.keys()),
            value='n_cells',
            test='mannwhitneyu',
            alpha=0.05)
        for group in df_significant_areas.columns:
            # Columns are NaN-padded to a common length, so drop the padding first
            significant = df_significant_areas[group].dropna()
            # Single .loc assignment on df1 itself: the chained form
            # df1[column].loc[area] = 1 writes to an intermediate Series and is not
            # guaranteed to update df1 (it is a no-op under pandas Copy-on-Write).
            # As before, areas that are not rows of the table are ignored.
            df1.loc[df1.index.isin(significant), (mouse_group, condition, group)] = 1

# NOTE(review): this writes to the same file name as the optional save of the previous
# table, while the last cell suggests 'significant_areas_nonparametrictest.csv' - confirm
# which file name is intended.
df1.to_csv('significant_areas_tests.csv')

In [10]:
# Display the per-comparison table of significant areas (1 = significant, 0 = not)
df1

Unnamed: 0_level_0,c57,c57,c57,c57,c57,c57,wild,wild,wild,wild,wild,wild
Unnamed: 0_level_1,with father,with father,with father,without father,without father,without father,with father,with father,with father,without father,without father,without father
Unnamed: 0_level_2,Control_vs_Fam,Control_vs_Unfam,Fam_vs_Unfam,Control_vs_Fam,Control_vs_Unfam,Fam_vs_Unfam,Control_vs_Fam,Control_vs_Unfam,Fam_vs_Unfam,Control_vs_Fam,Control_vs_Unfam,Fam_vs_Unfam
Frontal pole cerebral cortex,0,0,0,0,0,0,0,0,0,0,1,0
Primary motor area,0,0,0,0,0,1,0,0,0,1,1,0
Secondary motor area,0,0,0,0,0,0,0,0,0,0,1,0
Primary somatosensory area,0,0,0,0,0,0,0,0,0,0,0,0
Supplemental somatosensory area,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
Interfascicular nucleus raphe,0,0,0,0,0,0,0,0,0,0,0,0
Interpeduncular nucleus,0,0,0,0,0,0,0,0,0,0,0,0
Rostral linear nucleus raphe,0,0,0,0,0,0,0,0,0,0,0,0
Central linear nucleus raphe,1,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Optional: save table as csv
# df1.to_csv('significant_areas_nonparametrictest.csv')