Given a set of basic statistical results, which give coefficients for individual transitions, we search for the transitions which have the largest coefficient among all transitions from each behavior.  That is, we don't look for the single largest coefficient overall; instead we first group coefficients by the behavior they transition from and then search for the largest coefficient in each of these groups. 

This will save results in a format that is conducive to working with existing plotting code. 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pickle
import re

import numpy as np

from keller_zlatic_vnc.whole_brain.whole_brain_stat_functions import test_for_largest_amplitude_beta

## Parameters go here

In [3]:
# Parameters for this analysis:
#   save_folder   - folder holding the basic results file; the max-stats results are saved here too
#   basic_rs_file - name of the pickle file with the basic statistical results
ps = {
    'save_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_spont_stats',
    'basic_rs_file': 'spont_4_20_20_long_bl_co_21.pkl',
}

## Load the basic results

In [4]:
# Unpickle the basic statistical results from <save_folder>/<basic_rs_file>
with (Path(ps['save_folder']) / ps['basic_rs_file']).open('rb') as f:
    basic_rs = pickle.load(f)

In [5]:
# The transitions the coefficients correspond to; find_max_vls groups them by their
# first entry (the behavior transitioned from).
beh_trans = basic_rs['beh_trans']

## Search for max values

In [6]:
def find_max_vls(stats, beh_trans):
    """ Finds the largest coefficient among the transitions out of each behavior, with a p-value.

    Transitions are grouped by the behavior they start from (the first entry of each element
    of beh_trans).  Coefficients are only ever compared to other coefficients in the same group.

    Within each group:

        1) Any coefficient that is not the largest is given a p-value of 1, to denote that we
           have no evidence that it is the largest.

        2) The largest coefficient is given the largest (ignoring NaN values) of the p-values
           returned by test_for_largest_amplitude_beta, which applies pair-wise tests for
           significant differences between coefficients (see that function for more information).

        3) If the group contains only a single transition, no test is run and the p-value of
           that coefficient is also set to 1.

    Args:
        stats: A dictionary with the keys 'beta', 'acm' and 'n_grps'.  Entries of 'beta' and the
        rows and columns of 'acm' are indexed in the same order as beh_trans.  'n_grps' is passed
        unchanged to test_for_largest_amplitude_beta.

        beh_trans: A sequence of transitions.  beh_trans[i][0] is the behavior transition i
        starts from.

    Returns:
        new_stats: A dictionary with the keys:

            beta: The same object as stats['beta']

            non_max_p: A vector with one p-value per transition.  All groups are reported in this
            single vector for ease of integration with plotting code, but it should be remembered
            that coefficients were only compared within groups.

    """

    non_max_p = np.zeros(len(beh_trans))

    start_behs = {trans[0] for trans in beh_trans}

    for start_beh in start_behs:
        # Indices of all transitions which leave this behavior
        grp_inds = np.flatnonzero([trans[0] == start_beh for trans in beh_trans])

        # Default for every coefficient in the group; only the entry for the largest
        # coefficient is overwritten below
        non_max_p[grp_inds] = 1

        if len(grp_inds) < 2:
            # A single transition has nothing to be compared against, so it keeps a p-value of 1
            continue

        # Note: the value of alpha does not matter here, since we record the p-values themselves
        largest_ind, _, grp_p_vls = test_for_largest_amplitude_beta(
            beta=stats['beta'][grp_inds],
            acm=stats['acm'][np.ix_(grp_inds, grp_inds)],
            n_grps=stats['n_grps'],
            alpha=.05,
            test_for_largest=True)

        # largest_ind indexes into the group, so map it back to an index into all coefficients
        non_max_p[grp_inds[largest_ind]] = np.nanmax(grp_p_vls)

    return {'beta': stats['beta'], 'non_max_p': non_max_p}
    
    

In [7]:
# Run the per-group search for the largest coefficient on every entry of the basic results
all_max_stats = [
    find_max_vls(entry_stats, basic_rs['beh_trans'])
    for entry_stats in basic_rs['full_stats']
]

## Now save our results

In [8]:
# Bundle the results in the layout the existing plotting code expects: the parameters used,
# one stats dictionary per entry of the basic results, and the transitions they refer to.
rs = {'ps': ps, 'full_stats': all_max_stats, 'beh_trans': basic_rs['beh_trans']}

save_folder = ps['save_folder']
# Use the stem of the input file name so only the final extension is removed.  Splitting on
# the first '.' would truncate names which contain extra dots (e.g., 'a.b.pkl' -> 'a'), and
# results for different input files could then overwrite one another.
save_name = Path(ps['basic_rs_file']).stem + '_max_stats.pkl'

save_path = Path(save_folder) / save_name
with open(save_path, 'wb') as f:
    pickle.dump(rs, f)

## Debug code

In [10]:
# Display the saved stats for the entry at index 1 as a quick sanity check: entries of
# non_max_p below 1 mark the coefficient found to be largest in a group.
rs['full_stats'][1]

{'beta': array([0.08350296, 0.12266931, 0.12176802, 0.1006147 , 0.07221072,
        0.09850796, 0.08834876, 0.09639469, 0.07644295, 0.11504698,
        0.09191699, 0.07144067, 0.1008051 , 0.1133689 , 0.09172334,
        0.06459704, 0.08376919, 0.08145411, 0.10048368, 0.07738923,
        0.07644632, 0.10791501, 0.07793585, 0.06050442, 0.05841707,
        0.03375159, 0.04098482, 0.08825726, 0.09024919, 0.06437009,
        0.11945482, 0.10000496, 0.10937092, 0.10980646, 0.09684939,
        0.09450957, 0.09807218, 0.07735601, 0.09319493, 0.09509638,
        0.10469987, 0.08157013]),
 'non_max_p': array([1.        , 0.92188128, 1.        , 1.        , 1.        ,
        0.84316507, 1.        , 1.        , 1.        , 0.91154208,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 0.59152258, 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 0.87426505, 1.   