Alignment analysis in ROIs  
output: df_rois_similarity.csv  


In [173]:
from os import listdir, makedirs, walk, remove, getlogin, rename
from os.path import join, exists, isfile, getmtime, isdir
import numpy as np
import re
from brainiak import image, io, isc
from brainiak.fcma.util import compute_correlation
import brainiak.searchlight.searchlight as searchlight
from brainiak.funcalign.srm import SRM
import nibabel as nib
import time
import sys
from scipy import stats
import socket
import pandas as pd
from mpi4py import MPI
import pickle
# Notebook-only setup: rich display, inline plotting defaults and debugging helpers.
# NOTE(review): use_test_params is assigned only when every statement in the try body
# succeeds; on the ImportError path it stays undefined - confirm no later cell reads it
# unconditionally.
# NOTE(review): get_ipython is not imported in this cell; presumably IPython injects it.
# Outside IPython the call would presumably raise NameError, which this handler does
# not catch - confirm.
try:  # in jupyter - load tools and run test
    from IPython.display import display
    import matplotlib.pyplot as plt
    # the trailing '# for 4k screen' text is part of the string handed to the %config magic
    get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina' # for 4k screen")
    from IPython.core.interactiveshell import InteractiveShell  # for var view
    InteractiveShell.ast_node_interactivity = "all"  # for var view
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import rcParams
    rcParams.update({'figure.autolayout': True})
    import pdb
    import seaborn as sns
    use_test_params=True # also clips number of voxels, skips searchlight
except ImportError:  # not jupyter
    # only a generic message is printed; the failing import is not reported
    print('Error')



In [2]:
# Set paths: every location below is derived from the login name of the current user.
my_name = getlogin()

# set system
this_system = socket.gethostname()
print ('Server: '+this_system)

# DATA
bids_path='/mnt/sink/scratch/{}/to_bids'.format(my_name) # raw data
const_data_path = '/mnt/bucket/labs/hasson/'+my_name
const_study_path=join(const_data_path,'onlineL','pred20') # pre-processed data
input_fslfeat_students_path=join(const_study_path,'scan_data_nii','students_mni','6motion') # student pre-processed data
input_fslfeat_experts_path=join(const_study_path,'scan_data_nii','experts_mni','6motion') # expert pre-processed data


print('DATA: ' + const_study_path)

# CODE
# my_name[:-2] drops the last two characters of the login name for the second path
# component (the recorded output shows 'meshulam' -> 'meshul')
code_path='/mnt/bucket/people/{}/{}/notebooks/share'.format(my_name,my_name[:-2])
print('CODE: ' + code_path)
# SCORES
# tab-separated exam logs stored under the raw-data tree
scores_path=join(bids_path,'sourcedata','exam_scores.tsv')
timing_path=join(bids_path,'sourcedata','exam_timing.tsv')
# MASKS
masks_path=join(code_path,'masks')
const_mni_brain_file_name = join(masks_path, 'MNI152_T1_3mm_brain.nii.gz')  # MNI brain

# PICKLES
pickles_path=join(const_data_path,'onlineL','shared','pickles')
print('PICKLES:' + pickles_path)

Server: scotty.pni.Princeton.EDU
DATA: /mnt/bucket/labs/hasson/meshulam/onlineL/pred20
CODE: /mnt/bucket/people/meshulam/meshul/notebooks/share
PICKLES:/mnt/bucket/labs/hasson/meshulam/onlineL/shared/pickles


In [3]:
# import multiple comparisons tools
sys.path.insert(0, join(code_path, 'py'))
from multi_comp import fdr_correction # FDR from the MNE-python package


In [4]:
# get rois
use_roi=True
load_roi_dict=True
if load_roi_dict:
    pickle_filename=join(pickles_path,'thr20_roi_dict.p') # saved during lecture/recaps processing
    # NOTE: unpickling can execute arbitrary code - only load pickles written by this project.
    # The context manager closes the file handle, which the bare open() call left dangling.
    with open(pickle_filename, 'rb') as pickle_file:
        masks=pickle.load(pickle_file)



Helper function definitions (read/save data, run correlations, permutation test)

In [106]:
def build_dict_filenames(student_and_expert_files):
    """
    Build a nested dict of scan file paths: group -> subject -> session -> task.

    The group of a file is decided by its position in the input list: entries with an
    index below the number of files in input_fslfeat_students_path are stored under
    'students', all later entries under 'experts'. Subject, session and task are the
    first three groups of the module-level regex task_name_template.

    :param student_and_expert_files: file names (from listdir), student files first
    :return: dict, e.g. filenames_dict['students']['s110']['wk2']['vid1'] -> full path
    :raises TypeError: if a file name does not match task_name_template
    """
    # directory of each group; an explicit mapping replaces the former eval() lookup
    # of the same two module-level names
    group_paths = {'students': input_fslfeat_students_path,
                   'experts': input_fslfeat_experts_path}
    # list the student directory once instead of once per file; skipped for empty
    # input so that case still touches no directory
    n_student_files = len(listdir(input_fslfeat_students_path)) if len(student_and_expert_files) else 0

    filenames_dict = {'students': {}, 'experts': {}}
    for i_this_file, this_file in enumerate(student_and_expert_files):
        dk = re.search(task_name_template, this_file)
        this_subject = str(dk[1])
        this_session = str(dk[2])
        this_task = str(dk[3])
        student_or_expert = 'experts' if i_this_file >= n_student_files else 'students'
        # create the subject / session levels on first use
        session_dict = filenames_dict[student_or_expert].setdefault(this_subject, {}).setdefault(this_session, {})
        # write filename to dict
        session_dict[this_task] = join(group_paths[student_or_expert], this_file)
    # filenames dict structure:
    # filenames_dict['students']['s110']['wk2']['vid1']
    return filenames_dict

In [107]:
def filenames_to_SRM_input_list(input_filenames, brain_mask):
    """
    Load the files assigned to this MPI rank and build the list used as SRM input.

    Run once for each of (train, test). Each loaded image is masked, NaNs are set to 0,
    every row is z-scored along axis 1 (ddof=1) and the result is written back into a
    zero-filled x*y*z*TR array at the True positions of the mask.

    :param input_filenames: list of file names (text)
    :param brain_mask: boolean mask (assumes it was already read with io.load_boolean_mask
        and thresholded)
    :return: (output_list, runtime_seconds) - output_list has one entry per input file;
        entries for files loaded on other ranks stay None
    """
    n_files = len(input_filenames)
    # divide the work (and memory) of loading subject data across ranks
    subject_idx_list = np.array_split(list(range(n_files)), comm.size)[comm.rank]
    print("Rank {}: Loading subjects -> {}".format(comm.rank, subject_idx_list))
    # every rank keeps a full-length list, with None for subjects loaded elsewhere
    output_list = [None] * n_files
    t1 = time.time()  # timeit

    for iSubject in subject_idx_list:
        # load + mask
        masked = image.mask_image(nib.load(input_filenames[iSubject]), brain_mask)
        # nan to 0
        masked[np.isnan(masked)] = 0
        # zscore
        zscored = stats.zscore(masked, axis=1, ddof=1)
        # back into x*y*z*tr format
        volume_shape = (brain_mask.shape[0], brain_mask.shape[1], brain_mask.shape[2], zscored.shape[1])
        volume = np.zeros(volume_shape)
        coords = np.where(brain_mask)
        # z-scoring can itself produce NaNs, so they are zeroed again on the way in
        volume[coords[0], coords[1], coords[2], :] = np.nan_to_num(zscored)
        # update output
        output_list[iSubject] = volume

    runtime = time.time() - t1
    return output_list, runtime  # data and runtime

In [108]:
def func_isc_in_notebook(D, collapse_subj=True, external_signal=None, float_type=np.float16):
    """
    Internal ISC function for this notebook, no stats.

    For every subject (axis 2 of D) and every row (axis 0 of D), the subject's row is
    Pearson-correlated over axis 1 with a reference signal: the mean of all other
    subjects (leave-one-out), or external_signal when one is given (e.g. mean of
    experts to compare to the individual students).

    :param D: 3-D array indexed as D[row, sample, subject]
    :param collapse_subj: if True, summarise over subjects with isc.compute_summary_statistic
    :param external_signal: optional 2-D reference whose first two dims match those of D
    :param float_type: dtype of the array the r-values are stored in
    :return: r-values, rows X subjects (first dimension dropped when it is a singleton)
    """
    n_vox, n_subj = D.shape[0], D.shape[2]
    ISC = np.zeros((n_vox, n_subj), dtype=float_type)
    subject_ids = np.arange(n_subj)

    for this_subject in range(n_subj):
        if external_signal is None:
            # standard leave-one-out: mean of everyone except the current subject
            reference = np.mean(D[:, :, subject_ids != this_subject], axis=2)
        else:
            reference = external_signal
            rows_match = reference.shape[0] == D.shape[0]
            cols_match = reference.shape[1] == D.shape[1]
            assert rows_match and cols_match, 'dims mismatch for external signal input'
        this_subject_data = D[:, :, this_subject]
        for v in range(n_vox):
            ISC[v, this_subject] = stats.pearsonr(reference[v, :], this_subject_data[v, :])[0]

    if collapse_subj:
        ISC = isc.compute_summary_statistic(ISC, axis=1)[np.newaxis, :]
    # Throw away first dimension if singleton
    return ISC[0] if ISC.shape[0] == 1 else ISC

In [109]:
def func_isfc_in_notebook(D, collapse_subj=True, external_signal=None, float_type=np.float16):
    """
    Helper func, internal ISFC for this notebook, no stats.

    For every subject (axis 2 of D), the rows of that subject's data are correlated with
    the rows of a reference signal: the mean of all other subjects (leave-one-out), or
    external_signal when one is given (e.g. mean of experts to compare to the
    individual students).

    :param D: 3-D array indexed as D[row, sample, subject]
    :param collapse_subj: if True, summarise over subjects with isc.compute_summary_statistic
    :param external_signal: optional 2-D reference whose first two dims match those of D
    :param float_type: dtype of the array the correlation matrices are stored in
    :return: rows X rows X subjects correlations (rows X rows when collapsed; first
        dimension dropped when it is a singleton)
    """
    n_vox, n_subj = D.shape[0], D.shape[2]
    ISFC = np.zeros((n_vox, n_vox, n_subj), dtype=float_type)
    subject_ids = np.arange(n_subj)

    # Loop across choice of leave-one-out subject
    for this_subject in range(n_subj):
        if external_signal is None:
            reference = np.mean(D[:, :, subject_ids != this_subject], axis=2)
        else:
            reference = external_signal
            rows_match = reference.shape[0] == D.shape[0]
            cols_match = reference.shape[1] == D.shape[1]
            assert rows_match and cols_match, 'dims mismatch for external signal input'
        # order important: compute_correlation correlates rows of matrix 1 with rows of matrix 2
        ISFC[:, :, this_subject] = compute_correlation(np.ascontiguousarray(D[:, :, this_subject]),
                                                       np.ascontiguousarray(reference))
        # the per-subject matrix is deliberately not symmetrized

    # collapse over subjects
    if collapse_subj:
        ISFC = isc.compute_summary_statistic(ISFC, axis=2)
    # Throw away first dimension if singleton
    return ISFC[0] if ISFC.shape[0] == 1 else ISFC

In [110]:
def func_corr_and_null_dist(x,y,num_perms=0,random_state=None):
    """
    Pearson correlation of x and y, plus an optional permutation null distribution.

    :param x: 1-D sequence; shuffled to build the null distribution
    :param y: 1-D sequence of the same length as x; kept in its original order
    :param num_perms: number of permutations; 0 (default) skips the null distribution
    :param random_state: None (default) draws from numpy's global random state, exactly
        as before; an int seeds a private np.random.RandomState; an object that has a
        ``permutation`` method (RandomState / Generator) is used directly
    :return: (rval, null_dist) - null_dist is np.nan when num_perms is 0, otherwise an
        array with num_perms r-values of shuffled x against y
    """
    rval = stats.pearsonr(x,y)[0]
    if not num_perms>0:
        return rval,np.nan

    # choose the shuffling source; the default keeps the legacy global-state behaviour
    if random_state is None:
        permute = np.random.permutation
    elif hasattr(random_state, 'permutation'):
        permute = random_state.permutation
    else:
        permute = np.random.RandomState(random_state).permutation

    null_dist=np.array([stats.pearsonr(permute(x), y)[0] for _ in np.arange(num_perms)])
    return rval,null_dist
  

In [111]:
def get_list_of_files_from_dict(filenames_dict, students_or_experts, week, vid_name):
    """
    Collect the file of one (week, vid_name) scan for every subject of a group.

    :param filenames_dict: nested dict, group -> subject -> week -> vid_name -> file name
    :param students_or_experts: group key in filenames_dict
    :param week: session key
    :param vid_name: task key
    :return: list of file names ordered by sorted subject key; subjects without that
        scan (or with an empty entry) are left out
    """
    group_files = filenames_dict[students_or_experts]
    found_files = []
    for subject_key in sorted(group_files):
        try:
            this_file = group_files[subject_key][week][vid_name]
        except KeyError:
            continue  # this subject has no such scan
        if this_file:  # skip empty entries
            found_files.append(this_file)
    return found_files

In [112]:
def intersect_filenames(list1, list2):
    """
    Omit file names of subjects that do not appear in both lists.

    The subject of a file name is the first group of the module-level regex
    task_name_template.

    :param list1: file names
    :param list2: file names, or a list of subject names
    :return: files_out_1 (list1 items of shared subjects) when list2 held subject names;
        otherwise the tuple (files_out_1, files_out_2), where files_out_2 are the list2
        items of shared subjects
    """
    def subject_of(file_name):
        return str(re.search(task_name_template, file_name)[1])

    subjects_in_1 = [subject_of(f) for f in list1]
    try:
        subjects_in_2 = [subject_of(f) for f in list2]
    except TypeError:  # not a list of files matching template, use subject names in list2 directly
        subjects_in_2 = list2
        list2_is_file_names = False
    else:
        list2_is_file_names = True

    shared_subjects = set(subjects_in_1) & set(subjects_in_2)  # subjects in both sets
    files_out_1 = [f for f, subject in zip(list1, subjects_in_1) if subject in shared_subjects]
    if not list2_is_file_names:
        return files_out_1
    files_out_2 = [f for f, subject in zip(list2, subjects_in_2) if subject in shared_subjects]
    return files_out_1, files_out_2

In [113]:
def read_placement_logs(timing_path=timing_path,scores_path=scores_path):
    """
    Read the exam score and exam timing tables (tab-separated files).

    Subject ids are split by their numeric part: below 200 -> student, otherwise expert.

    :param timing_path: path of the timing table
    :param scores_path: path of the scores table
    :return: (timing_df, placement_by_q)
        timing_df - timing table with added 'subject' and 'student_or_expert' columns,
        columns renamed to q_number / q_start_TR / q_end_TR and q_number reduced by 1
        placement_by_q - dict with 'students' and 'experts' score frames (columns
        relabelled 0..15), with rows 's112' and 's201' dropped
    """
    def is_student(subject_id):
        return int(subject_id[1:]) < 200

    # scores
    scores_df = pd.read_csv(scores_path, sep='\t', index_col=[0])
    scores_df.columns = range(16)
    scores_df.index.name = 'Subject'
    students_ind = [is_student(subject_id) for subject_id in scores_df.index]
    experts_ind = np.logical_not(students_ind)
    placement_by_q = {
        'students': scores_df[students_ind].drop(['s112'], axis=0),  # no placement data
        'experts': scores_df[experts_ind].drop(['s201'], axis=0),  # no placement data
    }

    # timing
    timing_df = pd.read_csv(timing_path, sep='\t', index_col=[0])
    timing_df['subject'] = timing_df.index
    timing_df['student_or_expert'] = ['student' if is_student(subject_id) else 'expert'
                                      for subject_id in timing_df.index]
    timing_df = timing_df.rename(columns={'question': 'q_number',
                                          'response_onset_TR': 'q_start_TR',
                                          'response_offset_TR': 'q_end_TR'})
    timing_df['q_number'] -= 1
    timing_df['subject'] = timing_df.index

    return timing_df, placement_by_q
    
    

In [114]:
# searchlight info printout
def print_sl_info(epi_list,sl_rad,nfeature,loc_split):
    """
    Print a summary of the searchlight setup and of the data in epi_list.

    With the module-level flag do_srm set, epi_list is treated as two halves (training
    entries followed by test entries); otherwise as a single list of subjects.

    :param epi_list: list of arrays with the TR count on axis 3
    :param sl_rad: searchlight radius in voxels
    :param nfeature: number of SRM features (only printed when do_srm is set)
    :param loc_split: number of student datasets
    """
    cube_edge = 1 + 2 * sl_rad
    print ('Searchlight cube edge size is {} -> {} voxels'.format(cube_edge, cube_edge ** 3))
    n_items = len(epi_list)
    if not do_srm:
        print ('Number of subjects: {}'.format(int(n_items)))
        print ('Number of student datasets: {}, expert datasets: {}.'.format(loc_split, n_items - loc_split))
        print ('Number of TRs in first list item: {}'.format(epi_list[0].shape[3]))
    else:
        half = n_items / 2  # float; truncated with int() where an index or a count is needed
        print ('Number of features in SRM: {}'.format(nfeature))
        print ('Number of TRs in training data: {}'.format(epi_list[0].shape[3]))
        print ('Number of TRs in test data: {}'.format(epi_list[int(half)].shape[3]))
        print ('Number of subjects: {}'.format(int(half)))
        print ('Number of student datasets: {}, expert datasets: {}.'.format(loc_split, half - loc_split))

In [115]:
def get_file_name(suffix=None):
    """
    Assemble the output file name from the module-level analysis settings.

    :params: suffix - last part of file name (appended after an underscore when given)
    :return: name of file to save, no extension
    """
    # the first command-line argument decides between the SRM-train and the plain-data prefix
    if 'wk' in sys.argv[1]:
        prefix = 'SRM-train_'+train_vid_week+'-'+train_vid_name+'-test-'
    else:
        prefix = 'data-'
    name_parts = [
        prefix,
        test_vid_week+'-'+test_vid_name+'_search-',
        str((1 + 2 * sl_rad) ** 3)+'vox_',
        similarity_type+'_'+vs_mean_of+'_corrwscore-'+correlation_with_score+'_',
        'perms={}'.format(num_perms),
    ]
    if suffix:
        name_parts.append('_'+suffix)
    return ''.join(name_parts)
   

In [116]:
def load_epi_data(subject_groups):
    """
    Load the epi data of every requested subject group into a dict.

    Reads module-level settings: filenames_dict, test_vid_week / test_vid_name,
    train_vid_week / train_vid_name (training data is loaded only when train_vid_week is
    truthy), good_students / good_experts, brain_mask_3mm and the MPI rank.

    :param subject_groups: iterable of group keys ('students' / 'experts')
    :return: dict_epi_data[group]['test_data'] (and ['train_data'] when training data is
        requested), each a list with one entry per subject
    """
    def keep_good_subjects(files, group):
        # restrict files to the good students / good experts of this group
        if 'student' in group:
            return intersect_filenames(files, good_students)
        elif 'expert' in group:
            return intersect_filenames(files, good_experts)
        return files

    dict_epi_data = {
        'students': {'test_data': []},
        'experts': {'test_data': []},
    }

    for students_or_experts in sorted(subject_groups):
        if rank == 0:
            print(students_or_experts)

        # get filenames for test, (+train if required)
        test_on_files_prep = get_list_of_files_from_dict(filenames_dict, students_or_experts,
                                                         test_vid_week, test_vid_name)
        if train_vid_week:
            train_on_files_prep = get_list_of_files_from_dict(filenames_dict, students_or_experts,
                                                              train_vid_week, train_vid_name)
        test_on_files_prep = keep_good_subjects(test_on_files_prep, students_or_experts)

        if not train_vid_week:  # no train data
            test_on_files = test_on_files_prep  # skip intersect with train
        else:
            # same subjects for train and test
            train_on_files_prep = keep_good_subjects(train_on_files_prep, students_or_experts)
            train_on_files, test_on_files = intersect_filenames(train_on_files_prep, test_on_files_prep)
            # load train
            train_on_input, train_load_time = filenames_to_SRM_input_list(train_on_files, brain_mask_3mm)
            dict_epi_data[students_or_experts]['train_data'] = train_on_input.copy()

        # load test
        test_on_input, test_load_time = filenames_to_SRM_input_list(test_on_files, brain_mask_3mm)
        dict_epi_data[students_or_experts]['test_data'] = test_on_input.copy()

    # summary on the root rank; the values are those of the last group processed
    if rank == 0:
        print()
        if train_vid_week:
            print("Training set, {} subjects, load time: {:.2f}s".format(len(train_on_input), train_load_time))
        print("Test set, {} subjects, load time: {:.2f}s".format(len(test_on_input), test_load_time))
        # addition for ROI
        print('Test on files:')
        print(test_on_files)
    return dict_epi_data

In [117]:
def func_roi_data(data_in_sl,roi_mask,roi_name,roi_hemi):
    """
    Get ROI rows from the data in data_in_sl.

    The mask is binarized (> 0) and flattened, then used to select rows (axis 0) of every
    array in data_in_sl[group][train_or_test]. The dict is updated in place.

    :param data_in_sl: dict, group -> 'train_data'/'test_data' -> list of 2-D arrays
    :param roi_mask: 3-D array; positive entries mark the ROI
    :param roi_name: ROI name, used in the printout only
    :param roi_hemi: hemisphere label, used in the printout only
    :return: data_in_sl (the same object), with every list replaced by its ROI rows
    """
    roi_mask = roi_mask > 0  # binarize
    print('ROI {}-{}: {} voxels'.format(roi_name,roi_hemi,np.sum(roi_mask)))
    for st_ex, group_data in data_in_sl.items():
        for tr_ts in group_data:
            try:
                print('{}-{}'.format(st_ex,tr_ts))
                n_x, n_y, n_z = roi_mask.shape
                roi_mask_vec = np.reshape(roi_mask, (n_x * n_y * n_z))  # vectorize mask
                group_data[tr_ts] = [subject_data[roi_mask_vec, :] for subject_data in group_data[tr_ts]]
                print('out shape: {}'.format(group_data[tr_ts][0].shape))
            except IndexError:
                # raised by a mask/data size mismatch, or by the shape printout of an empty list
                print('Could not slice TRs for test in {}-{}'.format(st_ex,tr_ts))
    return data_in_sl

Core analyses  
'diagonal_similarity' - same-question analysis (mean across students)  
'isfc_similarity' - knowledge-structure analysis (mean across students)
  
Per-item analyses - full output + regression coefficients (for plots)  
'diagonal_similarity_peritem'  
'isfc_similarity_peritem'

In [118]:
def _bootstrap_dist_per_subject(rvals_q_by_subj, number_of_subjects, n_bootstraps):
    """Bootstrap distribution (isc.bootstrap_isc, mean) of each subject's per-question r-values; None when n_bootstraps is 0."""
    if not n_bootstraps > 0:
        return None
    dist_per_subject = np.zeros([number_of_subjects, n_bootstraps])
    for this_subject in np.arange(number_of_subjects):
        _, _, _, dist = isc.bootstrap_isc(rvals_q_by_subj[:, this_subject], pairwise=False,
                                          summary_statistic='mean', n_bootstraps=n_bootstraps,
                                          ci_percentile=0, random_state=None)
        dist_per_subject[this_subject, :] = dist.squeeze()
    return dist_per_subject


def _mean_similarity_pvalue(cx, per_subject_dist):
    """Right-sided p-value of the Fisher-averaged similarity in cx against the subject-averaged distribution; nan when unavailable."""
    if per_subject_dist is None:
        # num_perms == 0: no distribution exists (np.arctanh(None) used to raise TypeError here)
        return np.nan
    null_dist = np.tanh(np.nanmean(np.arctanh(per_subject_dist), axis=0))  # mean over subj
    if np.isnan(np.nanmean(cx)) or np.sum(np.isnan(null_dist)):
        return np.nan
    return isc.p_from_null(np.tanh(np.nanmean(np.arctanh(cx))), null_dist, side='right', exact=False)


def func_diagonal_similarity(df_q_timestamps, placement_by_q, data_in_sl, sim_or_sim_minus_nots='sim'):
    """
    same-question analysis

    For every group in the module-level subject_groups, the epi data of each question is
    averaged over the question's TR window, giving a questions X voxels X subjects array.
    Spatial ISC is run on it (subject vs. mean of the rest of the group) and, for
    students, against the mean expert pattern. The result is then summarised according
    to the module-level settings vs_mean_of and correlation_with_score.

    Also reads the module-level names trs_to_add_to_start, trs_to_add_to_end, num_perms,
    test_vid_name and expert_accept_question_threshold.

    :param df_q_timestamps: frame with columns 'q_number', 'student_or_expert', 'subject',
        'q_start_TR' and 'q_end_TR'
    :param placement_by_q: dict with 'students' / 'experts' score frames (subjects X questions)
    :param data_in_sl: dict, data_in_sl[group + 's']['test_data'] is a list with one
        voxels X TRs array per subject
    :param sim_or_sim_minus_nots: 'sim', or a value containing 'nots' to also compute
        similarity minus the func_spatial_isc_q_minus_nots output
    :return: np.array([rval, pval]); with correlation_with_score 'skip' the pair is
        [mean similarity, its p-value]
    """
    number_of_questions = len(df_q_timestamps['q_number'].unique())  # 16 q in placement exam
    corr_result = {}
    for student_or_expert in sorted(subject_groups):  # order is important - do expert before to allow comparison with student
        # in no srm version, epi data is just the non-transformed test data
        epi_data = data_in_sl[student_or_expert + 's']['test_data']
        number_of_subjects = len(epi_data)
        corr_result[student_or_expert] = {
            'roi_epi_per_q': None,  # timecourse data in roi, do isc on this
            'roi_isc_per_q_vs_group_rval': None,  # r-value result of isc, questions are in the 'voxels' dim: student vs student, expert vs expert
            'roi_isc_per_q_vs_group_dist': None,  # null dist, questions are in the 'voxels' dim: student vs student, expert vs expert
        }
        if 'student' in student_or_expert:
            corr_result['student']['roi_isc_per_q_student_vs_expert_rval'] = None  # rval result of pseudo-isc, student vs expert
            corr_result['student']['roi_isc_per_q_student_vs_expert_dist'] = None  # null dist of pseudo-isc, student vs expert

        # questions (instead of TR) X voxels X subjects
        epi_temporal_mean_per_q = np.zeros([number_of_questions, epi_data[0].shape[0], len(epi_data)])
        for this_question in range(number_of_questions):
            # slice this question: entries for this question, all subjects (expert or student separately)
            df_this_q = df_q_timestamps.loc[np.logical_and(df_q_timestamps['student_or_expert'] == student_or_expert,
                                                           df_q_timestamps['q_number'] == this_question)]
            df_this_q = df_this_q.sort_values(by=['subject'], ascending=True)
            # start and end points for this question (TR), for each subject
            trs_start = df_this_q['q_start_TR'].values + trs_to_add_to_start
            trs_end = df_this_q['q_end_TR'].values + trs_to_add_to_end
            # NOTE(review): this passes as soon as ONE subject has a window longer than 2 TRs;
            # if every subject is meant to be checked it should be np.all - confirm intent
            assert np.sum((trs_end - trs_start) > 2), 'slice problem in {}-{}-{}'.format(student_or_expert,
                                                                                            trs_start,trs_end)
            # extract mean across TRs of epi data per question: vox X subjects
            # (builtin int replaces np.int, an alias of it that NumPy 1.24 removed)
            epi_temporal_mean_per_q[this_question, :, :] = np.array(
                [np.nanmean(epi_data[i_subject][:, int(trs_start[i_subject]):int(trs_end[i_subject])], axis=1) for
                 i_subject in np.arange(len(trs_start))]).transpose()
        # update dict with epi data, mean over TRs
        corr_result[student_or_expert]['roi_epi_per_q'] = epi_temporal_mean_per_q
        # run spatial isc with questions as first dim (get per-question value) (output: question X subject)
        # student vs mean of student, expert-expert
        corr_result[student_or_expert]['roi_isc_per_q_vs_group_rval'] = func_isc_in_notebook(epi_temporal_mean_per_q,
                                                                                        collapse_subj=False)
        # get bootstrap dist (None when num_perms is 0)
        corr_result[student_or_expert]['roi_isc_per_q_vs_group_dist'] = _bootstrap_dist_per_subject(
            corr_result[student_or_expert]['roi_isc_per_q_vs_group_rval'], number_of_subjects, num_perms)

        # student vs CLEAN mean of expert (omit patterns of expert that answered wrong)
        if 'student' in student_or_expert:
            if epi_data is not None:
                try:
                    expert_all = corr_result['expert']['roi_epi_per_q']  # get data
                    if not (('questions' in test_vid_name) or ('placement' in test_vid_name)):
                        # 1) non-clean version - for use when no questions are involved - running just on video - qualifies all 'answers'
                        expert_clean_collapsed = np.nanmean(expert_all,axis=2)
                    else:
                        # 2) CLEAN version
                        clean_mask = (placement_by_q[
                                          'experts'] >= expert_accept_question_threshold).values.transpose()  # mask out wrong answers
                        # mask out bad expert responses, replace with nans
                        # (this writes into the array stored in corr_result['expert']['roi_epi_per_q'])
                        for v in np.arange(expert_all.shape[1]):
                            expert_all[:, v, :][np.invert(clean_mask)] = np.nan
                        # collapse over experts -> questions X voxels
                        expert_clean_collapsed = np.nanmean(expert_all, axis=2)
                    # get rval
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_rval'] = func_isc_in_notebook(
                        epi_temporal_mean_per_q, collapse_subj=False, external_signal=expert_clean_collapsed)

                    # get bootstrap dist (None when num_perms is 0)
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_dist'] = _bootstrap_dist_per_subject(
                        corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_rval'],  # questions X students
                        number_of_subjects, num_perms)
                    if 'nots' in sim_or_sim_minus_nots:  # skip nots if not required
                        if 'experts' in vs_mean_of:
                            # get rval
                            corr_result[student_or_expert]['roi_isc_NOTS_per_q_student_vs_expert_rval'] = func_spatial_isc_q_minus_nots(
                                epi_temporal_mean_per_q, external_signal=expert_clean_collapsed)
                        elif 'students' in vs_mean_of:
                            # get rval
                            corr_result[student_or_expert]['roi_isc_NOTS_per_q_student_vs_student_rval'] = func_spatial_isc_q_minus_nots(
                                epi_temporal_mean_per_q)

                except KeyError: # no expert data
                    # zero out student-expert
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_rval']=0
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_dist']=0

    if 'experts' in vs_mean_of:
        if 'nots' in sim_or_sim_minus_nots:
            # similarity minus nots: if this is correlated with score, then the similarity-score link is question specific, not global
            # no p-value is computed for this variant
            cx = corr_result['student']['roi_isc_per_q_student_vs_expert_rval'] - corr_result['student']['roi_isc_NOTS_per_q_student_vs_expert_rval']  # similarity minus nots
            px = None
        else:  # similarity
            cx = corr_result['student']['roi_isc_per_q_student_vs_expert_rval'].copy() # questions X students
            # pval for the mean value, variance source: subjects
            px = _mean_similarity_pvalue(cx, corr_result['student']['roi_isc_per_q_student_vs_expert_dist'])
    elif 'students' in vs_mean_of:
        cx = corr_result['student']['roi_isc_per_q_vs_group_rval']  # questions X students
        # pval for the mean value, variance source: subjects
        px = _mean_similarity_pvalue(cx, corr_result['student']['roi_isc_per_q_vs_group_dist'])

    if 'within' in correlation_with_score.lower(): # correlate similarity score with placement score within subjects
        corr_score_vec = np.zeros(cx.shape[1]) # rvals
        corr_perm_dist = np.zeros((num_perms,cx.shape[1])) # rand dist for each rval
        for i_this_student in range(cx.shape[1]):
            # rvals and distribution for each, taken from that subject
            x = cx[:, i_this_student]
            y = placement_by_q['students'].iloc[i_this_student].values # scores
            corr_score_vec[i_this_student],corr_perm_dist[:, i_this_student] = func_corr_and_null_dist(x,y,num_perms=num_perms)
        # rval
        rval = isc.compute_summary_statistic(corr_score_vec)
        # pval
        within_dist = np.tanh(np.nanmean(np.arctanh(corr_perm_dist),axis=1)) # mean over subjects for each rand perm
        if np.isnan(rval) or np.sum(np.isnan(within_dist)):
            pval=np.nan
        else:
            pval=isc.p_from_null(rval,within_dist,side='right',exact=False)
        out_val=np.array([rval,pval])

    elif 'direct' in correlation_with_score.lower(): # correlate similarity to group and similarity to experts
        x=corr_result['student']['roi_isc_per_q_student_vs_expert_rval'] # questions X students
        y=corr_result['student']['roi_isc_per_q_vs_group_rval']   # questions X students
        # corr separately in each bin
        n_bins=x.shape[0]
        direct_dists=np.zeros((n_bins,num_perms))
        direct_rvals=np.zeros((n_bins))
        for this_bin in range(n_bins):
            direct_rvals[this_bin],direct_dists[this_bin,:]=\
                func_corr_and_null_dist(x[this_bin,:],y[this_bin,:],num_perms=num_perms)
        rval=np.tanh(np.nanmean(np.arctanh(direct_rvals))) # take mean across bins
        direct_mean_dist=np.tanh(np.nanmean(np.arctanh(direct_dists),axis=0))
        if np.isnan(rval) or np.sum(np.isnan(direct_mean_dist)):
            pval=np.nan
        else:
            pval=isc.p_from_null(rval,direct_mean_dist,side='right',exact=False)
        out_val = np.array([rval,pval]) # for placement direct comparison  - sl output is nii map

    elif 'skip' in correlation_with_score.lower():  # do not correlate with placement score, output sim score as is
        out_val = np.array([np.tanh(np.nanmean(np.arctanh(cx))),px]) # similarity: [r-value, p-value]

    return out_val
 

In [119]:
#func diag peritem - per-q (between) or per-student (within) rval and pval, do not average over questions\students

def func_diagonal_similarity_peritem(df_q_timestamps, placement_by_q, data_in_sl, sim_or_sim_minus_nots='sim',get_reg_coeff=False):
    """
    Per-item 'diagonal' similarity: r- and p-values per question ('between') or per student ('within'),
    without averaging over questions/students.

    For each group in subject_groups, the ROI data of every subject is averaged over the TRs of each
    question, and func_isc_in_notebook is run on the resulting questions X voxels X subjects array
    (subject vs. own group). For students, the same is also run against the mean expert pattern
    (passed as external_signal); when test_vid_name contains 'questions' or 'placement', expert
    answers scoring below expert_accept_question_threshold are masked out of that mean.

    Settings are read from module-level names: subject_groups, trs_to_add_to_start, trs_to_add_to_end,
    test_vid_name, expert_accept_question_threshold, vs_mean_of, correlation_with_score, num_perms.

    Parameters
    ----------
    df_q_timestamps : pandas.DataFrame
        One row per subject and question; columns used: 'student_or_expert', 'q_number', 'subject',
        'q_start_TR', 'q_end_TR'.
    placement_by_q : dict-like
        placement_by_q['students'] / ['experts'] hold question scores; rows are read as subjects and
        columns as question numbers (presumably pandas DataFrames - TODO confirm).
    data_in_sl : dict-like
        data_in_sl[group + 's']['test_data'] is a sequence with one voxels X TRs array per subject.
        NOTE(review): subjects are matched by position to df_q_timestamps sorted by 'subject' -
        confirm both use the same ordering.
    sim_or_sim_minus_nots : str
        Not used by this function; kept so existing callers keep working.
    get_reg_coeff : bool
        In 'within' mode, also return the coefficients of the per-student line fitted with
        np.polyfit(similarity, score, 1).

    Returns
    -------
    Depends on correlation_with_score (checked in this order; a later match overrides an earlier one):
      'skip'    - similarity array, questions X students
      'between' - (rvals, pvals), one entry per question
      'within'  - (rvals, pvals), one entry per student; with get_reg_coeff also (..., a, b) for a*x+b
      otherwise - np.nan
    """
    number_of_questions = len(df_q_timestamps['q_number'].unique())  # 16 q in placement exam
    corr_result = {}
    for student_or_expert in sorted(subject_groups): # order is important- do expert before to allow comparison with student
        epi_data = data_in_sl[student_or_expert + 's'][
            'test_data']  # in no srm version, epi data is just the non-transformed test data
        corr_result[student_or_expert] = {}
        corr_result[student_or_expert]['roi_epi_per_q'] = None  # timecourse data in roi, do isc on this
        corr_result[student_or_expert][
            'roi_isc_per_q_vs_group_rval'] = None  # r-value result of isc, questions are in the 'voxels' dim: student vs student, expert vs expert
        if 'student' in student_or_expert:
            corr_result['student']['roi_isc_per_q_student_vs_expert_rval'] = None  # rval result of pseudo-isc, student vs expert

        epi_temporal_mean_per_q = np.zeros(
        [number_of_questions, epi_data[0].shape[0], len(epi_data)])  # questions (instead of TR) X voxels X subjects
        for this_question in range(number_of_questions):
            # slice this question: entries for this question, all subjects (expert or student separately)
            df_this_q = df_q_timestamps.loc[np.logical_and(df_q_timestamps['student_or_expert'] == student_or_expert,
                                                           df_q_timestamps['q_number'] == this_question)]
            df_this_q = df_this_q.sort_values(by=['subject'], ascending=True)
            # start and end points for this question (TR), for each subject
            trs_start = df_this_q['q_start_TR'].values + trs_to_add_to_start
            trs_end = df_this_q['q_end_TR'].values + trs_to_add_to_end
            # NOTE(review): this passes as soon as ONE subject has a window longer than 2 TRs;
            # if every subject is meant to be checked this should be np.all(...) - confirm intent.
            assert np.sum((trs_end - trs_start) > 2), 'slice problem in {}-{}-{}-{}'.format(student_or_expert,
                                                                                            this_question, trs_end,
                                                                                            trs_start)
            # extract mean across TRs of epi data per question: vox X subjects
            # (builtin int replaces np.int, which was only an alias and no longer exists in NumPy >= 1.24)
            epi_temporal_mean_per_q[this_question, :, :] = np.array(
                [np.nanmean(epi_data[i_subject][:, int(trs_start[i_subject]):int(trs_end[i_subject])], axis=1) for
                 i_subject in np.arange(len(trs_start))]).transpose()
            # assert not np.sum(np.isnan(epi_temporal_mean_per_q[this_question,:,:])), 'nans here in {}, q{}'.format(student_or_expert,this_question)
        # update dict with epi data, mean over TRs
        corr_result[student_or_expert]['roi_epi_per_q'] = epi_temporal_mean_per_q
        # run spatial isc with questions as first dim (get per-question value) (output: question X subject)
        # student vs mean of student, expert-expert
        corr_result[student_or_expert]['roi_isc_per_q_vs_group_rval'] = func_isc_in_notebook(epi_temporal_mean_per_q,
                                                                                        collapse_subj=False)
        # skip bootstrap dist
        #corr_result[student_or_expert]['roi_isc_per_q_vs_group_dist']=None

        # student vs CLEAN mean of expert (omit patterns of expert that answered wrong)
        if 'student' in student_or_expert:
            if epi_data is not None:
                try:
                    # same array object that was stored for the experts above; the masking below
                    # therefore edits it in place (corr_result is local to this function)
                    expert_all = corr_result['expert']['roi_epi_per_q']  # get data
                    if not (('questions' in test_vid_name) or ('placement' in test_vid_name)):
                        # 1) non-clean version - for use when no questions are involved - running just on video - qualifies all 'answers'
                        expert_clean_collapsed = np.nanmean(expert_all,axis=2)
                    else:
                        # 2) CLEAN version
                        clean_mask = (placement_by_q[
                                          'experts'] >= expert_accept_question_threshold).values.transpose()  # mask out wrong answers
                        # mask out bad expert responses, replace with nans
                        for v in np.arange(expert_all.shape[1]):
                            expert_all[:, v, :][np.invert(clean_mask)] = np.nan
                        # collapse over experts -> questions X voxels
                        expert_clean_collapsed = np.nanmean(
                            np.array([expert_all[:, v, :] for v in np.arange(expert_all.shape[1])]),
                            axis=2).transpose()
                    # get rval (skip null dist)
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_rval'] = func_isc_in_notebook(
                        epi_temporal_mean_per_q, collapse_subj=False, external_signal=expert_clean_collapsed)

                except KeyError: # no expert data
                    # zero out student-expert
                    # NOTE(review): a scalar 0 here makes the 'experts' branch below fail on .copy();
                    # the fallback is only usable when vs_mean_of selects 'students'.
                    corr_result[student_or_expert]['roi_isc_per_q_student_vs_expert_rval']=0

    # pick the similarity matrix to report / correlate: questions X students
    if 'experts' in vs_mean_of:
        cx = corr_result['student']['roi_isc_per_q_student_vs_expert_rval'].copy()
    elif 'students' in vs_mean_of:
        cx = corr_result['student']['roi_isc_per_q_vs_group_rval']
    out_val=np.nan


    if 'skip' in correlation_with_score.lower():
        out_val=cx # questions X students - full output, no collapse, no corrw question score
    #
    #between: output is per-q rval,pval
    if 'between' in correlation_with_score.lower():  # correlate similarity score with placement score between subjects
        corr_score_vec = np.zeros(cx.shape[0]) # rvals per q
        corr_perm_dist = np.zeros((num_perms,cx.shape[0])) # rand dist for each rval
        corr_pval_vec = np.zeros(cx.shape[0]) # pval from corr_score_vec and corr_perm_dist, per q
        for i_this_q in range(cx.shape[0]):
            # rvals and distribution for each, taken from that question
            x = cx[i_this_q,:]
            y = placement_by_q['students'][i_this_q].values
            corr_score_vec[i_this_q],corr_perm_dist[:, i_this_q] = func_corr_and_null_dist(x,y,num_perms=num_perms)
            if np.isnan(corr_score_vec[i_this_q]) or np.sum(np.isnan(corr_perm_dist[:, i_this_q])):
                corr_pval_vec[i_this_q]=np.nan
            else:
                corr_pval_vec[i_this_q]=isc.p_from_null(corr_score_vec[i_this_q],corr_perm_dist[:, i_this_q],side='right',exact=False,axis=0)
        out_val= corr_score_vec,corr_pval_vec # tuple len 2, each item is a (16,) vector

    #within: output is per-student rval,pval
    if 'within' in correlation_with_score.lower():  # correlate similarity score with placement score
        # for every subject, use per-q data, correlate similarity to experts (16 vals) with question score (16 vals)
        n_students = cx.shape[1]  # taken from cx itself instead of a variable left over from the group loop
        corr_score_vec = np.zeros(n_students) # rvals, vector size number of subjects
        corr_perm_dist = np.zeros((num_perms,n_students)) # rand dist for each rval
        corr_pval_vec = np.zeros(n_students) # pval from corr_score_vec and corr_perm_dist, per student
        corr_trendline_coeff_a=np.zeros(n_students) # regression line coefficient a
        corr_trendline_coeff_b=np.zeros(n_students) # regression line coefficient b
        for this_subject in range(n_students):
            # rvals and distribution for each, taken from that subject
            # x is sim (or sim nots)
            x = cx[:, this_subject]
            y = placement_by_q['students'].iloc[this_subject].values  # y is vec q scores this subject
            corr_score_vec [this_subject],corr_perm_dist[:, this_subject] = func_corr_and_null_dist(x,y,num_perms=num_perms)
            if np.isnan(corr_score_vec[this_subject]) or np.sum(np.isnan(corr_perm_dist[:, this_subject])):
                corr_pval_vec[this_subject]=np.nan
            else:
                corr_pval_vec[this_subject]=isc.p_from_null(corr_score_vec[this_subject],corr_perm_dist[:, this_subject],side='right',exact=False,axis=0)
            if get_reg_coeff:
                regline=np.polyfit(x, y, 1)
                corr_trendline_coeff_a[this_subject]=regline[0]
                corr_trendline_coeff_b[this_subject]=regline[1]
        if not get_reg_coeff:
            out_val = corr_score_vec,corr_pval_vec # tuple len 2, each item is a (n_subj,) vector
        else: # get regression coefficients for reg line (a,b, for ax+b)
            out_val = corr_score_vec,corr_pval_vec, corr_trendline_coeff_a,corr_trendline_coeff_b # tuple len 4, each item is a (n_subj,) vector, a, b


    return out_val


In [120]:
def func_isfc_similarity(df_q_timestamps, placement_by_q, data_in_sl):
    """
    isfc similarity: knowledge structure
    use experts isfc as template for students isfc off-diagonal

    Steps
      1. For each group in subject_groups, average every subject's ROI data over the TRs of each
         question (questions X voxels X subjects) and build a question-by-question matrix with
         func_isfc_in_notebook.
      2. For every question and student, correlate that student's row of the matrix (same-question
         entry removed) with the matching row of the expert matrix ('experts' in vs_mean_of) or of
         the mean over the other students ('students' in vs_mean_of).
      3. Either summarise the similarity directly ('skip') or correlate it, per student, with the
         question scores ('within').

    Settings are read from module-level names: subject_groups, trs_to_add_to_start, trs_to_add_to_end,
    test_vid_name, expert_accept_question_threshold, vs_mean_of, correlation_with_score, num_perms.

    Parameters
    ----------
    df_q_timestamps : pandas.DataFrame
        Columns used: 'student_or_expert', 'q_number', 'subject', 'q_start_TR', 'q_end_TR'.
    placement_by_q : dict-like
        placement_by_q['students'] / ['experts'] hold question scores, rows read as subjects
        (presumably pandas DataFrames - TODO confirm).
    data_in_sl : dict-like
        data_in_sl[group + 's']['test_data'] is a sequence with one voxels X TRs array per subject.

    Returns
    -------
    np.ndarray of two values [r-value, p-value]; the p-value is right-sided, from isc.p_from_null
    against the null distribution returned by func_corr_and_null_dist.
    NOTE(review): out_val is only assigned for 'within' or 'skip' in correlation_with_score; any
    other setting ends in an UnboundLocalError at the return.
    """
    number_of_questions = len(df_q_timestamps['q_number'].unique())  # 16 q in placement exam
    corr_result = {}

    # first, get ISFC matrices for experts (collapsed) and students (not collapsed)
    for student_or_expert in sorted(subject_groups): # order is important - do expert before to allow comparison with student
        epi_data = data_in_sl[student_or_expert + 's'][
            'test_data']  # in no srm version, epi data is just the non-transformed test data
        corr_result[student_or_expert] = {}
        corr_result[student_or_expert]['roi_epi_per_q'] = None  # timecourse data in roi, do isc on this
        corr_result[student_or_expert]['q-q-similarity'] = None  # per participant, question-to-question similarity
        epi_temporal_mean_per_q = np.zeros(
            [number_of_questions, epi_data[0].shape[0], len(epi_data)])  # questions (instead of TR) X voxels X subjects

        for this_question in range(number_of_questions):
            # slice this question: entries for this question, all subjects (expert or student separately)
            df_this_q = df_q_timestamps.loc[np.logical_and(df_q_timestamps['student_or_expert'] == student_or_expert,
                                                           df_q_timestamps['q_number'] == this_question)]
            df_this_q = df_this_q.sort_values(by=['subject'], ascending=True)

            # start and end points for this question (TR), for each subject
            trs_start = df_this_q['q_start_TR'].values + trs_to_add_to_start
            trs_end = df_this_q['q_end_TR'].values + trs_to_add_to_end
            # NOTE(review): passes as soon as ONE subject has a window longer than 2 TRs - confirm
            # whether np.all(...) was intended.
            assert np.sum((trs_end - trs_start) > 2), 'slice problem in {}-{}-{}-{}'.format(student_or_expert,
                                                                                            this_question, trs_end,
                                                                                            trs_start)
            # extract mean across TRs of epi data per question: vox X subjects
            # (builtin int replaces np.int, which was only an alias and no longer exists in NumPy >= 1.24)
            epi_temporal_mean_per_q[this_question, :, :] = np.array(
                [np.nanmean(epi_data[i_subject][:, int(trs_start[i_subject]):int(trs_end[i_subject])], axis=1) for
                 i_subject in np.arange(len(trs_start))]).transpose()
            # assert not np.sum(np.isnan(epi_temporal_mean_per_q[this_question,:,:])), 'nans here in {}, q{}'.format(student_or_expert,this_question)
        # update dict with epi data, mean over TRs
        # (stored by reference: the in-place expert masking below is visible through this entry too)
        corr_result[student_or_expert]['roi_epi_per_q'] = epi_temporal_mean_per_q
        # epi_per_question in original code -> epi_temporal_mean_per_q

        if 'expert' in subject_groups:
            # calc expert template and use that for ISFC of students,experts
            if 'expert' in student_or_expert:
                if not (('questions' in test_vid_name) or ('placement' in test_vid_name)):
                    # 1) non-clean version - for use when no questions are involved - running just on video - qualifies all 'answers'
                    experts_clean_collapsed = np.nanmean(epi_temporal_mean_per_q,axis=2)
                else:
                    # 2) CLEAN version
                    clean_mask = (placement_by_q[
                                      'experts'] >= expert_accept_question_threshold).values.transpose()  # mask out wrong answers
                    # mask out bad expert responses - replace with nans
                    for v in np.arange(epi_temporal_mean_per_q.shape[1]):
                        epi_temporal_mean_per_q[:, v, :][np.invert(clean_mask)] = np.nan
                    # mean over experts -> questions X voxels
                    experts_clean_collapsed = np.nanmean(np.array(
                        [epi_temporal_mean_per_q[:, v, :] for v in np.arange(epi_temporal_mean_per_q.shape[1])]),
                                                      axis=2).transpose()

                # update dict (unused, here for clarity)
                corr_result[student_or_expert]['template'] = experts_clean_collapsed.copy()

            # for both students and experts
            # calc ISFC with experts as template
            # collapse over experts to create template, don't collapse over students to keep individual
            do_isfc_mean_over_subj = 'expert' in student_or_expert
            corr_result[student_or_expert]['isfc_mat'] = \
                func_isfc_in_notebook(corr_result[student_or_expert]['roi_epi_per_q'],\
                collapse_subj=do_isfc_mean_over_subj, external_signal=experts_clean_collapsed)

        # for students only: calc standard ISFC, leave one out
        # NOTE(review): this replaces the expert-template 'isfc_mat' computed just above for students,
        # so the student matrices used below never involve the expert template - confirm this is intended.
        if 'student' in student_or_expert:
            corr_result[student_or_expert]['isfc_mat'] = \
                func_isfc_in_notebook(corr_result[student_or_expert]['roi_epi_per_q'],\
                collapse_subj=False)


    #second, do row-by-row (q-by-q) correlation between st and ex matrices
    # get data
    students_mat = corr_result['student']['isfc_mat'].copy()
    if 'expert' in subject_groups:
        experts_mat = corr_result['expert']['isfc_mat'].copy()
    number_of_questions = students_mat.shape[0]
    number_of_subjects = students_mat.shape[2]
    # corr each question in each subject with experts' pattern for question
    sim_rval = np.zeros([number_of_questions, number_of_subjects])  # similarity for each question, subject
    sim_dist = np.zeros([number_of_questions, number_of_subjects, num_perms])
    for this_question in np.arange(number_of_questions):
        # get (mean of) experts' pattern for this question
        if 'expert' in subject_groups:
            experts_this_q = experts_mat[this_question, :]
        for this_subject in np.arange(number_of_subjects):
            this_student_this_q = students_mat[this_question, :, this_subject]  # sim pattern for this st, this q
            # the scalar and the boolean index are separated by a slice, so NumPy puts the subject
            # axis first: the indexed result is (other subjects) X questions and axis=0 averages subjects
            group_this_q = np.mean(students_mat[this_question, :, np.arange(students_mat.shape[2]) != this_subject],
                                   axis=0)  # sim pattern for all other st, this q (ISC)
            # (1) sim_rval: correlate this question sim pattern in expert/group of students, this question sim pattern in student
            # but omit same-q corr, will be 1 for both experts and students, drive corr up artificially
            x = this_student_this_q[np.arange(len(this_student_this_q)) != this_question].copy()
            if 'experts' in vs_mean_of:
                # correlate this-question sim pattern in student, this-question patterns in experts
                y_experts = experts_this_q[np.arange(len(experts_this_q)) != this_question].copy()
                x_corr_y_rval,x_corr_y_dist=func_corr_and_null_dist(x,y_experts,num_perms=num_perms)
            elif 'students' in vs_mean_of:
                # correlate this-question sim pattern in student, this-question patterns in group of students
                y_students = group_this_q[np.arange(len(this_student_this_q)) != this_question].copy()
                x_corr_y_rval,x_corr_y_dist=func_corr_and_null_dist(x,y_students,num_perms=num_perms)
            sim_rval[this_question, this_subject] = x_corr_y_rval.copy()
            sim_dist[this_question, this_subject,:] = x_corr_y_dist.copy()


    # prep for out / corr and out
    # rval
    cx = sim_rval.copy()  # questions X students
    # pval
    # collapse across questions and students to get null dist for mean
    # sim_dist: questions X subjects X perms; averaging is done on Fisher-z (arctanh) values
    temp=np.tanh(np.nanmean(np.arctanh(sim_dist),axis=0))
    null_dist=np.tanh(np.nanmean(np.arctanh(temp),axis=0))
    if np.isnan(np.nanmean(cx)) or np.sum(np.isnan(null_dist)):
        px=np.nan
    else:
        px = isc.p_from_null(np.tanh(np.nanmean(np.arctanh(cx))),null_dist,side='right',exact=False)

    if 'within' in correlation_with_score.lower():  # correlate similarity score with placement score
        # for every subject, use per-q data, correlate similarity to experts (16 vals) with question score (16 vals): return mean over subjects
        corr_score_vec = np.zeros(cx.shape[1]) # rvals, vector size number of subjects
        corr_perm_dist = np.zeros((num_perms,cx.shape[1])) # rand dist for each rval
        for this_subject in np.arange(number_of_subjects):
            # rvals and distribution for each, taken from that subject
            # x is sim (or sim nots)
            x = cx[:, this_subject]
            y = placement_by_q['students'].iloc[this_subject].values  # y is vec q scores this subject
            corr_score_vec [this_subject],corr_perm_dist[:, this_subject] = func_corr_and_null_dist(x,y,num_perms=num_perms)
        #rval summary - as in searchlight
        rval = isc.compute_summary_statistic(corr_score_vec)
        #pval
        within_dist = np.tanh(np.nanmean(np.arctanh(corr_perm_dist),axis=1)) # mean over subjects for each rand perm
        if np.isnan(rval) or np.sum(np.isnan(within_dist)):
            pval=np.nan
        else:
            pval=isc.p_from_null(rval,within_dist,side='right',exact=False)
        out_val = np.array([rval,pval])


    elif 'skip' in correlation_with_score.lower():  # do not correlate with placement score, output sim score as is
        out_val = np.array([np.tanh(np.nanmean(np.arctanh(cx))),px])

    return out_val



In [121]:
def func_isfc_similarity_peritem(df_q_timestamps, placement_by_q, data_in_sl,get_reg_coeff=False):
    """
    isfc similarity:
    use experts isfc as template for students isfc off-diagonal
    peritem: returns values for all subjects, in 'within' corr, all q in 'between' corr
    get_reg_coeff: get trend line coefficients ax+b

    The similarity matrix (questions X students) is built as follows: every subject's ROI data is
    averaged over the TRs of each question, a question-by-question matrix is computed with
    func_isfc_in_notebook, and each student's row (same-question entry removed) is correlated with
    the matching row of the expert matrix ('experts' in vs_mean_of) or of the mean over the other
    students ('students' in vs_mean_of). No null distribution is built for this step (num_perms=0).

    Settings are read from module-level names: subject_groups, trs_to_add_to_start, trs_to_add_to_end,
    test_vid_name, expert_accept_question_threshold, vs_mean_of, correlation_with_score, num_perms.

    Parameters
    ----------
    df_q_timestamps : pandas.DataFrame
        Columns used: 'student_or_expert', 'q_number', 'subject', 'q_start_TR', 'q_end_TR'.
    placement_by_q : dict-like
        placement_by_q['students'] / ['experts'] hold question scores; rows are read as subjects and
        columns as question numbers (presumably pandas DataFrames - TODO confirm).
    data_in_sl : dict-like
        data_in_sl[group + 's']['test_data'] is a sequence with one voxels X TRs array per subject.
    get_reg_coeff : bool
        In 'within' mode, also return the coefficients of np.polyfit(similarity, score, 1).

    Returns
    -------
    Depends on correlation_with_score (checked in this order; a later match overrides an earlier one):
      'skip'    - similarity array, questions X students
      'between' - (rvals, pvals), one entry per question
      'within'  - (rvals, pvals), one entry per student; with get_reg_coeff also (..., a, b) for a*x+b
      otherwise - np.nan
    """

    number_of_questions = len(df_q_timestamps['q_number'].unique())  # 16 q in placement exam
    corr_result = {}

    # first, get ISFC matrices for experts (collapsed) and students (not collapsed)
    for student_or_expert in sorted(subject_groups): # order is important - do expert before to allow comparison with student
        epi_data = data_in_sl[student_or_expert + 's'][
            'test_data']  # in no srm version, epi data is just the non-transformed test data
        corr_result[student_or_expert] = {}
        corr_result[student_or_expert]['roi_epi_per_q'] = None  # timecourse data in roi, do isc on this
        corr_result[student_or_expert]['q-q-similarity'] = None  # per participant, question-to-question similarity
        epi_temporal_mean_per_q = np.zeros(
            [number_of_questions, epi_data[0].shape[0], len(epi_data)])  # questions (instead of TR) X voxels X subjects
        for this_question in range(number_of_questions):
            # slice this question: entries for this question, all subjects (expert or student separately)
            df_this_q = df_q_timestamps.loc[np.logical_and(df_q_timestamps['student_or_expert'] == student_or_expert,
                                                           df_q_timestamps['q_number'] == this_question)]
            df_this_q = df_this_q.sort_values(by=['subject'], ascending=True)
            # start and end points for this question (TR), for each subject
            trs_start = df_this_q['q_start_TR'].values + trs_to_add_to_start
            trs_end = df_this_q['q_end_TR'].values + trs_to_add_to_end
            # NOTE(review): passes as soon as ONE subject has a window longer than 2 TRs - confirm
            # whether np.all(...) was intended.
            assert np.sum((trs_end - trs_start) > 2), 'slice problem in {}-{}-{}-{}'.format(student_or_expert,
                                                                                            this_question, trs_end,
                                                                                            trs_start)
            # extract mean across TRs of epi data per question: vox X subjects
            # (builtin int replaces np.int, which was only an alias and no longer exists in NumPy >= 1.24)
            epi_temporal_mean_per_q[this_question, :, :] = np.array(
                [np.nanmean(epi_data[i_subject][:, int(trs_start[i_subject]):int(trs_end[i_subject])], axis=1) for
                 i_subject in np.arange(len(trs_start))]).transpose()
            # assert not np.sum(np.isnan(epi_temporal_mean_per_q[this_question,:,:])), 'nans here in {}, q{}'.format(student_or_expert,this_question)
        # update dict with epi data, mean over TRs
        # (stored by reference: the in-place expert masking below is visible through this entry too)
        corr_result[student_or_expert]['roi_epi_per_q'] = epi_temporal_mean_per_q
        # epi_per_question in original code -> epi_temporal_mean_per_q

        if 'expert' in subject_groups:
            # calc expert template and use that for ISFC of students,experts
            if 'expert' in student_or_expert:
                if not (('questions' in test_vid_name) or ('placement' in test_vid_name)):
                    # 1) non-clean version - for use when no questions are involved - running just on video - qualifies all 'answers'
                    experts_clean_collapsed = np.nanmean(epi_temporal_mean_per_q,axis=2)
                else:
                    # 2) CLEAN version
                    clean_mask = (placement_by_q[
                                      'experts'] >= expert_accept_question_threshold).values.transpose()  # mask out wrong answers
                    # mask out bad expert responses - replace with nans
                    for v in np.arange(epi_temporal_mean_per_q.shape[1]):
                        epi_temporal_mean_per_q[:, v, :][np.invert(clean_mask)] = np.nan
                    # mean over experts -> questions X voxels
                    experts_clean_collapsed = np.nanmean(np.array(
                        [epi_temporal_mean_per_q[:, v, :] for v in np.arange(epi_temporal_mean_per_q.shape[1])]),
                                                      axis=2).transpose()

                # update dict (unused, here for clarity)
                corr_result[student_or_expert]['template'] = experts_clean_collapsed.copy()

            # for both students and experts
            # calc ISFC with experts as template
            # collapse over experts to create template, don't collapse over students to keep individual
            do_isfc_mean_over_subj = 'expert' in student_or_expert
            corr_result[student_or_expert]['isfc_mat'] = \
                func_isfc_in_notebook(corr_result[student_or_expert]['roi_epi_per_q'],\
                collapse_subj=do_isfc_mean_over_subj, external_signal=experts_clean_collapsed)

        # for students only: calc standard ISFC, leave one out
        # NOTE(review): this replaces the expert-template 'isfc_mat' computed just above for students,
        # so the student matrices used below never involve the expert template - confirm this is intended.
        if 'student' in student_or_expert:
            corr_result[student_or_expert]['isfc_mat'] = \
                func_isfc_in_notebook(corr_result[student_or_expert]['roi_epi_per_q'],\
                collapse_subj=False)


    #second, do row-by-row (q-by-q) correlation between st and ex matrices
    # get data
    students_mat = corr_result['student']['isfc_mat'].copy()
    if 'expert' in subject_groups:
        experts_mat = corr_result['expert']['isfc_mat'].copy()
    number_of_questions = students_mat.shape[0]
    number_of_subjects = students_mat.shape[2]
    # corr each question in each subject with experts' pattern for question
    sim_rval = np.zeros([number_of_questions, number_of_subjects])  # similarity for each question, subject
    for this_question in np.arange(number_of_questions):
        # get (mean of) experts' pattern for this question
        if 'expert' in subject_groups:
            experts_this_q = experts_mat[this_question, :]
        for this_subject in np.arange(number_of_subjects):
            this_student_this_q = students_mat[this_question, :, this_subject]  # sim pattern for this st, this q
            # the scalar and the boolean index are separated by a slice, so NumPy puts the subject
            # axis first: the indexed result is (other subjects) X questions and axis=0 averages subjects
            group_this_q = np.mean(students_mat[this_question, :, np.arange(students_mat.shape[2]) != this_subject],
                                   axis=0)  # sim pattern for all other st, this q (ISC)
            # (1) sim_rval: correlate this question sim pattern in expert/group of students, this question sim pattern in student
            # but omit same-q corr, will be 1 for both experts and students, drive corr up artificially
            x = this_student_this_q[np.arange(len(this_student_this_q)) != this_question].copy()
            if 'experts' in vs_mean_of:
                # correlate this-question sim pattern in student, this-question patterns in experts
                y_experts = experts_this_q[np.arange(len(experts_this_q)) != this_question].copy()
                x_corr_y_rval, _ = func_corr_and_null_dist(x,y_experts,num_perms=0) # skip dist
            elif 'students' in vs_mean_of:
                # correlate this-question sim pattern in student, this-question patterns in group of students
                y_students = group_this_q[np.arange(len(this_student_this_q)) != this_question].copy()
                x_corr_y_rval, _ = func_corr_and_null_dist(x,y_students,num_perms=0) # skip dist
            sim_rval[this_question, this_subject] = x_corr_y_rval.copy()


    # prep for out / corr and out
    # rval
    cx = sim_rval.copy()  # questions X students

    out_val=np.nan


    if 'skip' in correlation_with_score.lower():
        out_val=cx # questions X students - full output, no collapse, no corrw question score


    #between: output is per-q rval,pval
    if 'between' in correlation_with_score.lower():  # correlate similarity score with placement score between subjects
        corr_score_vec = np.zeros(cx.shape[0]) # rvals per q
        corr_perm_dist = np.zeros((num_perms,cx.shape[0])) # rand dist for each rval
        corr_pval_vec = np.zeros(cx.shape[0]) # pval from corr_score_vec and corr_perm_dist, per q
        for i_this_q in range(cx.shape[0]):
            # rvals and distribution for each, taken from that question
            x = cx[i_this_q,:]
            y = placement_by_q['students'][i_this_q].values
            corr_score_vec[i_this_q],corr_perm_dist[:, i_this_q] = func_corr_and_null_dist(x,y,num_perms=num_perms)
            if np.isnan(corr_score_vec[i_this_q]) or np.sum(np.isnan(corr_perm_dist[:, i_this_q])):
                corr_pval_vec[i_this_q]=np.nan
            else:
                corr_pval_vec[i_this_q]=isc.p_from_null(corr_score_vec[i_this_q],corr_perm_dist[:, i_this_q],side='right',exact=False,axis=0)
        out_val= corr_score_vec,corr_pval_vec # tuple len 2, each item is a (16,) vector


    #within: output is per-student rval,pval
    if 'within' in correlation_with_score.lower():  # correlate similarity score with placement score
        # for every subject, use per-q data, correlate similarity to experts (16 vals) with question score (16 vals)
        corr_score_vec = np.zeros(cx.shape[1]) # rvals, vector size number of subjects
        corr_perm_dist = np.zeros((num_perms,cx.shape[1])) # rand dist for each rval
        corr_pval_vec = np.zeros(cx.shape[1]) # pval from corr_score_vec and corr_perm_dist, per student
        corr_trendline_coeff_a=np.zeros(cx.shape[1]) # regression line coefficient a
        corr_trendline_coeff_b=np.zeros(cx.shape[1]) # regression line coefficient b
        for this_subject in np.arange(number_of_subjects):
            # rvals and distribution for each, taken from that subject
            # x is sim (or sim nots)
            x = cx[:, this_subject]
            y = placement_by_q['students'].iloc[this_subject].values  # y is vec q scores this subject
            corr_score_vec [this_subject],corr_perm_dist[:, this_subject] = func_corr_and_null_dist(x,y,num_perms=num_perms)
            #pval
            if np.isnan(corr_score_vec[this_subject]) or np.sum(np.isnan(corr_perm_dist[:, this_subject])):
                corr_pval_vec[this_subject]=np.nan
            else:
                corr_pval_vec[this_subject]=isc.p_from_null(corr_score_vec[this_subject],corr_perm_dist[:, this_subject],side='right',exact=False,axis=0)
            #coeff
            if get_reg_coeff:
                regline=np.polyfit(x, y, 1)
                corr_trendline_coeff_a[this_subject]=regline[0]
                corr_trendline_coeff_b[this_subject]=regline[1]

        # for all rval
        if not get_reg_coeff:
            out_val = corr_score_vec,corr_pval_vec # tuple len 2, each item is a (n_subj,) vector
        else: # get regression coefficients for reg line (a,b, for ax+b)
            out_val = corr_score_vec,corr_pval_vec, corr_trendline_coeff_a,corr_trendline_coeff_b # tuple len 4, each item is a (n_subj,) vector, a, b


    return out_val


Main

In [57]:
#def main(argv=None):
# Run configuration: choose the analysis parameters (fixed test values or command line),
# set up MPI, load the brain mask and index the input files.

argv=None
# load argv
if argv is None:
    argv = sys.argv

# declare all global params (use global for searchlight via MPI)
# NOTE: at notebook top level these `global` statements have no effect; presumably they are kept so
# the cell still works when wrapped back into main() (see the commented-out def above).
global test_vid_week
global test_vid_name
global similarity_type
global correlation_with_score
global vs_mean_of
global sl_rad
global num_perms
global threshold
global train_vid_week
global train_vid_name
global do_srm

# load params

# use_test_params / use_roi are set in other cells; either one switches to the fixed values below
if use_test_params or use_roi:
    print('Loading fixed params for TEST')
    train_vid_week = None
    train_vid_name = None
    do_srm = False
    test_vid_week = 'wk6'
    test_vid_name = 'placement'
    similarity_type = 'diagonal'
    correlation_with_score = 'within'
    vs_mean_of = 'student-vs-experts'
    sl_rad = int(2)
    num_perms = int(30)
    threshold = float(0.05)
    
else: # use command line params
    # positional layout: [1] train week, [2] train video, [3] test week, [4] test video,
    # [5] similarity type, [6] correlation with score, [7] vs mean of, [8] searchlight radius, [9] perms.
    # Positions 1-2 are always consumed; SRM is switched on only when [1] contains 'wk'.
    if 'wk' in sys.argv[1]:
        train_vid_week = sys.argv[1]
        train_vid_name = sys.argv[2]
        do_srm = True
        print('Loading params from command line')
        print('SRM: on')
        print('SRM training video: {}-{}'.format(train_vid_week,train_vid_name))
    else:
        train_vid_week = None # train data for SRM (None = no SRM)
        train_vid_name = None
        do_srm = False
    # test data for SRM (if no SRM, use test data only)
    test_vid_week = sys.argv[3]  # 'wk6'
    test_vid_name = sys.argv[4]  # 'placement'
    # similarity: 1st order / 2nd order
    similarity_type = sys.argv[5]  #diagonal
    # correlation with score: skip/within-subj
    correlation_with_score = sys.argv[6]
    # vs mean of: calc similarity for student-vs-experts / student-vs-students
    vs_mean_of = sys.argv[7]
    # sl_rad # searchlight radius
    sl_rad = int(sys.argv[8])
    # perms for stats: 0-1000
    num_perms = int(sys.argv[9])
    # maps p-value threshold (FDR corrected)
    threshold = float(0.05)
    # print
    print('Input video (=SRM test): {}-{}'.format(test_vid_week,test_vid_name))
    print('Similarity type: {}'.format(similarity_type))
    print('Correlate with score: {}'.format(correlation_with_score))
    print('student-experts or student-students: {}'.format(vs_mean_of))
    print('Searchlight edge: {}, size = {} voxels'.format(sl_rad,(1+2*sl_rad)**3))
    print('Number of perms: {}'.format(num_perms))
    print('p-value threshold for maps, FDR corrected: {}'.format(threshold))


# MPI - parallelization
global comm,rank,size
comm = MPI.COMM_WORLD
rank = comm.rank  # rank = comm.Get_rank()
size = comm.size  # size = comm.Get_size()
print()
print('mpi info')
print(comm)
print(rank)
print(size)


# load brain mask
global brain_mask_3mm
brain_mask_3mm = io.load_boolean_mask(const_mni_brain_file_name, lambda x: x > 0.05)  # check edges!

# build dict for all filenames, nested
global student_and_expert_files
student_and_expert_files = listdir(input_fslfeat_students_path) + listdir(input_fslfeat_experts_path)
global task_name_template
# raw strings: "\d" in a plain string literal is an invalid escape sequence (warning on current Python)
#task_name_template = r"(s\d{3})_(wk\d+)_([0-9a-zA-Z]*)_mni"  # template for data preprocessed with FSL
task_name_template = r"(s\d{3})_(wk\d+)_([0-9a-zA-Z]*)_6motion_mni"  # template for data preprocessed with FSL + regressout 6 motion
global filenames_dict
filenames_dict = build_dict_filenames(student_and_expert_files)

Loading fixed params for TEST

mpi info
<mpi4py.MPI.Intracomm object at 0x7f2a5291e610>
0
1


In [122]:
# read logs and get behavioral data (placement and isq)
# read_placement_logs() returns the question timestamps table and a mapping
# that holds (at least) a 'students' and an 'experts' table.
df_q_timestamps, placement_by_q = read_placement_logs()
# if 'rtregout' in correlation_with_score.lower(): # use grades after regressing out RT
#     placement_by_q['students']=placement_by_q['students_clean']

# index labels of each group's table (presumably subject ids - confirm against read_placement_logs)
good_students, good_experts = (
    placement_by_q[group_key].index.tolist() for group_key in ('students', 'experts')
)


In [35]:
# set parameters for questions
# Expert answers below this threshold are excluded from the expert pattern that
# students are compared to (use 0 to qualify all answers).
expert_accept_question_threshold = 2
# trim first 8s of question (before response)
trs_to_add_to_end = 0  # pattern of question includes TRs after answer (0s)
trs_to_add_to_start = 4  # pattern of question starts TRs after question start time (8s)

# set parameters for searchlight
# sl_rad is the searchlight radius excluding the center voxel, so a cubic
# searchlight holds ((2 * sl_rad) + 1) ^ 3 voxels: sl_rad=1->27, =2->125, =3->343.
# sl_rad itself is set elsewhere (argv / params cell).
max_blk_edge = 10  # size of block searchlight distributes
pool_size = 1  # cores per task
sl_mask = brain_mask_3mm

if not do_srm:
    # no srm: SRM settings are zeroed
    niter = 0
    nfeature = 0
else:
    nfeature = 20  # number of features in SRM for each searchlight
    niter = 20  # number of iterations in SRM
    # sanity check for SRM parameters: cannot ask for more features than voxels in a searchlight
    if (1 + 2 * sl_rad) ** 3 < nfeature:
        print ('nfeature truncated')
        nfeature = int((1 + 2 * sl_rad) ** 3)

# sanity check for searchlight parameters
if sl_rad <= 0:
    raise ValueError('sl_rad must be positive')

In [36]:
# LOAD DATA
# epi_list: data for searchlight
# first half: train, second half: test (if SRM)
# first loc_split items in train/test: students, the rest-experts
if 'questions' in test_vid_name:  # no expert data for questions
    subject_groups = ['student']
    all_epi_data = load_epi_data(['students'])
else:
    subject_groups = ['expert','student']
    all_epi_data = load_epi_data(['students','experts'])

# number of student datasets; marks where experts start inside each half of epi_list
loc_split = len(all_epi_data['students']['test_data'])

# Expert data is only requested in the non-'questions' branch above, so only
# read it when it was actually returned.
# NOTE(review): load_epi_data is not visible here; if it always returns an
# 'experts' entry this guard is a no-op - confirm.
has_experts = 'experts' in all_epi_data

epi_list = []
if do_srm:
    epi_list += all_epi_data['students']['train_data']
    if has_experts:
        epi_list += all_epi_data['experts']['train_data']
epi_list += all_epi_data['students']['test_data']
if has_experts:
    epi_list += all_epi_data['experts']['test_data']

# print searchlight info (once, from the root MPI rank)
if rank == 0:
    print_sl_info(epi_list,sl_rad,nfeature,loc_split)

experts
Rank 0: Loading subjects -> [0 1 2 3]


  np.expand_dims(sstd, axis=axis))
  np.expand_dims(sstd, axis=axis))


students
Rank 0: Loading subjects -> [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]

Test set, 19 subjects, load time: 116.78s
Test on files:
['/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s102_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s103_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s105_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s106_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s107_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s108_wk6_placement_6motion_mni.nii.gz', '/mnt/bucket/labs/hasson/meshulam/onlineL/pred20/scan_data_nii/students_mni/6motion/s110_wk6_placemen

In [125]:
# set params for the ROI analysis (fixed values for the notebook run)
use_roi = True
min_number_of_vox_in_roi = 5
do_srm = False  # for standard funcs only, untested
test_vid_week, test_vid_name = 'wk6', 'placement'
sl_rad = 2
threshold = 0.05

# set epi list /l
# `l` is kept as an alias of epi_list
l = epi_list

# values handed to the similarity code: [SRM iterations, SRM features, students/experts split index]
bcast_var = [niter, nfeature, loc_split]

# the first dataset is 4-D; its first three dims give the volume size
d1, d2, d3, ntr_train = l[0].shape  # var ntr_train unused
nvx = d1 * d2 * d3  # number of vox



number of perms is set to 1000


similarity_type:  
+ 'diagonal' per-question 
+ 'isfc' knowledge structure

do_collapse:
+ 'True' collapse across items (for analysis) & write to file df_rois_similarity.csv
+ 'False' do not collapse (for plots) & write to file 'df_rois_similarity_per_question_and_student.csv' (for 'skip'; otherwise 'df_rois_similarity_per_item.csv')

correlation_with_score:
+ 'skip': raw similarity-to-class and similarity-to-experts
+ 'within': correlation between similarity and exam score
+ 'direct': correlation between similarity-to-class and similarity-to-experts

In [172]:
# for Figure 4,5 ('placement'), run 'within'
# for Figure 3b ('direct'), run 'direct' & do_collapse=True
#
# For every combination of score-correlation mode, similarity type, comparison
# group and ROI, compute the similarity statistics and collect one result frame.
# All result frames are concatenated into sim_df and written to a CSV at the end.
#
# NOTE(review): the func_* helpers are called without num_perms,
# correlation_with_score, similarity_type or vs_mean_of; they presumably read
# these loop variables as module-level names, so do not rename them - confirm.

do_collapse=True # True: collapse over subjects and questions, False: between yields per-q, within yields per-student


sim_frames=[] # result frames, concatenated into sim_df once after the loops
t0=time.time()
for correlation_with_score in ['skip','within','direct']:

    # no permutation stats are needed when similarity is not correlated with anything
    if 'skip' in correlation_with_score:
        num_perms = int(1)
    else:
        num_perms = int(1000) 
    print('number of perms is set to {}'.format(num_perms))

    
    print('Corr: '+correlation_with_score)
    if not do_collapse: # not grand mean over q and st
        if 'within' in correlation_with_score:
            print('Get per-student results')
        elif 'skip' in correlation_with_score:
            print('Get per-student per-question results, no correlation with score')
        elif 'direct' in correlation_with_score:
            print('correlate each student with st,ex then correlate 20 st-st and 20 st-ex vals in each q')

            
    for similarity_type in ['isfc','diagonal']:
    #for similarity_type in ['diagonal']:
        print('Type: '+similarity_type)
        # use diagonal func to calc 'direct'
        if 'direct' in correlation_with_score and 'isfc' in similarity_type:
            continue
        for all_or_thr in ['all']: # all voxels in roi
            print('thr: '+all_or_thr)
            for vs_mean_of in ['student-vs-experts','student-vs-students']:
                print('vs mean of: '+vs_mean_of)
                t1=time.time()
                # skip st-vs-st if direct
                # (`continue`, not `break`, so the skip does not depend on list order)
                if 'direct' in correlation_with_score and 'student-vs-students' in vs_mean_of:
                    continue
                for this_roi_name in masks.keys():
                    #for this_roi_hemi in masks[this_roi_name][all_or_thr+'_voxels'].keys():
                    for this_roi_hemi in ['bilateral']:
                        # get mask
                        this_roi_mask=masks[this_roi_name][all_or_thr+'_voxels'][this_roi_hemi].copy()
                        # make sure there's enough voxels in this roi mask;
                        # skip only this hemisphere so any further ones are still processed
                        if int(np.sum(this_roi_mask))<min_number_of_vox_in_roi:
                            continue
                        # reset data and reshape
                        data_in_sl = dict()
                        data_in_sl['students'] = {}
                        data_in_sl['students']['test_data'] = []
                        data_in_sl['experts'] = {}
                        data_in_sl['experts']['test_data'] = []
                        tt_data = 'test_data'

                        # first bcast_var[2] (=loc_split) datasets are students, the rest experts
                        # each 4-D dataset is flattened to (voxels, TRs)
                        for s in epi_list[:bcast_var[2]]:  # st
                            if s is not None:
                                tt_ntr = s.shape[3]  # number of TRs, can vary between subjects
                                data_in_sl['students'][tt_data].append(np.reshape(s, (nvx, tt_ntr)))
                        for s in epi_list[bcast_var[2]:]:  # ex
                            if s is not None:
                                tt_ntr = s.shape[3]
                                data_in_sl['experts'][tt_data].append(np.reshape(s, (nvx, tt_ntr)))


                        if use_roi: # for ROI instead of whole-brain
                            data_in_sl = func_roi_data(data_in_sl,this_roi_mask,this_roi_name,this_roi_hemi)
                        
                        # run corr
                        if 'diagonal' in similarity_type: # same-question analysis
                            if not do_collapse: 
                                ret_val = func_diagonal_similarity_peritem(df_q_timestamps, placement_by_q, data_in_sl,sim_or_sim_minus_nots='sim',get_reg_coeff=True)
                            else:
                                ret_val = func_diagonal_similarity(df_q_timestamps, placement_by_q, data_in_sl,sim_or_sim_minus_nots='sim')
                        elif 'isfc' in similarity_type: # knowledge structure analysis
                            if not do_collapse:
                                ret_val = func_isfc_similarity_peritem(df_q_timestamps, placement_by_q, data_in_sl,get_reg_coeff=True)
                            else:
                                ret_val = func_isfc_similarity(df_q_timestamps, placement_by_q, data_in_sl)
                            

                        # columns shared by every kind of result frame
                        n_voxels_in_roi=data_in_sl['students']['test_data'][0].shape[0]

                        if not do_collapse:
                            #if 'within' in correlation_with_score pd contains data per subject, ind is subj sorted
                            #if 'between' in correlation_with_score pd contains data per question, ind is q
                            # if 'skip' in correlation_with_score pd contains data per q and s, no corrw
                            # students\question num, not id, sorted
                            if 'skip' in correlation_with_score:#skip, ret_val is questions x subjects
                                per_subject_frames=[]
                                for this_subject in range(ret_val.shape[1]):
                                    temp_sim_s=pd.DataFrame({'question': range(ret_val.shape[0]),
                                           'subject': this_subject,
                                           'corr_w_score': correlation_with_score,
                                           'score_type': 'placement',
                                           'sim_type': similarity_type,
                                           'roi_type': 'anatomical',
                                           'vs_mean_of': vs_mean_of,
                                           'roi_name': this_roi_name,
                                           'roi_hemi': this_roi_hemi,
                                           'n_voxels':n_voxels_in_roi,
                                           'rval':ret_val[:,this_subject],
                                           'pval':np.nan})
                                    per_subject_frames.append(temp_sim_s)
                                # one concat instead of repeated DataFrame.append (removed in pandas 2.0)
                                temp_sim=pd.concat(per_subject_frames) if per_subject_frames else pd.DataFrame()
                            else: # between or within, not skip
                                # delete coeff if get_reg_coeff is False
                                temp_sim=pd.DataFrame({'ind': range(len(ret_val[0])),
                                                   'corr_w_score': correlation_with_score,
                                                   'score_type': 'placement',
                                                   'sim_type': similarity_type,
                                                   'roi_type': 'anatomical',
                                                   'vs_mean_of': vs_mean_of,
                                                   'roi_name': this_roi_name,
                                                   'roi_hemi': this_roi_hemi,
                                                   'n_voxels':n_voxels_in_roi,
                                                   'coeff_a':ret_val[2],
                                                   'coeff_b':ret_val[3],
                                                   'rval':ret_val[0],
                                                   'pval':ret_val[1]})
                        else: # do_collapse, standard: get one rval,pval for each roi
                            temp_sim=pd.DataFrame({'corr_w_score': correlation_with_score,
                                               'score_type': 'placement',
                                               'sim_type': similarity_type,
                                               'roi_type': 'anatomical',
                                               'vs_mean_of': vs_mean_of,
                                               'roi_name': this_roi_name,
                                               'roi_hemi': this_roi_hemi,
                                               'n_voxels':n_voxels_in_roi,
                                               'rval':ret_val[0],
                                               'pval':ret_val[1]},index=[0])
                        if not temp_sim.empty:
                            sim_frames.append(temp_sim)
                t2=time.time()
                print('All ROIs in {}, {:02f} s'.format(vs_mean_of,t2-t1))   
        print('Corr {} complete'.format(similarity_type))
    print('{}-subjects complete'.format(correlation_with_score))
print('Done in {:02f} s'.format(t2-t0))

# results: row order and column order follow the order the frames were produced
sim_df=pd.concat(sim_frames) if sim_frames else pd.DataFrame()

#SAVE
# NOTE(review): correlation_with_score holds the LAST value of the loop above
# ('direct' with the list as written), so with do_collapse=False the
# per_question_and_student file name is only chosen when the loop list is
# edited to end with 'skip' - confirm this is intended.
if not do_collapse:
    if 'skip' not in correlation_with_score:
        df_fname=join(pickles_path,'df_rois_similarity_per_item.csv')
    else:
        df_fname=join(pickles_path,'df_rois_similarity_per_question_and_student.csv') # for example scatterplot
else: 
    df_fname=join(pickles_path,'df_rois_similarity.csv') #

    
sim_df.to_csv(df_fname,index=False)
print ('{} saved'.format(df_fname))
                     

number of perms is set to 1
Corr: skip
Type: isfc
thr: all
vs mean of: student-vs-experts
ROI Cingulate-ant-bilateral: 1092 voxels
students-test_data
out shape: (1092, 446)
experts-test_data
out shape: (1092, 642)
ROI STG-post-bilateral: 856 voxels
students-test_data
out shape: (856, 446)
experts-test_data
out shape: (856, 642)
ROI Angular-bilateral: 1783 voxels
students-test_data
out shape: (1783, 446)
experts-test_data
out shape: (1783, 642)
ROI Heschls-bilateral: 454 voxels
students-test_data
out shape: (454, 446)
experts-test_data
out shape: (454, 642)
ROI Hippocampus-bilateral: 668 voxels
students-test_data
out shape: (668, 446)
experts-test_data
out shape: (668, 642)
ROI Amygdala-bilateral: 318 voxels
students-test_data
out shape: (318, 446)
experts-test_data
out shape: (318, 642)
ROI Precuneous-bilateral: 2845 voxels
students-test_data
out shape: (2845, 446)
experts-test_data
out shape: (2845, 642)
ROI Intracalcarine-bilateral: 925 voxels
students-test_data
out shape: (925, 446)

ROI Intracalcarine-bilateral: 925 voxels
students-test_data
out shape: (925, 446)
experts-test_data
out shape: (925, 642)
All ROIs in student-vs-students, 37.117988 s
Corr diagonal complete
within-subjects complete
number of perms is set to 1000
Corr: direct
Type: isfc
Type: diagonal
thr: all
vs mean of: student-vs-experts
ROI Cingulate-ant-bilateral: 1092 voxels
students-test_data
out shape: (1092, 446)
experts-test_data
out shape: (1092, 642)
ROI STG-post-bilateral: 856 voxels
students-test_data
out shape: (856, 446)
experts-test_data
out shape: (856, 642)
ROI Angular-bilateral: 1783 voxels
students-test_data
out shape: (1783, 446)
experts-test_data
out shape: (1783, 642)
ROI Heschls-bilateral: 454 voxels
students-test_data
out shape: (454, 446)
experts-test_data
out shape: (454, 642)
ROI Hippocampus-bilateral: 668 voxels
students-test_data
out shape: (668, 446)
experts-test_data
out shape: (668, 642)
ROI Amygdala-bilateral: 318 voxels
students-test_data
out shape: (318, 446)
experts