In [2]:
import csv
import importlib
import itertools
import numpy as np
import pandas as pd
import pickle
import os, shutil, time, h5py
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf

from bct import *
importlib.reload(nbs)
from IPython.display import clear_output
from pathlib import Path
from sklearn import preprocessing
from scipy import stats

In [3]:
%%capture
#from tqdm import tqdm_notebook as tqdm
from tqdm.notebook import tqdm
tqdm().pandas()

In [4]:
# Setting Variables

## CSV Matrices of Connectivitiy Values as Z-scores (via cifti-connectivity)
pconncsv_dir = Path('/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Schaefer2018400p_TianS4/pconns_z_csv')
datadir = Path('/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Data/IPV_Networks')
ROI_labels_path = Path('/fast_scratch/jdr/atlases/Schaefer400Tian4/Schaefer2018_400Parcels_7Networks_order_Tian_Subcortex_S4_label_reformatted.txt')
allArraysPath = Path('/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Data/IPV_Networks/allArrays.pkl')
crpbiPath = Path('/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Data/abcd_crpbi_meansonly_baseline.txt')

In [17]:
## Pull in the ROI labels from the atlas directory and place them in a list

with open(ROI_labels_path, newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    ROI_labels = list(list(zip(*reader))[1])

In [36]:
## Loop over CSV files in the pconncsv_dir - for each grab the subject ID (key) and the matrix and add each as a new 
## item in an expanding list - allArrays

subjIDs = []
allArrays = []

for pconncsv in pconncsv_dir.glob('sub-NDARINV*.csv'):
    clear_output(wait=True)
    subjID = pconncsv.name.split('_')[0]
    #subjIDs.append(subjID)

    csv_in_sq = pd.read_csv(pconncsv, sep="\t", header=None, names=ROI_labels)
    csv_in_sq.index = ROI_labels
    allArrays.append([subjID, csv_in_sq])
    print(pconncsv)

with open(allArraysPath, 'wb') as f:
    pickle.dump(allArrays, f)

/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Schaefer2018400p_TianS4/pconns_z_csv/sub-NDARINVB7A2MURD_ses-baselineYear1Arm1_task-rest_bold_desc-filtered_timeseries.ptseries.nii_8_minutes_of_data_at_FD_0.2.Z.pconn.nii.csv


In [37]:
allArrays = None
with open(allArraysPath, 'rb') as f:
    allArrays = pickle.load(f)

In [38]:
## Grab the ABCD file with the IPV exposure data and import to a table (skipping the header)
c_data = pd.read_table('/fast_scratch/jdr/ABCD/ABCD_3165_Pipe/Data/abcd_ptsd01.txt', header=0, skiprows=[1])

## Subset to only include the baseline data
c_data = c_data[c_data["eventname"] == "baseline_year_1_arm_1"]

## Drop unnecessary variables
c_data = c_data.drop(['collection_id', 'abcd_ptsd01_id', 'dataset_id', 
                      'src_subject_id', 'interview_date', 'eventname', 'collection_title'], axis=1)

## Reformat the 'subjectkey' variable to match the format in the CSV files
c_data['subjectkey'] = 'sub-' + c_data['subjectkey'].str.replace('_', '')

## In case they exist, drop the variables we're about to calculate
c_data = c_data.drop(['ExposureTotal', 'IPVTotal', 'ExposureAny', 'IPVAny'], axis=1, errors='ignore')

## Compute total overall exposure (all items) and total IPV exposure (IPV items only)
c_data['ExposureTotal'] = c_data.iloc[:, 3:19].sum(axis=1)
c_data['IPVTotal'] = c_data.iloc[:, 7:18].sum(axis=1)

## Binarize total and IPV exposure items 
c_data['ExposureAny'] = np.where(c_data['ExposureTotal'] == 0, 0, 1)
c_data['IPVAny'] = np.where(c_data['IPVTotal'] == 0, 0, 1)

## Dummy code sex variable to Sex01
c_data['Sex01'] = np.where(c_data['sex'] == 'M', 1, 0) # Must use numeric data as input (not M, F)


In [39]:
crpbi_data = pd.read_table(crpbiPath)
crpbi_data_nm = crpbi_data
crpbi_data_nm['crpbi_par1_mn'] = pd.to_numeric(crpbi_data_nm['crpbi_par1_mn'], errors='coerce')
crpbi_data_nm['crpbi_par2_mn'] = pd.to_numeric(crpbi_data_nm['crpbi_par2_mn'], errors='coerce')
crpbi_data_nm['subjectkey'] = 'sub-' + crpbi_data_nm['subjectkey'].str.replace('_','')
crpbi_data_nm = crpbi_data_nm.dropna()

In [43]:
def getNetworkCorr(arrayslist, searchstr1, searchstr2):
    networkCorr_mn = []
    for i in tqdm(list(range(len(arrayslist)))):
        networkCorr_subj = arrayslist[i][1].loc[[x for x in arrayslist[i][1].columns if searchstr1 in x],
                                              [x for x in arrayslist[i][1].columns if searchstr2 in x]]
        networkCorr_mn.append([arrayslist[i][0], np.nanmean(networkCorr_subj.values.flatten())])
    
    networkCorr_df = pd.DataFrame(networkCorr_mn, columns=['subjectkey', str(searchstr1 + '_' + searchstr2)])
        
    return networkCorr_df

In [44]:
networks = ['Default', 'SalVentAttn', 'Cont']

networks_df = pd.DataFrame(columns=['subjectkey'])
for networkName in itertools.combinations_with_replacement(networks, 2):
    network_df = getNetworkCorr(allArrays, networkName[0], networkName[1])
    networks_df = networks_df.merge(network_df, how='right', on='subjectkey')
    ## Merge the within default mode data with the clinical data


  0%|          | 0/7219 [00:00<?, ?it/s]

  0%|          | 0/7219 [00:00<?, ?it/s]

  0%|          | 0/7219 [00:00<?, ?it/s]

  0%|          | 0/7219 [00:00<?, ?it/s]

  0%|          | 0/7219 [00:00<?, ?it/s]

  0%|          | 0/7219 [00:00<?, ?it/s]

In [45]:
## Merge the within default mode data with the clinical data
df_comb = c_data.merge(networks_df, how='right', on='subjectkey')
df_comb = df_comb.merge(crpbi_data_nm, how='left', on='subjectkey')
df_comb.shape
df_comb


Unnamed: 0,subjectkey,interview_age,sex,ksads_ptsd_raw_754_p,ksads_ptsd_raw_755_p,ksads_ptsd_raw_756_p,ksads_ptsd_raw_757_p,ksads_ptsd_raw_758_p,ksads_ptsd_raw_759_p,ksads_ptsd_raw_760_p,...,IPVAny,Sex01,Default_Default,Default_SalVentAttn,Default_Cont,SalVentAttn_SalVentAttn,SalVentAttn_Cont,Cont_Cont,crpbi_par1_mn,crpbi_par2_mn
0,sub-NDARINV4BXZZV14,120.0,M,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.284014,-0.065891,0.025845,0.277060,0.033916,0.206947,2.6,2.8
1,sub-NDARINV4C12ZREL,110.0,F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.144296,-0.017724,0.012787,0.146344,-0.017479,0.122261,3.0,3.0
2,sub-NDARINV4C2YD3TH,131.0,M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.246957,-0.116573,0.038831,0.371918,0.016191,0.203970,2.8,3.0
3,sub-NDARINV4C616XF8,117.0,M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.262188,-0.092777,0.050592,0.416879,-0.018093,0.168920,3.0,3.0
4,sub-NDARINV4C8YJ9BG,131.0,M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.220370,0.009909,0.071169,0.168979,0.035588,0.301850,1.6,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7214,sub-NDARINVB6AN7YDK,120.0,F,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.258827,-0.188481,0.016785,0.333071,-0.042093,0.260756,2.8,2.0
7215,sub-NDARINVB6YGLTJK,130.0,M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.281285,-0.032760,0.038468,0.330781,-0.001622,0.222155,3.0,2.6
7216,sub-NDARINVB7440KXP,113.0,F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.373494,-0.228905,0.000385,0.471992,0.064618,0.194266,2.6,3.0
7217,sub-NDARINVB74PPWH0,119.0,F,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.285703,-0.131857,0.046128,0.317314,-0.048313,0.214178,2.6,2.4


In [72]:
df_comb.to_csv('/fast_scratch/jdr/df_combgood.csv')

In [46]:
IPVplus = df_comb.query('IPVAny == 1')
IPVminus = df_comb.query('IPVAny == 0')

In [75]:
def_def = (smf.ols('Default_Default ~ Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary2()
print(def_def)

def_sal = (smf.ols('Default_SalVentAttn ~ Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary()
print(def_sal)

def_cont = (smf.ols('Default_Cont ~ Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary()
print(def_cont)

sal_sal = (smf.ols('SalVentAttn_SalVentAttn ~ Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary()
print(sal_sal)

sal_cont = (smf.ols('SalVentAttn_Cont ~  Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary()
print(sal_cont)

cont_cont = (smf.ols('Cont_Cont ~  Sex01 + interview_age + IPVAny', data=df_comb).fit()).summary()
print(cont_cont)


                  Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.027      
Dependent Variable: Default_Default  AIC:                -20054.8845
Date:               2021-12-03 20:49 BIC:                -20027.3488
No. Observations:   7215             Log-Likelihood:     10031.     
Df Model:           3                F-statistic:        68.51      
Df Residuals:       7211             Prob (F-statistic): 1.12e-43   
R-squared:          0.028            Scale:              0.0036317  
---------------------------------------------------------------------
                 Coef.   Std.Err.     t      P>|t|    [0.025   0.975]
---------------------------------------------------------------------
Intercept        0.2352    0.0113   20.8186  0.0000   0.2130   0.2573
Sex01           -0.0189    0.0014  -13.3253  0.0000  -0.0217  -0.0161
interview_age    0.0004    0.0001    4.4867  0.0000   0.0002   0.0006
IPVAny          -0.0094    0.0025   -3.7591  0.

In [73]:
def_def = (smf.ols('Default_Default ~ Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(def_def)

def_sal = (smf.ols('Default_SalVentAttn ~ Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(def_sal)

def_cont = (smf.ols('Default_Cont ~ Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(def_cont)

sal_sal = (smf.ols('SalVentAttn_SalVentAttn ~ Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(sal_sal)

sal_cont = (smf.ols('SalVentAttn_Cont ~  Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(sal_cont)

cont_cont = (smf.ols('Cont_Cont ~  Sex01 + interview_age + crpbi_par1_mn*IPVTotal', data=df_comb).fit()).summary2()
print(cont_cont)


                    Results: Ordinary least squares
Model:               OLS               Adj. R-squared:      0.026      
Dependent Variable:  Default_Default   AIC:                 -18613.6750
Date:                2021-12-03 17:00  BIC:                 -18572.8123
No. Observations:    6704              Log-Likelihood:      9312.8     
Df Model:            5                 F-statistic:         37.09      
Df Residuals:        6698              Prob (F-statistic):  1.25e-37   
R-squared:           0.027             Scale:               0.0036418  
-----------------------------------------------------------------------
                        Coef.  Std.Err.    t     P>|t|   [0.025  0.975]
-----------------------------------------------------------------------
Intercept               0.2250   0.0137  16.4703 0.0000  0.1982  0.2518
Sex01                  -0.0188   0.0015 -12.7019 0.0000 -0.0217 -0.0159
interview_age           0.0004   0.0001   4.5359 0.0000  0.0003  0.0006
crpbi_par1_m

                   Results: Ordinary least squares
Model:                OLS              Adj. R-squared:     0.003      
Dependent Variable:   Cont_Cont        AIC:                -22459.6653
Date:                 2021-12-03 17:00 BIC:                -22418.8026
No. Observations:     6704             Log-Likelihood:     11236.     
Df Model:             5                F-statistic:        5.386      
Df Residuals:         6698             Prob (F-statistic): 6.01e-05   
R-squared:            0.004            Scale:              0.0020520  
----------------------------------------------------------------------
                        Coef.  Std.Err.    t    P>|t|   [0.025  0.975]
----------------------------------------------------------------------
Intercept               0.1798   0.0103 17.5375 0.0000  0.1597  0.1999
Sex01                  -0.0034   0.0011 -3.0490 0.0023 -0.0056 -0.0012
interview_age           0.0003   0.0001  3.9178 0.0001  0.0001  0.0004
crpbi_par1_mn           0.

In [102]:
def getFreqPct(colName):
    pct = (df_comb[colName].value_counts()[1] / df_comb.shape[0])*100
    print(df_comb[colName].value_counts())
    print(pct)
    return(pct)

terrorPct = getFreqPct('ksads_ptsd_raw_758_p')
warPct = getFreqPct('ksads_ptsd_raw_759_p')
witShotStab = getFreqPct('ksads_ptsd_raw_760_p')
violNonFam = getFreqPct('ksads_ptsd_raw_761_p')
violFam = getFreqPct('ksads_ptsd_raw_762_p')
beatenFam = getFreqPct('ksads_ptsd_raw_763_p')
nonFamThreat = getFreqPct('ksads_ptsd_raw_764_p')
famThreat = getFreqPct('ksads_ptsd_raw_765_p')
witDomViol = getFreqPct('ksads_ptsd_raw_766_p')
famSexAb = getFreqPct('ksads_ptsd_raw_767_p')
nonFamSexAb = getFreqPct('ksads_ptsd_raw_768_p')
peerSexAb = getFreqPct('ksads_ptsd_raw_769_p')

0.0    6977
1.0      27
Name: ksads_ptsd_raw_758_p, dtype: int64
0.3739612188365651
0.0    6974
1.0      30
Name: ksads_ptsd_raw_759_p, dtype: int64
0.41551246537396125
0.0    6947
1.0      57
Name: ksads_ptsd_raw_760_p, dtype: int64
0.7894736842105263
0.0    6983
1.0      21
Name: ksads_ptsd_raw_761_p, dtype: int64
0.29085872576177285
0.0    6978
1.0      26
Name: ksads_ptsd_raw_762_p, dtype: int64
0.3601108033240997
0.0    6944
1.0      60
Name: ksads_ptsd_raw_763_p, dtype: int64
0.8310249307479225
0.0    6962
1.0      42
Name: ksads_ptsd_raw_764_p, dtype: int64
0.5817174515235457
0.0    6963
1.0      41
Name: ksads_ptsd_raw_765_p, dtype: int64
0.5678670360110804
0.0    6504
1.0     500
Name: ksads_ptsd_raw_766_p, dtype: int64
6.9252077562326875
0.0    6975
1.0      29
Name: ksads_ptsd_raw_767_p, dtype: int64
0.40166204986149584
0.0    6953
1.0      51
Name: ksads_ptsd_raw_768_p, dtype: int64
0.7063711911357341
0.0    6913
1.0      91
Name: ksads_ptsd_raw_769_p, dtype: int64
1.260387

In [65]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot
from patsy.contrasts import Sum

def eta_squared(aov):
    aov['eta_sq'] = 'NaN'
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    return aov

def omega_squared(aov):
    mse = aov['sum_sq'][-1]/aov['df'][-1]
    aov['omega_sq'] = 'NaN'
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*mse))/(sum(aov['sum_sq'])+mse)
    return aov

def aovLM_mainwCovs(vector):
    formula = vector + ' ~C(IPVAny) + C(Sex01) + interview_age'
    model = ols(formula, df_comb).fit()
    aov_table = anova_lm(model, typ=2)
    eta_squared(aov_table)
    omega_squared(aov_table)
    aov_table['PR(>F)'] = round(aov_table['PR(>F)'], 4)
    return aov_table

def aovLM_SexByNet(vector):
    formula = vector + ' ~ C(Sex01)*C(IPVAny) + interview_age'
    model = ols(formula, df_comb).fit()
    aov_table = anova_lm(model, typ=2)
    eta_squared(aov_table)
    omega_squared(aov_table)
    aov_table['PR(>F)'] = round(aov_table['PR(>F)'], 4)
    return aov_table

def aovLM_AgeByNet(vector):
    formula = vector + ' ~ C(interview_age)*C(IPVAny) + C(Sex01)'
    model = ols(formula, df_comb).fit()
    aov_table = anova_lm(model, typ=2)
    eta_squared(aov_table)
    omega_squared(aov_table)
    aov_table['PR(>F)'] = round(aov_table['PR(>F)'], 4)
    return aov_table

print('IPV + AGE + SEX: DEFAULT BY DEFAULT')
print(aovLM_mainwCovs('Default_Default'))

print('IPV + AGE + SEX: DEFAULT BY SALIENCE')
print(aovLM_mainwCovs('Default_SalVentAttn'))

print('IPV + AGE + SEX: DEFAULT BY FPN')
print(aovLM_mainwCovs('Default_Cont'))

print('IPV + AGE + SEX: SALIENCE BY SALIENCE')
print(aovLM_mainwCovs('SalVentAttn_SalVentAttn'))

print('IPV + AGE + SEX: SALIENCE BY FPN')
print(aovLM_mainwCovs('SalVentAttn_Cont'))

print('IPV + AGE + SEX: FPN BY FPN')
print(aovLM_mainwCovs('Cont_Cont'))

print('SEX X IPV + AGE: DEFAULT BY DEFAULT')
print(aovLM_SexByNet('Default_Default'))

print('SEX X IPV + AGE: DEFAULT BY SALIENCE')
print(aovLM_SexByNet('Default_SalVentAttn'))

print('SEX X IPV + AGE: DEFAULT BY FPN')
print(aovLM_SexByNet('Default_Cont'))

print('SEX X IPV + AGE: SALIENCE BY SALIENCE')
print(aovLM_SexByNet('SalVentAttn_SalVentAttn'))

print('SEX X IPV + AGE: SALIENCE BY FPN')
print(aovLM_SexByNet('SalVentAttn_Cont'))

print('SEX X IPV + AGE: FPN BY FPN')
print(aovLM_SexByNet('Cont_Cont'))




IPV + AGE + SEX: DEFAULT BY DEFAULT
                  sum_sq      df           F  PR(>F)    eta_sq  omega_sq
C(IPVAny)       0.051318     1.0   14.130521  0.0002  0.001904  0.001769
C(Sex01)        0.644870     1.0  177.564705  0.0000  0.023921  0.023784
interview_age   0.073108     1.0   20.130163  0.0000  0.002712  0.002577
Residual       26.188538  7211.0         NaN     NaN       NaN       NaN
IPV + AGE + SEX: DEFAULT BY SALIENCE
                  sum_sq      df           F  PR(>F)    eta_sq  omega_sq
C(IPVAny)       0.011411     1.0    4.513954  0.0337  0.000608  0.000473
C(Sex01)        0.422539     1.0  167.142827  0.0000  0.022516  0.022378
interview_age   0.102596     1.0   40.583595  0.0000  0.005467  0.005332
Residual       18.229488  7211.0         NaN     NaN       NaN       NaN
IPV + AGE + SEX: DEFAULT BY FPN
                  sum_sq      df         F  PR(>F)    eta_sq  omega_sq
C(IPVAny)       0.008792     1.0  5.816450  0.0159  0.000806  0.000667
C(Sex01)        0.00452

In [68]:
def runTTest_ind(vec1, vec2):
    vec1_noNaN = vec1[np.logical_not(np.isnan(vec1))]
    vec2_noNaN = vec2[np.logical_not(np.isnan(vec2))]
    levene_p = stats.levene(vec1_noNaN, vec2_noNaN)[1]
    if levene_p > 0.05:
        equalvar=True
        print("Assuming equal variance per non-significant Levene's Test.")
    else:
        print("Assuming UN-equal variance per significant Levene's Test.")
        equalvar=False  
    result = stats.ttest_ind(vec1, vec2, equal_var=equalvar, nan_policy='omit')
    display(result)

runTTest_ind(IPVplus['Default_Default'], IPVminus['Default_Default'])
runTTest_ind(IPVplus['Default_SalVentAttn'], IPVminus['Default_SalVentAttn'])
runTTest_ind(IPVplus['Default_Cont'], IPVminus['Default_Cont'])
runTTest_ind(IPVplus['SalVentAttn_SalVentAttn'], IPVminus['SalVentAttn_SalVentAttn'])
runTTest_ind(IPVplus['SalVentAttn_Cont'], IPVminus['SalVentAttn_Cont'])
runTTest_ind(IPVplus['Cont_Cont'], IPVminus['Cont_Cont'])

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=-3.4543583708709664, pvalue=0.0005547768588355575)

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=1.8275566315872613, pvalue=0.06765741151864141)

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=2.4294425646631352, pvalue=0.01514631208965521)

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=-4.118480858998394, pvalue=3.856379219366135e-05)

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=-0.2706291378879219, pvalue=0.7866840262921249)

Assuming equal variance per non-significant Levene's Test.


Ttest_indResult(statistic=-0.1308007643687972, pvalue=0.8959365660132629)

In [70]:
#https://dfrieds.com/math/effect-size.html
def two_sample_data_setup_for_cohens_d(vec1, vec2):
    mean_1 = np.mean(vec1)
    mean_2 = np.mean(vec2)
    sample_std_dev_1 = np.std(vec1)
    sample_std_dev_2 = np.std(vec2)
    values_1 = np.random.normal(loc=mean_1, scale=sample_std_dev_1, size=3000)
    values_2 = np.random.normal(loc=mean_2, scale=sample_std_dev_2, size=3000)
    differences_group_values = np.subtract(values_1, values_2)
    std_dev_differences_group_values = np.std(differences_group_values, ddof=1)
    cohens_d = round(abs((mean_1 - mean_2)/std_dev_differences_group_values), 3)
    print(cohens_d)
    return cohens_d

In [71]:
Default_Default_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['Default_Default'], 
                                                              IPVminus['Default_Default']) 

Default_SalVentAttn_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['Default_SalVentAttn'], 
                                                              IPVminus['Default_SalVentAttn'])

Default_Cont_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['Default_Cont'], 
                                                           IPVminus['Default_Cont'])

SalVentAttn_SalVentAttn_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['SalVentAttn_SalVentAttn'], 
                                                              IPVminus['SalVentAttn_SalVentAttn'])

SalVentAttn_Cont_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['SalVentAttn_Cont'], 
                                                              IPVminus['SalVentAttn_Cont'])

Cont_Cont_cohens_d = two_sample_data_setup_for_cohens_d(IPVplus['Cont_Cont'], 
                                                        IPVminus['Cont_Cont'])

0.102
0.054
0.072
0.121
0.008
0.004


In [82]:
c_data_IPVplus = c_data.query('IPVAny == 1').loc[:, ['subjectkey', 'IPVAny']]
c_data_IPVminus = c_data.query('IPVAny == 0').loc[:, ['subjectkey', 'IPVAny']]

In [83]:
IPVMinusArrays = []
IPVPlusArrays = []

for i in tqdm(list(range(len(allArrays)))):
    subjectkeyArray = allArrays[i][0]
    try:
        IPVvalue = c_data[c_data.subjectkey==subjectkeyArray].IPVAny.item()
        if IPVvalue == 0:
            IPVMinusArrays.append(allArrays[i][1].to_numpy())
        elif IPVvalue == 1:
            IPVPlusArrays.append(allArrays[i][1].to_numpy())
    except:
        next

  0%|          | 0/7220 [00:00<?, ?it/s]

In [84]:
IPVMinusMatrices = np.array(IPVMinusArrays).T
IPVPlusMatrices = np.array(IPVPlusArrays).T

In [85]:
pvals, adj, nulls = nbs.nbs_bct(IPVMinusMatrices, IPVPlusMatrices, thresh=3, verbose=True)

max component size is 6407
estimating null distribution with 1000 permutations
permutation 0 of 1000.  Permutation max is 212.0.  Observed max is 6407.0.  P-val estimate is 0.000 


KeyboardInterrupt: 

In [15]:
adj

NameError: name 'adj' is not defined