# Main Results 2b

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from pandas import read_csv
from tqdm import tqdm
sns.set_style('white')
sns.set_context('notebook', font_scale=1.33)
%matplotlib inline

## Section 1: Load and prepare data

In [2]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define parameters.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Define subscales: subscale name -> list of survey item columns.
## Insertion order matters, since later cells enumerate this dict.
subscales = {}

## '7u' and '7d' draw on disjoint subsets of the '7u7d-' items.
subscales['7u']    = ['7u7d-q01','7u7d-q03','7u7d-q04','7u7d-q06','7u7d-q07','7u7d-q08','7u7d-q13']
subscales['7d']    = ['7u7d-q02','7u7d-q05','7u7d-q09','7u7d-q10','7u7d-q11','7u7d-q12','7u7d-q14']

subscales['gad7']  = ['gad7-q01','gad7-q02','gad7-q03','gad7-q04','gad7-q05','gad7-q06','gad7-q07']
subscales['pswq']  = ['pswq-q01','pswq-q02','pswq-q03']

## 'bis', 'bas-r', and 'bas-d' split the 'bisbas-' items into three blocks of four.
subscales['bis']   = ['bisbas-q01','bisbas-q02','bisbas-q03','bisbas-q04']
subscales['bas-r'] = ['bisbas-q05','bisbas-q06','bisbas-q07','bisbas-q08']
subscales['bas-d'] = ['bisbas-q09','bisbas-q10','bisbas-q11','bisbas-q12']

subscales['shaps'] = ['shaps-q01','shaps-q02','shaps-q03','shaps-q04','shaps-q05',
                      'shaps-q06','shaps-q07','shaps-q08','shaps-q09','shaps-q10',
                      'shaps-q11','shaps-q12','shaps-q13','shaps-q14']

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare data.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Load reject data.
reject = read_csv(os.path.join('data','reject.csv'))

## Load survey data, keep only participants who also appear in the
## reject table, and attach each participant's infrequency score.
## The merge uses pandas defaults, i.e. it joins on the columns the
## two frames have in common.
surveys = (
    read_csv(os.path.join('data','surveys.csv'))
    .loc[lambda df: df['subject'].isin(reject['subject'])]
    .merge(reject[['platform','subject','infreq']])
)

## Binarize infrequency score (truthy -> 1, otherwise 0).
surveys['infreq'] = np.where(surveys['infreq'], 1, 0)

## Restrict survey to valid items (identifier columns + every subscale item).
items = surveys[
    ['platform','subject','infreq']
    + [col for cols in subscales.values() for col in cols]
]

## Section 2: Cronbach's Alpha


In [3]:
from scipy.stats import ttest_rel
from numba import njit

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define parameters.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Define number of permutations.
## This sets the length of the null distribution of t-values built by the
## permutation loop in this cell, and it enters the p-value denominator.
n_iter = 5000

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define useful functions.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

@njit
def cronbach_alpha(X):
    """Estimate Cronbach's alpha for a response matrix.

    Parameters
    ----------
    X : array, shape (n_obs, n_features)
        One row per observation, one column per item.

    Returns
    -------
    alpha : float
        k / (k - 1) * (1 - trace(C) / sum(C)), where k is the number of
        columns of X and C is the covariance matrix of those columns.
    """
    n_items = X.shape[1]

    ## Covariance between items (np.cov treats rows as variables, hence X.T).
    cov = np.cov(X.T)

    ## Sum of the item variances vs. sum over the full covariance matrix.
    item_variance = np.trace(cov)
    total_variance = np.sum(cov)

    return (n_items / (n_items - 1)) * (1 - item_variance / total_variance)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Compute observed statistics.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Compute Cronbach's alpha for every subscale (in `subscales` order),
## once for rows with infreq == 0 and once for rows with infreq == 1.
alpha_0 = np.array([
    cronbach_alpha(items.loc[items.infreq==0, cols].values)
    for cols in subscales.values()
])
alpha_1 = np.array([
    cronbach_alpha(items.loc[items.infreq==1, cols].values)
    for cols in subscales.values()
])

## Compute test statistic (paired t-test across subscales; keep the t-value only).
tval = ttest_rel(alpha_0, alpha_1)[0]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Permutation testing.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
## Seed the legacy NumPy generator so the null distribution is reproducible.
np.random.seed(47404)

## Preallocate space (one null t-value per permutation).
null_tvals = np.zeros(n_iter)

## Main loop. Work on a copy so the observed `items` frame is left untouched.
permuted = items.copy()
for n in tqdm(range(n_iter)):

    ## Preallocate space (row 0: infreq == 0, row 1: infreq == 1).
    alpha_null = np.zeros((2, len(subscales)))

    ## Permute rows of infrequency score. Each iteration reshuffles the
    ## labels left by the previous one; the item responses never move.
    permuted['infreq'] = np.random.permutation(permuted['infreq'])

    ## Build the two group masks once per permutation instead of once
    ## per subscale.
    unflagged = permuted['infreq'] == 0
    flagged = permuted['infreq'] == 1

    ## Iteratively compute Cronbach's alpha.
    for i, cols in enumerate(subscales.values()):
        alpha_null[0,i] = cronbach_alpha(permuted.loc[unflagged, cols].values)
        alpha_null[1,i] = cronbach_alpha(permuted.loc[flagged, cols].values)

    ## Compute test statistic.
    null_tvals[n], _ = ttest_rel(alpha_null[0], alpha_null[1])

## Compute p-value (two-sided). The observed statistic is counted as one
## member of the null distribution, so both numerator and denominator are
## incremented by one; this keeps the estimate from ever being exactly zero.
n_extreme = (np.abs(null_tvals) >= np.abs(tval)).sum()
pval = (n_extreme + 1) / (n_iter + 1)

100%|██████████| 5000/5000 [02:17<00:00, 36.43it/s]


In [4]:
## Report outcomes: per-subscale alphas (3 decimals) for each group,
## followed by the observed t-value and its permutation p-value.
print('no flag', np.round(alpha_0, 3))
print('flagged', np.round(alpha_1, 3))
summary_line = 'tval = %0.3f, pval = %0.3f' % (tval, pval)
print(summary_line)

no flag [0.836 0.937 0.924 0.934 0.83  0.708 0.862 0.904]
flagged [0.838 0.882 0.872 0.807 0.624 0.599 0.691 0.812]
tval = 4.247, pval = 0.006


## Section 3: Rates of Clinical Endorsement 

In [5]:
from pandas import DataFrame

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define parameters.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Define cutoff scores (subscale name -> threshold, compared with `>=`).
## A cutoff of 0 is falsy, so the threshold step skips that subscale and
## its proportion columns stay NaN.
cutoffs = {
    '7u':    12,
    '7d':    12,
    'gad7':  10,
    'pswq':  11,
    'bis':   0,
    'bas-r': 0,
    'bas-d': 0,
    'shaps': 3,     # applied to the 'shaps95' count, not the raw sum score
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Compute total scores.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Compute the sum score of each subscale and store it on `surveys`
## under the subscale's name.
for name, cols in subscales.items():
    surveys[name] = surveys[cols].sum(axis=1)

## Restrict survey to valid items (identifier columns + every subscale item).
items = surveys[
    ['platform','subject','infreq']
    + [col for cols in subscales.values() for col in cols]
]

## Collect identifiers and subscale sum scores in their own DataFrame.
scores = surveys[['platform','subject','infreq', *subscales]].copy()

## Count, per participant, the SHAPS items answered with a value of 2 or more.
scores['shaps95'] = items[subscales['shaps']].ge(2).sum(axis=1)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Compute summary statistics.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Preallocate space, filled with NaN. Columns:
##   0-1: mean sum score, 2-3: proportion at or above cutoff, 4-5: Cronbach's alpha.
## Within each pair the values follow the groupby order of `infreq`
## (0 then 1 when both groups are present).
summary = np.full((len(subscales), 6), np.nan)

## Group scores by infrequency flag once; reused for every subscale.
by_flag = scores.groupby('infreq')

## Iteratively compute summary statistics (one row per entry of `cutoffs`).
for row, (scale, cutoff) in enumerate(cutoffs.items()):

    ## Compute total scores.
    summary[row,:2] = by_flag[scale].mean()

    ## Compute proportion at threshold. SHAPS is always thresholded and
    ## uses the 'shaps95' count; other subscales only when the cutoff is nonzero.
    if scale == 'shaps' or cutoff:
        column = 'shaps95' if scale == 'shaps' else scale
        summary[row,2:4] = by_flag[column].apply(lambda x: np.mean(x >= cutoff))

## Store Cronbach alpha. These arrays are ordered like `subscales`, while the
## rows above follow `cutoffs`; the two dicts must list subscales in the same order.
summary[:,4] = alpha_0
summary[:,5] = alpha_1

## Convert to DataFrame.
table = DataFrame(summary, index=cutoffs.keys())
table

Unnamed: 0,0,1,2,3,4,5
7u,3.870432,10.152941,0.039867,0.447059,0.83611,0.837602
7d,4.750831,10.682353,0.093023,0.458824,0.937331,0.882235
gad7,4.863787,9.729412,0.172757,0.564706,0.923544,0.87202
pswq,4.784053,6.741176,0.069767,0.082353,0.934393,0.806849
bis,7.737542,7.917647,,,0.830147,0.62359
bas-r,6.38206,7.376471,,,0.708234,0.598797
bas-d,9.352159,8.788235,,,0.861721,0.691143
shaps,8.016611,10.847059,0.146179,0.294118,0.904094,0.812325
