In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import DataFrame, concat, read_csv
sns.set_theme(style='white', context='notebook', font_scale=1.33)

## Section 2: Factor Loadings

#### Model 2a [Bifactor w/ 10 groups, joint data]

In [None]:
from pandas import Categorical

## Number of table rows per side-by-side column block in the printed table.
N_ROWS = 26

## Load design data (keep the first 11 columns only).
design = read_csv(os.path.join('data', 'design.csv'), index_col=0)
design = design[design.columns[:11]]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare data.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Load Stan summary.
summary = read_csv(os.path.join('stan_results', 'joint', '2plq_m2_summary.tsv'), sep='\t', index_col=0)

## Extract factor loadings. Every nonzero design entry (item i, factor j) is
## looked up as the 1-based parameter lambda[i+1,j+1]. Entries in design
## column 0 are stored in loadings column 0; all other factors share column 1.
## NOTE(review): assumes the first design column is the general factor.
loadings = np.zeros((len(design), 2))
for i, j in zip(*np.where(design)):
    loadings[i, int(j > 0)] = summary.loc[f'lambda[{i+1},{j+1}]', 'Mean']

## Convert to DataFrame.
## NOTE(review): the subscale lookup assumes each item has exactly one
## nonzero group-factor entry once 'general' is dropped.
design = design.drop(columns='general')
loadings = DataFrame(loadings, columns=['general', 'group'])
loadings.insert(0, 'item', np.arange(len(loadings))+1)
loadings.insert(0, 'subscale', design.columns[np.where(design.values)[-1]])

## Sort DataFrame (subscales in the fixed order below, then by item).
cols = ['PVA','PPhysA','NVEA','SexA','EN','PN','WSV','WIPV','PeerVA','PeerPhysA']
loadings['subscale'] = Categorical(loadings.subscale, categories=cols, ordered=True)
loadings = loadings.sort_values(['subscale','item'])

## Format columns as strings.
loadings['item'] = loadings.item.apply(lambda x: '%0.0f' %x)
loadings['general'] = loadings.general.apply(lambda x: '%0.3f' %x)
loadings['group'] = loadings.group.apply(lambda x: '%0.3f' %x)

## Convert to pivot table: the sorted items are laid out in blocks of
## N_ROWS rows, one block per 'col' value. Each (row, col) cell holds a single
## string, so joining is only a string-compatible aggregator.
aggfunc = lambda x: ''.join(x)
loadings.insert(0, 'col', np.arange(len(loadings)) // N_ROWS)
loadings.insert(0, 'row', np.arange(len(loadings)) % N_ROWS)
loadings = loadings.pivot_table(['subscale','item','general','group'], 'row', 'col', aggfunc=aggfunc).fillna('')

def _blank_repeats(x):
    """Blank every label that equals the label directly above it.

    Row 0 has no predecessor and is always kept. (Comparing against
    np.roll(x, 1) would wrap around and compare row 0 with the last row.)
    """
    values = np.asarray(x, dtype=object)
    repeated = np.zeros(len(values), dtype=bool)
    repeated[1:] = values[1:] == values[:-1]
    return np.where(repeated, '', values)

## Show each subscale label only on its first row within a column block.
loadings['subscale'] = loadings['subscale'].apply(_blank_repeats)

## Sort columns: group the four fields under each column block.
loadings = loadings[['subscale', 'item','general','group']]
loadings = loadings.swaplevel(0, 1, 1)
loadings = loadings[[0,1]]

print(loadings.to_latex(index=False))

In [10]:
## Load design data, keeping only the first 11 columns.
design = read_csv(os.path.join('data', 'design.csv'), index_col=0)
design = design.iloc[:, :11]

## Define locally dependent items. Within each set only the first item is
## kept; the remaining index labels are dropped from the design.
ld = [[7,8], [9,10,11], [13,14], [15,16], [19,20], [21,22,23], [24,25], [33,34,35], [36,37]]
redundant = [item for group in ld for item in group[1:]]
design = design.drop(index=redundant)

## Load Stan summary.
summary_path = os.path.join('stan_results', 'joint', 'grmq_m2_summary.tsv')
summary = read_csv(summary_path, sep='\t', index_col=0)

## Extract factor loadings. Positions (not index labels) of the nonzero design
## entries are used for the 1-based lambda[row,factor] lookup. Design column 0
## fills loadings column 0; every other factor fills loadings column 1.
item_pos, factor_pos = np.where(design)
loadings = np.zeros((len(design), 2), dtype=float)
for r, k in zip(item_pos, factor_pos):
    loadings[r, int(k > 0)] = summary.loc[f'lambda[{r+1},{k+1}]', 'Mean']

## Convert to DataFrame, labelling each row with its subscale and the
## original item label from the design index.
design = design.drop(columns='general')
subscale_of_item = design.columns[np.where(design.values)[-1]]
loadings = DataFrame(loadings, columns=['general', 'group'])
loadings.insert(0, 'subscale', subscale_of_item)
loadings.insert(1, 'item', design.index)

In [11]:
## Display the loadings ordered by subscale, then by item.
loadings.sort_values(by=['subscale', 'item'])

Unnamed: 0,subscale,item,general,group
25,EN,38,0.73,0.17
26,EN,39,0.63,0.064
29,EN,42,0.42,0.56
30,EN,43,0.45,0.56
39,EN,52,0.67,0.41
4,NVEA,5,0.61,0.035
27,NVEA,40,0.69,0.02
28,NVEA,41,0.75,0.041
35,NVEA,48,0.58,0.54
36,NVEA,49,0.56,0.57


## Section 3: Variance Decomposition

In [5]:
studies = ['teicher2015', 'tuominen2022']

## Integer codes used to label studies in the final table.
study_codes = {'teicher2015': 1, 'tuominen2022': 2, 'joint': 3}

## Load design data (keep the first 11 columns only).
design = read_csv(os.path.join('data', 'design.csv'), index_col=0)
design = design[design.columns[:11]]

## Define locally dependent items. Within each set only the first item is
## kept; the remaining index labels are dropped from the design.
ld = [[7,8], [9,10,11], [13,14], [15,16], [19,20], [21,22,23], [24,25], [33,34,35], [36,37]]
for ix in ld: design = design.drop(index=ix[1:])


def _load_loadings(study, design):
    """Return an (items x factors) array of loadings for one study.

    Every nonzero design entry (item i, factor j) is filled with the 'Mean'
    column of row lambda[i+1,j+1] in that study's Stan summary; all other
    entries stay zero.
    """
    summary = read_csv(os.path.join('stan_results', study, 'grmq_m2_summary.tsv'), sep='\t', index_col=0)
    loadings = np.zeros(design.shape, dtype=float)
    for i, j in zip(*np.where(design)):
        loadings[i, j] = summary.loc[f'lambda[{i+1},{j+1}]', 'Mean']
    return loadings


def _omega(loadings, design):
    """Return (omega, omega_s), one value per design column.

    For factor i the computation is restricted to items with design == 1:
      A = squared column sums of the loadings (one entry per factor),
      B = sum over items of (1 - sum of squared loadings),
      omega[i]   = sum(A) / (sum(A) + B),
      omega_s[i] = A[i]   / (sum(A) + B).
    """
    n_factors = len(design.columns)
    omega = np.zeros(n_factors)
    omega_s = np.zeros(n_factors)
    for i, col in enumerate(design.columns):

        ## Restrict to items in group.
        L = loadings[(design[col] == 1).to_numpy()]

        ## Squared sum of factor loadings, per factor.
        A = np.square(np.sum(L, axis=0))

        ## Sum of error variances.
        B = np.sum(1 - np.square(L).sum(axis=1))

        ## Total variance.
        C = np.sum(A) + B

        omega[i] = A.sum() / C
        omega_s[i] = A[i] / C

    return omega, omega_s


def _ecv(loadings):
    """Return each factor's share of the total sum of squared loadings."""
    ss = np.square(loadings).sum(axis=0)
    return ss / ss.sum()


def _h_index(loadings):
    """Return H = 1 / (1 + 1 / sum(s / (1 - s))) per factor, s = squared loadings."""
    s = np.square(loadings)
    return 1. / (1 + 1 / np.sum(s / (1 - s), axis=0))


stats = []
for study in studies:

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Load and prepare data.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    loadings = _load_loadings(study, design)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Coefficient omega hierarchical.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    omega, omega_s = _omega(loadings, design)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Explained common variance.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ecv = _ecv(loadings)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### H-index
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    H = _h_index(loadings)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Convert to DataFrame.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Code the study here rather than with a frame-wide replace(), which would
    ## also rewrite matching values in any other column. Unknown study names
    ## are kept unchanged.
    stats.append(DataFrame(dict(
        subscale = design.columns,
        study = np.repeat(study_codes.get(study, study), design.columns.size),
        ecv = ecv,
        omega = omega,
        omega_s = omega_s,
        H = H
    )))

## Concatenate DataFrames.
stats = concat(stats)

## Convert to pivot table (one row per subscale, one column per statistic and study).
stats = stats.pivot_table(values=['omega','omega_s','ecv','H'], index='subscale', columns='study')

## Re-organize table: fixed subscale order; only omega, omega_s and H are kept.
index = ['general', 'PVA', 'PPhysA', 'NVEA', 'SexA', 'EN', 'PN', 'WSV', 'WIPV', 'PeerVA', 'PeerPhysA']
stats = stats.loc[index, ['omega','omega_s','H']]

In [6]:
## Display the statistics rounded to three decimals.
stats.round(decimals=3)

Unnamed: 0_level_0,omega,omega,omega_s,omega_s,H,H
study,1,2,1,2,1,2
subscale,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
general,0.966,0.967,0.928,0.927,0.966,0.963
PVA,0.912,0.892,0.068,0.158,0.272,0.472
PPhysA,0.788,0.787,0.144,0.228,0.272,0.373
NVEA,0.848,0.828,0.138,0.119,0.531,0.425
SexA,0.717,0.753,0.061,0.292,0.142,0.454
EN,0.712,0.876,0.141,0.203,0.259,0.542
PN,0.803,0.817,0.215,0.117,0.483,0.289
WSV,0.7,0.663,0.028,0.037,0.056,0.068
WIPV,0.666,0.623,0.145,0.083,0.197,0.116
PeerVA,0.878,0.851,0.624,0.569,0.82,0.766


In [None]:
## Print the rounded statistics as a LaTeX table.
print(stats.round(decimals=3).to_latex())