In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pandas import Categorical, DataFrame, concat, read_csv
## Configure the global seaborn theme: 'white' style, 'notebook' context, fonts scaled by 1.33.
sns.set_theme(style='white', context='notebook', font_scale=1.33)

## Section 2: Factor Loadings

#### Model 2a [Bifactor w/ 10 groups, joint data]

In [2]:
## Number of rows in each side-by-side panel of the printed table.
rows_per_panel = 20

## Load design data and keep its first 11 columns.
## NOTE(review): presumably the general factor followed by the 10 group
## factors named in the section title -- confirm against data/design.csv.
design = read_csv(os.path.join('data', 'design.csv'), index_col=0)
design = design[design.columns[:11]]

## Define locally dependent items. Within each set only the first item is
## kept; the remaining items are dropped from the design matrix.
ld = [[7,8], [9,10,11], [13,14], [15,16], [19,20], [21,22,23], [24,25], [33,34,35], [36,37]]
for ix in ld: design = design.drop(index=ix[1:])

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare data.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Load Stan summaries (m1 is merged below as the unidimensional model,
## m2 provides the general / group loadings).
m1 = read_csv(os.path.join('stan_results', 'teicher2015', 'grmq_m1_summary.tsv'), sep='\t', index_col=0)
m2 = read_csv(os.path.join('stan_results', 'teicher2015', 'grmq_m2_summary.tsv'), sep='\t', index_col=0)

## Extract factor loadings. For every nonzero (row, column) entry of the
## design matrix, look up the posterior mean of the matching lambda. Entries
## in design column 0 go to output column 0 ('general'); entries in any other
## design column go to output column 1 ('group'). Summary labels are offset
## by one relative to the positional indices.
loadings = np.zeros((len(design), 2))
for i, j in np.argwhere(design.values):
    loadings[i, int(j > 0)] = m2.loc[f'lambda[{i+1},{j+1}]', 'Mean']

## Merge with unidimensional model (rows of m1 whose label contains 'lambda').
loadings = np.column_stack([
    m1.filter(regex='lambda', axis=0)['Mean'].values,
    loadings
])

## Convert to DataFrame. After dropping 'general', the column position of each
## remaining nonzero design entry gives the item's subscale label.
design = design.drop(columns='general')
loadings = DataFrame(loadings, columns=['uni','general', 'group'])
loadings.insert(0, 'item', design.index)
loadings.insert(0, 'subscale', design.columns[np.where(design.values)[-1]])

## Sort DataFrame by subscale (in the order listed below), then by item.
cols = ['PVA','PPhysA','NVEA','SexA','EN','PN','WSV','WIPV','PeerVA','PeerPhysA']
loadings['subscale'] = Categorical(loadings.subscale, categories=cols, ordered=True)
loadings = loadings.sort_values(['subscale','item'])

## Format columns: items as integers, loadings with three decimals.
loadings['item'] = loadings['item'].apply(lambda x: '%0.0f' % x)
for name in ['uni', 'general', 'group']:
    loadings[name] = loadings[name].apply(lambda x: '%0.3f' % x)

## Convert to pivot table: consecutive runs of `rows_per_panel` items are laid
## out side by side. Every (row, col) cell holds exactly one string, so the
## join simply passes it through.
aggfunc = lambda x: ''.join(x)
loadings.insert(0, 'col', np.arange(len(loadings)) // rows_per_panel)
loadings.insert(0, 'row', np.arange(len(loadings)) % rows_per_panel)
loadings = loadings.pivot_table(['subscale','item','uni','general','group'], 'row', 'col', aggfunc=aggfunc).fillna('')

## Blank out repeated subscale labels. Each row is compared with the previous
## row only (shift, not a circular roll), so the first label of a panel is
## always kept even when it matches the panel's last row.
loadings['subscale'] = loadings['subscale'].apply(lambda x: np.where(x == x.shift(1), '', x))

## Sort columns: order the fields, move the panel number to the outer level,
## and keep panels 0 and 1.
loadings = loadings[['subscale', 'item','uni','general','group']]
loadings = loadings.swaplevel(0, 1, 1)
loadings = loadings[[0,1]]

## Display table.
loadings

col,0,0,0,0,0,1,1,1,1,1
Unnamed: 0_level_1,subscale,item,uni,general,group,subscale,item,uni,general,group
row,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,PVA,1,0.84,0.83,0.33,EN,43,0.39,0.39,0.26
1,,2,0.86,0.83,0.44,,52,0.59,0.61,0.18
2,,3,0.84,0.87,0.042,PN,44,0.58,0.5,0.48
3,,4,0.67,0.69,0.1,,45,0.54,0.44,0.53
4,PPhysA,6,0.76,0.78,0.44,,46,0.69,0.7,0.17
5,,7,0.74,0.74,0.25,,47,0.68,0.68,0.11
6,,9,0.48,0.47,0.25,,51,0.5,0.49,0.41
7,NVEA,5,0.61,0.62,0.075,WSV,15,0.68,0.71,0.078
8,,40,0.7,0.71,0.015,,17,0.63,0.63,0.15
9,,41,0.72,0.75,0.067,,18,0.6,0.6,0.17


In [3]:
## Print the loadings table as LaTeX source, omitting the row index.
## NOTE(review): in the recorded output the header row is uneven and \midrule
## appears after the first data row -- confirm the table renders as intended.
print(loadings.to_latex(index=False))

\begin{tabular}{llllllllll}
\toprule
       0 & \multicolumn{5}{l}{1} \\
subscale & item &   uni & general & group &  subscale & item &   uni & general & group \\
     PVA &    1 & 0.840 &   0.830 & 0.330 &        EN &   43 & 0.390 &   0.390 & 0.260 \\
\midrule
         &    2 & 0.860 &   0.830 & 0.440 &           &   52 & 0.590 &   0.610 & 0.180 \\
         &    3 & 0.840 &   0.870 & 0.042 &        PN &   44 & 0.580 &   0.500 & 0.480 \\
         &    4 & 0.670 &   0.690 & 0.100 &           &   45 & 0.540 &   0.440 & 0.530 \\
  PPhysA &    6 & 0.760 &   0.780 & 0.440 &           &   46 & 0.690 &   0.700 & 0.170 \\
         &    7 & 0.740 &   0.740 & 0.250 &           &   47 & 0.680 &   0.680 & 0.110 \\
         &    9 & 0.480 &   0.470 & 0.250 &           &   51 & 0.500 &   0.490 & 0.410 \\
    NVEA &    5 & 0.610 &   0.620 & 0.075 &       WSV &   15 & 0.680 &   0.710 & 0.078 \\
         &   40 & 0.700 &   0.710 & 0.015 &           &   17 & 0.630 &   0.630 & 0.150 \\
         &   41 & 

## Section 3: Variance Decomposition

In [4]:
## Load the full design matrix (all columns), indexed by its first column.
design = read_csv(os.path.join('data', 'design.csv'), index_col=0)

## Define sets of locally dependent items.
ld = [[7,8], [9,10,11], [13,14], [15,16], [19,20], [21,22,23], [24,25], [33,34,35], [36,37]]

## Keep the first item of every set and drop the others in a single pass.
redundant = [item for group in ld for item in group[1:]]
design = design.drop(index=redundant)

### 3.1 Bifactor model

In [5]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define parameters.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Define I/O parameters. Studies are numbered 1, 2, ... in the final table
## in the order they are listed here.
studies = ['teicher2015', 'tuominen2022']

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Main loop.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## NOTE(review): apart from the summary file and the selected design columns,
## this cell repeats the logic of the Section 3.2 cell; consider factoring the
## shared computation into one function.

## Restrict to columns of interest (first 11 columns of the design matrix).
D = design[design.columns[:11]].copy()

frames = []
for number, study in enumerate(studies, start=1):

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Load and prepare data.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Load Stan summary.
    summary = read_csv(os.path.join('stan_results', study, 'grmq_m2_summary.tsv'), sep='\t', index_col=0)

    ## Extract factor loadings: one posterior mean per nonzero design entry.
    ## Summary labels are offset by one relative to the positional indices.
    loadings = np.zeros(D.shape, dtype=float)
    for i, j in np.argwhere(D.values):
        loadings[i, j] = summary.loc[f'lambda[{i+1},{j+1}]', 'Mean']

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Coefficient omega hierarchical.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Preallocate space.
    omega   = np.zeros(len(D.columns))
    omega_s = np.zeros(len(D.columns))

    ## Iterate over factors.
    for i, col in enumerate(D.columns):

        ## Restrict to items in group (rows flagged 1 for this factor).
        L = loadings[D[col].values == 1]

        ## Compute squared sum of factor loadings (one value per factor).
        A = np.square(L.sum(axis=0))

        ## Compute sum of error variances (1 minus each item's summed squared loadings).
        B = np.sum(1 - np.square(L).sum(axis=1))

        ## Compute total variance.
        C = A.sum() + B

        ## Compute coefficient omega (all factors over total variance).
        omega[i] = A.sum() / C

        ## Compute coefficient omega subscale (this factor alone over total variance).
        omega_s[i] = A[i] / C

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Explained common variance.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Compute sum of squares per factor.
    ss = np.square(loadings).sum(axis=0)

    ## Compute explained common variance (each factor's share of the total).
    ecv = ss / ss.sum()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### H-index
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Compute H-index per factor from its squared loadings. Items that do
    ## not load on a factor have s = 0 and contribute nothing to the sum.
    H = np.array([1. / (1 + 1 / np.sum(s / (1 - s))) for s in np.square(loadings).T])

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Convert to DataFrame.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Store the study number directly rather than recoding names afterwards.
    frames.append(DataFrame(dict(
        subscale = D.columns,
        study = np.repeat(number, D.columns.size),
        ecv = ecv,
        omega = omega,
        omega_s = omega_s,
        H = H
    )))

## Concatenate DataFrames.
stats = concat(frames)

## Convert to pivot table (one value per subscale / study, so the default
## mean aggregation leaves values unchanged).
stats = stats.pivot_table(['omega','omega_s','ecv','H'], 'subscale', 'study').round(3)

## Format with a fixed three decimals so trailing zeros are kept (0.700, not 0.7).
stats = stats.apply(lambda column: column.map(lambda x: '%0.3f' % x))

## ECV is reported for the general factor only.
stats.loc[stats.index!='general','ecv'] = ''

## Re-organize rows.
index = ['general', 'PVA', 'PPhysA', 'NVEA', 'SexA', 'EN', 'PN', 'WSV', 'WIPV', 'PeerVA', 'PeerPhysA']
stats = stats.loc[index]

## Re-organize columns (study number on the outer level).
cols = [(1,'ecv'),(1,'omega'),(1,'omega_s'),(1,'H'),(2,'ecv'),(2,'omega'),(2,'omega_s'),(2,'H')]
stats = stats.swaplevel(axis='columns')[cols]

## Display table.
stats

study,1,1,1,1,2,2,2,2
Unnamed: 0_level_1,ecv,omega,omega_s,H,ecv,omega,omega_s,H
subscale,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
general,0.717,0.966,0.928,0.966,0.696,0.967,0.927,0.963
PVA,,0.912,0.068,0.272,,0.892,0.158,0.472
PPhysA,,0.788,0.144,0.272,,0.787,0.228,0.373
NVEA,,0.848,0.138,0.531,,0.828,0.119,0.425
SexA,,0.717,0.061,0.142,,0.753,0.292,0.454
EN,,0.712,0.141,0.259,,0.876,0.203,0.542
PN,,0.803,0.215,0.483,,0.817,0.117,0.289
WSV,,0.7,0.028,0.056,,0.663,0.037,0.068
WIPV,,0.666,0.145,0.197,,0.623,0.083,0.116
PeerVA,,0.878,0.624,0.82,,0.851,0.569,0.766


In [6]:
## Print the bifactor variance decomposition table as LaTeX source (row index included).
print(stats.to_latex())

\begin{tabular}{lllllllll}
\toprule
study & \multicolumn{4}{l}{1} & \multicolumn{4}{l}{2} \\
{} &    ecv &  omega & omega\_s &      H &    ecv &  omega & omega\_s &      H \\
subscale  &        &        &         &        &        &        &         &        \\
\midrule
general   &  0.717 &  0.966 &   0.928 &  0.966 &  0.696 &  0.967 &   0.927 &  0.963 \\
PVA       &        &  0.912 &   0.068 &  0.272 &        &  0.892 &   0.158 &  0.472 \\
PPhysA    &        &  0.788 &   0.144 &  0.272 &        &  0.787 &   0.228 &  0.373 \\
NVEA      &        &  0.848 &   0.138 &  0.531 &        &  0.828 &   0.119 &  0.425 \\
SexA      &        &  0.717 &   0.061 &  0.142 &        &  0.753 &   0.292 &  0.454 \\
EN        &        &  0.712 &   0.141 &  0.259 &        &  0.876 &   0.203 &  0.542 \\
PN        &        &  0.803 &   0.215 &  0.483 &        &  0.817 &   0.117 &  0.289 \\
WSV       &        &    0.7 &   0.028 &  0.056 &        &  0.663 &   0.037 &  0.068 \\
WIPV      &        &  0.666 &   0

### 3.2 Bifactor S-1 model

In [7]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Define parameters.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Define I/O parameters. Studies are numbered 1, 2, ... in the final table
## in the order they are listed here.
studies = ['teicher2015', 'tuominen2022']

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Main loop.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## NOTE(review): apart from the summary file and the selected design columns,
## this cell repeats the logic of the Section 3.1 cell; consider factoring the
## shared computation into one function.

## Restrict to columns of interest.
D = design[['general','peer','reverse']].copy()

frames = []
for number, study in enumerate(studies, start=1):

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Load and prepare data.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Load Stan summary.
    summary = read_csv(os.path.join('stan_results', study, 'grmq_m3_summary.tsv'), sep='\t', index_col=0)

    ## Extract factor loadings: one posterior mean per nonzero design entry.
    ## Summary labels are offset by one relative to the positional indices.
    loadings = np.zeros(D.shape, dtype=float)
    for i, j in np.argwhere(D.values):
        loadings[i, j] = summary.loc[f'lambda[{i+1},{j+1}]', 'Mean']

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Coefficient omega hierarchical.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Preallocate space.
    omega   = np.zeros(len(D.columns))
    omega_s = np.zeros(len(D.columns))

    ## Iterate over factors.
    for i, col in enumerate(D.columns):

        ## Restrict to items in group (rows flagged 1 for this factor).
        L = loadings[D[col].values == 1]

        ## Compute squared sum of factor loadings (one value per factor).
        A = np.square(L.sum(axis=0))

        ## Compute sum of error variances (1 minus each item's summed squared loadings).
        B = np.sum(1 - np.square(L).sum(axis=1))

        ## Compute total variance.
        C = A.sum() + B

        ## Compute coefficient omega (all factors over total variance).
        omega[i] = A.sum() / C

        ## Compute coefficient omega subscale (this factor alone over total variance).
        omega_s[i] = A[i] / C

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Explained common variance.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Compute sum of squares per factor.
    ss = np.square(loadings).sum(axis=0)

    ## Compute explained common variance (each factor's share of the total).
    ecv = ss / ss.sum()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### H-index
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Compute H-index per factor from its squared loadings. Items that do
    ## not load on a factor have s = 0 and contribute nothing to the sum.
    H = np.array([1. / (1 + 1 / np.sum(s / (1 - s))) for s in np.square(loadings).T])

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
    ### Convert to DataFrame.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

    ## Store the study number directly rather than recoding names afterwards.
    frames.append(DataFrame(dict(
        subscale = D.columns,
        study = np.repeat(number, D.columns.size),
        ecv = ecv,
        omega = omega,
        omega_s = omega_s,
        H = H
    )))

## Concatenate DataFrames.
stats = concat(frames)

## Convert to pivot table (one value per subscale / study, so the default
## mean aggregation leaves values unchanged).
stats = stats.pivot_table(['omega','omega_s','ecv','H'], 'subscale', 'study').round(3)

## Format with a fixed three decimals so trailing zeros are kept (0.700, not 0.7).
stats = stats.apply(lambda column: column.map(lambda x: '%0.3f' % x))

## ECV is reported for the general factor only.
stats.loc[stats.index!='general','ecv'] = ''

## Re-organize rows (same order as the selected design columns).
index = D.columns
stats = stats.loc[index]

## Re-organize columns (study number on the outer level).
cols = [(1,'ecv'),(1,'omega'),(1,'omega_s'),(1,'H'),(2,'ecv'),(2,'omega'),(2,'omega_s'),(2,'H')]
stats = stats.swaplevel(axis='columns')[cols]

## Display table.
stats

study,1,1,1,1,2,2,2,2
Unnamed: 0_level_1,ecv,omega,omega_s,H,ecv,omega,omega_s,H
general,0.747,0.963,0.902,0.967,0.758,0.963,0.909,0.963
peer,,0.888,0.574,0.843,,0.869,0.502,0.788
reverse,,0.839,0.579,0.735,,0.917,0.492,0.773


In [8]:
## Print the bifactor S-1 variance decomposition table as LaTeX source (row index included).
print(stats.to_latex())

\begin{tabular}{lllllllll}
\toprule
study & \multicolumn{4}{l}{1} & \multicolumn{4}{l}{2} \\
{} &    ecv &  omega & omega\_s &      H &    ecv &  omega & omega\_s &      H \\
\midrule
general &  0.747 &  0.963 &   0.902 &  0.967 &  0.758 &  0.963 &   0.909 &  0.963 \\
peer    &        &  0.888 &   0.574 &  0.843 &        &  0.869 &   0.502 &  0.788 \\
reverse &        &  0.839 &   0.579 &  0.735 &        &  0.917 &   0.492 &  0.773 \\
\bottomrule
\end{tabular}

