In [7]:
import os

import numpy as np
import pandas as pd
from scipy.stats import pearsonr


In [15]:
# Directory holding the cleaned CSV files. Change as needed.
# NOTE: the name `dir` shadows the Python builtin of the same name; it is kept
# unchanged so any other cell that refers to it keeps working.
dir = '/Users/jamiezhang/Desktop/COVIDiSTRESS/'

dfs = []     # one DataFrame per cleaned CSV file
months = []  # label parsed from each file name, parallel to `dfs`

# Open cleaned data files.
# NOTE(review): os.listdir does not guarantee any particular order, so the
# order of `dfs`/`months` follows whatever the filesystem returns.
for filename in os.listdir(dir):
    # A name that starts with 'clean' cannot also start with '~', so this
    # prefix check alone already excludes '~'-prefixed files.
    if filename.startswith('clean') and filename.endswith('.csv'):
        # os.listdir yields bare names; join with the directory so the read
        # does not depend on the kernel's current working directory.
        csv_path = os.path.join(dir, filename)
        dfs.append(pd.read_csv(csv_path, index_col=0, encoding='ISO-8859-1'))
        # Skip the first six characters (presumably a 'clean_' prefix —
        # confirm against the actual file names) and keep the text before
        # the first '.'.
        months.append(filename[6:].split('.')[0])

In [19]:
# Import from the public namespace: `scipy.stats.stats` is a deprecated
# private module and should not be imported from directly.
from scipy.stats import pearsonr

# Pairwise Pearson correlations between the three dependent variables,
# reported separately for every loaded month.
for month, df in zip(months, dfs):
    pss_composite = df.loc[:, 'pss10_composite']
    lon_composite = df.loc[:, 'lon_composite']
    sps_composite = df.loc[:, 'sps_composite']

    # Each call is unpacked into (correlation coefficient, p-value).
    pss_lon, p_pl = pearsonr(pss_composite, lon_composite)
    lon_sps, p_ls = pearsonr(lon_composite, sps_composite)
    pss_sps, p_ps = pearsonr(pss_composite, sps_composite)

    # Values are shown with four decimals, so any p-value below 0.00005
    # prints as 0.0000.
    print(f"Month: {month}")
    print(f"    Stress & Loneliness Corr:{pss_lon: .4f}, p_value:{p_pl: .4f}")
    print(f"    Loneliness & Social Support Corr:{lon_sps: .4f}, p_value:{p_ls: .4f}")
    print(f"    Stress & Social Support Corr:{pss_sps: .4f}, p_value:{p_ps: .4f}")

Month: April
    Stress & Loneliness Corr: 0.5545, p_value: 0.0000
    Loneliness & Social Support Corr:-0.2095, p_value: 0.0000
    Stress & Social Support Corr:-0.2695, p_value: 0.0000
Month: May
    Stress & Loneliness Corr: 0.5563, p_value: 0.0000
    Loneliness & Social Support Corr:-0.2175, p_value: 0.0000
    Stress & Social Support Corr:-0.2699, p_value: 0.0000
Month: June
    Stress & Loneliness Corr: 0.5570, p_value: 0.0000
    Loneliness & Social Support Corr:-0.2187, p_value: 0.0000
    Stress & Social Support Corr:-0.2706, p_value: 0.0000


In [21]:
# MANOVA: the three composite scores modeled jointly against marital status,
# fitted once per loaded month.
from statsmodels.multivariate.manova import MANOVA

manova_formula = 'pss10_composite + lon_composite + sps_composite ~ marital_status'

for month_label, month_df in zip(months, dfs):
    print(month_label)
    manova_model = MANOVA.from_formula(manova_formula, month_df)
    # mv_test() yields the multivariate test tables that get printed.
    print(manova_model.mv_test())

April
                     Multivariate linear model
                                                                   
-------------------------------------------------------------------
       Intercept        Value  Num DF   Den DF     F Value   Pr > F
-------------------------------------------------------------------
          Wilks' lambda 0.1913 3.0000 81745.0000 115225.4518 0.0000
         Pillai's trace 0.8087 3.0000 81745.0000 115225.4518 0.0000
 Hotelling-Lawley trace 4.2287 3.0000 81745.0000 115225.4518 0.0000
    Roy's greatest root 4.2287 3.0000 81745.0000 115225.4518 0.0000
-------------------------------------------------------------------
                                                                   
-------------------------------------------------------------------
     marital_status     Value   Num DF    Den DF    F Value  Pr > F
-------------------------------------------------------------------
          Wilks' lambda 0.9389 12.0000 216277.2324  434.4316 0.

In [25]:
# Post-hoc ANOVA: for each month, fit one OLS model per outcome with
# marital status as the only factor and print its type-2 ANOVA table.
import statsmodels.api as sm
from statsmodels.formula.api import ols

# (text emitted before the month label, outcome label, model formula).
# The first header of each month has no leading blank line; the other two do.
anova_specs = (
    ("", "Stress", 'pss10_composite ~ marital_status'),
    ("\n", "Loneliness", 'lon_composite ~ marital_status'),
    ("\n", "Social Support", 'sps_composite ~ marital_status'),
)

for month_label, month_df in zip(months, dfs):
    for lead, outcome, formula in anova_specs:
        fitted_model = ols(formula, data=month_df).fit()
        anova_table = sm.stats.anova_lm(fitted_model, typ=2)
        print(f"{lead}{month_label}: {outcome}")
        print(anova_table)

April: Stress
                      sum_sq       df           F  PR(>F)
marital_status  1.067905e+05      4.0  506.970399     0.0
Residual        4.304886e+06  81747.0         NaN     NaN

April: Loneliness
                       sum_sq       df           F  PR(>F)
marital_status   16400.185424      4.0  472.933478     0.0
Residual        708696.898623  81747.0         NaN     NaN

April: Social Support
                      sum_sq       df           F  PR(>F)
marital_status  2.145710e+05      4.0  768.483976     0.0
Residual        5.706213e+06  81747.0         NaN     NaN
May: Stress
                      sum_sq       df           F  PR(>F)
marital_status  1.199692e+05      4.0  565.791097     0.0
Residual        4.681482e+06  88314.0         NaN     NaN

May: Loneliness
                       sum_sq       df           F  PR(>F)
marital_status   18560.734592      4.0  534.014255     0.0
Residual        767382.471400  88314.0         NaN     NaN

May: Social Support
                  

In [26]:
# Post-hoc Tukey HSD: pairwise comparison of marital-status groups for each
# outcome, run once per loaded month at alpha = 0.05.
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# (heading printed before the result, outcome column)
tukey_specs = (
    ("Stress:\n", 'pss10_composite'),
    ("Loneliness:\n", 'lon_composite'),
    ("Social Support:\n", 'sps_composite'),
)

for month_label, month_df in zip(months, dfs):
    print(month_label)
    # The grouping column is the same for all three outcomes.
    marital_groups = month_df.loc[:, 'marital_status']
    for heading, column in tukey_specs:
        tukey_result = pairwise_tukeyhsd(
            endog=month_df.loc[:, column], groups=marital_groups, alpha=0.05
        )
        print(heading, tukey_result)


April
                       Multiple Comparison of Means - Tukey HSD, FWER=0.05                       
            group1                        group2            meandiff p-adj   lower  upper  reject
-------------------------------------------------------------------------------------------------
             Divorced/widowed            Married/cohabiting   0.8548    0.0  0.5868 1.1228   True
             Divorced/widowed Other or would rather not say    3.354    0.0  2.9246 3.7833   True
             Divorced/widowed                        Single   3.0212    0.0  2.7435 3.2988   True
             Divorced/widowed        Uninformative response   2.8648 0.1083 -0.3562 6.0858  False
           Married/cohabiting Other or would rather not say   2.4992    0.0  2.1384 2.8599   True
           Married/cohabiting                        Single   2.1664    0.0  2.0151 2.3177   True
           Married/cohabiting        Uninformative response     2.01 0.4297 -1.2026 5.2226  False
Other or would