In [59]:
library(tidyverse)
library(brms)
library(ggridges)
library(rethinking)

In [6]:
# Functions
#' Convert plain character columns of a data frame to factors
#'
#' @param df A data frame (or tibble).
#' @return `df` with every column whose class is exactly "character"
#'   replaced by `as.factor()` of that column; all other columns are
#'   returned unchanged.
factorize <- function(df) {
  # Test each column on its own and force a logical(1) answer per column.
  # `sapply(df, class) == "character"` is not safe here: when every column
  # carries the same number (> 1) of classes, sapply() simplifies to a matrix
  # and which() then yields matrix positions instead of column positions.
  is_plain_chr <- vapply(
    df,
    function(col) identical(class(col), "character"),
    logical(1)
  )
  for (i in which(is_plain_chr)) {
    df[[i]] <- as.factor(df[[i]])
  }
  df
}

In [51]:
# Load the data set and inspect its structure
# Read the CSV and convert its character columns to factors in one step.
df <- factorize(read.csv("20241108_PhD_DiaChk-RQ2a.csv"))
# Disabled cleaning step (would have to run on the raw read.csv() result,
# before factorize()):
# df[df == ''] <- NA  # replace blank cells with NA
# Column-by-column overview: type and first values of each variable.
glimpse(df)
# Disabled checks:
# colnames(df)
# sum(is.na(df))

Rows: 1,023
Columns: 71
$ docid            [3m[90m<fct>[39m[23m D0002, D0002, D0002, D0002, D0002, D0002, D0002, D000…
$ docyear          [3m[90m<int>[39m[23m 1883, 1883, 1883, 1883, 1883, 1883, 1883, 1883, 1883,…
$ docmonth         [3m[90m<lgl>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ authorName       [3m[90m<fct>[39m[23m Anne F. Richards, Anne F. Richards, Anne F. Richards,…
$ docauthorid      [3m[90m<fct>[39m[23m D0002, D0002, D0002, D0002, D0002, D0002, D0002, D000…
$ authorLocation   [3m[90m<fct>[39m[23m Australia, Australia, Australia, Australia, Australia…
$ authorGender     [3m[90m<fct>[39m[23m F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,…
$ nationalOrigin   [3m[90m<fct>[39m[23m English, English, English, English, English, English,…
$ irish            [3m[90m<fct>[39m[23m False, False, False, False, False, False, False, Fals…
$ otherUK          [3m[90m<fct>[39m[23m True, True, True, True, True, True, True,

## Multilevel model of sentiment (intercept only)

$$valence_{ik} \sim \mathrm{Normal}(\mu_{ik}, \sigma)$$
$$\mu_{ik} = \alpha_k$$
$$\alpha_k \sim \mathrm{Normal}(\gamma, \phi)* $$ 
$$\gamma \sim \mathrm{Normal}(0,1)* $$ 
$$\phi \sim \mathrm{Uniform}(0,1)* $$
$$\sigma \sim \mathrm{Uniform}(0,1)**$$ 

In [68]:
# Intercept-only multilevel model of valence with intercepts varying by
# docauthorid. The brm() call that specifies the model is kept commented out
# for reference; the fitted object is read back from the .rds file named in
# its `file` argument instead of being refitted.
# NOTE(review): the call uses `data=df2`, which is not created in the visible
# part of this notebook -- confirm where df2 comes from before re-running.
#f5pp <- brm(data=df2,
#            family = 'gaussian',
#            formula = valence ~ 1 + (1 | docauthorid),
#            prior=c(set_prior("normal(0,1)",class="Intercept"), # gamma
#                    set_prior("uniform(0,1)",class="sd", ub=1), # phi: SD of the docauthorid intercepts
#                    set_prior("uniform(0,1)",class="sigma", ub=1)), # sigma: residual SD
#            iter=2000, warmup = 1000, chains=4, cores=4, seed = 4,
#            file = "20241011_PhD_DiaChkSen_pp.rds",
#            control=list(adapt_delta=.99))

# Load the stored fit (same file name as the `file` argument above).
f5pp <- readRDS("20241011_PhD_DiaChkSen_pp.rds")

# Show the model summary.
print(f5pp)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: valence ~ 1 + (1 | docauthorid) 
   Data: df2 (Number of observations: 1023) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~docauthorid (Number of levels: 4) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.46      0.20     0.18     0.91 1.00      943     1148

Regression Coefficients:
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     0.15      0.23    -0.36     0.62 1.00      770      746

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.93      0.02     0.89     0.97 1.00     1815     1327

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergen

## Multilevel model of effect of topic on sentiment (random intercepts and random slopes)

$$valence_{ik} \sim \mathrm{Normal}(\mu_{ik}, \sigma)$$

$$\mu_{ik} = \beta_{0k} +
\beta_{1k}Places_i + 
\beta_{2k}Business_i + 
\beta_{3k}GirlPrivilege_i + 
\beta_{4k}WorkingBoy_i + 
\beta_{5k}Weather_i + 
\beta_{6k}People_i + 
\beta_{7k}Politics_i + 
\beta_{8k}Outback_i + 
\beta_{9k}Home_i$$

$$\beta_{0k} = \gamma_{00} + \eta_{0k}$$ 

$$\beta_{1k} = \gamma_{10} + \eta_{1k}$$
$$\beta_{2k} = \gamma_{20} + \eta_{2k}$$
$$\beta_{3k} = \gamma_{30} + \eta_{3k}$$
$$\beta_{4k} = \gamma_{40} + \eta_{4k}$$
$$\beta_{5k} = \gamma_{50} + \eta_{5k}$$
$$\beta_{6k} = \gamma_{60} + \eta_{6k}$$
$$\beta_{7k} = \gamma_{70} + \eta_{7k}$$
$$\beta_{8k} = \gamma_{80} + \eta_{8k}$$
$$\beta_{9k} = \gamma_{90} + \eta_{9k}$$

$$\left[ \begin{split}\eta_{0k} \\ \eta_{1k} \\ \eta_{2k} \\ \eta_{3k} \\ \eta_{4k} \\ \eta_{5k} \\ \eta_{6k}  \\ \eta_{7k} \\ \eta_{8k} \\ \eta_{9k}\end{split} \right] \sim \mathrm{MVNormal}\left(\left[ \begin{split}0 \\ 0\\ 0 \\ 0 \\ 0 \\ 0 \\ 0 \\ 0 \\ 0 \\ 0\end{split} \right], \Phi R \Phi\right)$$

$$\Phi = \begin{bmatrix}\phi_0&0&0&0&0&0&0&0&0&0 \\ 0&\phi_1&0&0&0&0&0&0&0&0 \\ 0&0&\phi_2&0&0&0&0&0&0&0 \\ 0&0&0&\phi_3&0&0&0&0&0&0 \\ 0&0&0&0&\phi_4&0&0&0&0&0 \\ 0&0&0&0&0&\phi_5&0&0&0&0 \\ 0&0&0&0&0&0&\phi_6&0&0&0 \\ 0&0&0&0&0&0&0&\phi_7&0&0 \\ 0&0&0&0&0&0&0&0&\phi_8&0 \\ 0&0&0&0&0&0&0&0&0&\phi_9 \end{bmatrix}$$

$$\sigma \sim \mathrm{Uniform}(0,1)$$ 
$$\gamma_{00},...,\gamma_{90} \sim \mathrm{Normal}(0,1)$$
$$\phi_0,...,\phi_9 \sim \mathrm{Uniform}(0,1)$$
$$R \sim \mathrm{LKJ}(2)$$

$$R = \begin{bmatrix}1&\rho_{01}&\rho_{02}&\rho_{03}&\rho_{04}&\rho_{05}&\rho_{06}&\rho_{07}&\rho_{08}&\rho_{09} \\ {\color{grey}\rho_{\color{grey}{01}}}&1&\rho_{12}&\rho_{13}&\rho_{14}&\rho_{15}&\rho_{16}&\rho_{17}&\rho_{18}&\rho_{19} \\ {\color{grey}\rho_{\color{grey}{02}}}& {\color{grey}\rho_{\color{grey}{12}}}&1&\rho_{23}&\rho_{24}&\rho_{25}&\rho_{26}&\rho_{27}&\rho_{28}&\rho_{29} \\  {\color{grey}\rho_{\color{grey}{03}}}&{\color{grey}\rho_{\color{grey}{13}}}&{\color{grey}\rho_{\color{grey}{23}}}&1&\rho_{34}&\rho_{35}&\rho_{36}&\rho_{37}&\rho_{38}&\rho_{39}  \\  {\color{grey}\rho_{\color{grey}{04}}}&{\color{grey}\rho_{\color{grey}{14}}}&{\color{grey}\rho_{\color{grey}{24}}}&{\color{grey}\rho_{\color{grey}{34}}}&1&\rho_{45}&\rho_{46}&\rho_{47}&\rho_{48}&\rho_{49}\\  {\color{grey}\rho_{\color{grey}{05}}}&{\color{grey}\rho_{\color{grey}{15}}}&{\color{grey}\rho_{\color{grey}{25}}}&{\color{grey}\rho_{\color{grey}{35}}}&{\color{grey}\rho_{\color{grey}{45}}}&1&\rho_{56}&\rho_{57}&\rho_{58}&\rho_{59} \\  {\color{grey}\rho_{\color{grey}{06}}}&{\color{grey}\rho_{\color{grey}{16}}}&{\color{grey}\rho_{\color{grey}{26}}}&{\color{grey}\rho_{\color{grey}{36}}}&{\color{grey}\rho_{\color{grey}{46}}}&{\color{grey}\rho_{\color{grey}{56}}}&1&\rho_{67}&\rho_{68}&\rho_{69} \\  {\color{grey}\rho_{\color{grey}{07}}}&{\color{grey}\rho_{\color{grey}{17}}}&{\color{grey}\rho_{\color{grey}{27}}}&{\color{grey}\rho_{\color{grey}{37}}}&{\color{grey}\rho_{\color{grey}{47}}}&{\color{grey}\rho_{\color{grey}{57}}}&{\color{grey}\rho_{\color{grey}{67}}}&1&\rho_{78}&\rho_{79} \\  {\color{grey}\rho_{\color{grey}{08}}}&{\color{grey}\rho_{\color{grey}{18}}}&{\color{grey}\rho_{\color{grey}{28}}}&{\color{grey}\rho_{\color{grey}{38}}}&{\color{grey}\rho_{\color{grey}{48}}}&{\color{grey}\rho_{\color{grey}{58}}}&{\color{grey}\rho_{\color{grey}{68}}}&{\color{grey}\rho_{\color{grey}{78}}}&1&\rho_{89} \\  
{\color{grey}\rho_{\color{grey}{09}}}&{\color{grey}\rho_{\color{grey}{19}}}&{\color{grey}\rho_{\color{grey}{29}}}&{\color{grey}\rho_{\color{grey}{39}}}&{\color{grey}\rho_{\color{grey}{49}}}&{\color{grey}\rho_{\color{grey}{59}}}&{\color{grey}\rho_{\color{grey}{69}}}&{\color{grey}\rho_{\color{grey}{79}}}&{\color{grey}\rho_{\color{grey}{89}}}&1\end{bmatrix}$$

https://soci620.netlify.app/slides/soci620_20_randomSlopes.pdf,
https://soci620.netlify.app/slides/soci620_21_covariancePriors.pdf,
https://soci620.netlify.app/slides/soci620_22_twoLevelModels.pdf

In [69]:
# Multilevel model of valence on the nine topic predictors, with the intercept
# and every topic slope varying by docauthorid (random intercepts and slopes).
# The brm() call that specifies the model is kept commented out for reference;
# the fitted object is read back from disk instead of being refitted.
# NOTE(review): `file` below has no ".rds" extension while readRDS() uses one;
# presumably brm() appends it -- confirm against the brms documentation.

#f3_valTop_mis <- brm(
#    formula= valence ~ 1 +
#                   Places +
#                   Business +
#                   GirlPrivilege +
#                   WorkingBoy +
#                   Weather +
#                   People +
#                   Politics +
#                   Outback +
#                   Home + (1 +
#                   Places +
#                   Business +
#                   GirlPrivilege +
#                   WorkingBoy +
#                   Weather +
#                   People +
#                   Politics +
#                   Outback +
#                   Home | docauthorid),
#    family = 'gaussian',
#    prior=c(set_prior("normal(0,1)",class="Intercept"), # gamma_00
#            set_prior("normal(0,1)", class = "b"), # gamma_10 ... gamma_90
#            set_prior("uniform(0,1)",class="sd", ub=1), # phi_0 ... phi_9: SDs of the docauthorid-level effects
#            set_prior("uniform(0,1)",class="sigma", ub=1), # sigma: residual SD
#            set_prior("lkj(2)", class = "cor")), # R: correlations among the docauthorid-level effects
#    data=df,
#    chains=4, cores=4, iter=2000, warmup = 1000, seed = 4,
#    file = "20241018_PhD_diaChk_valTop_mis",
#    control=list(adapt_delta=.99)
#)

# Load the stored fit.
f3_valTop_mis <- readRDS("20241018_PhD_diaChk_valTop_mis.rds")

# Show the model summary.
print(f3_valTop_mis)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: valence ~ 1 + Places + Business + GirlPrivilege + WorkingBoy + Weather + People + Politics + Outback + Home + (1 + Places + Business + GirlPrivilege + WorkingBoy + Weather + People + Politics + Outback + Home | docauthorid) 
   Data: df (Number of observations: 1023) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~docauthorid (Number of levels: 4) 
                              Estimate Est.Error l-95% CI u-95% CI Rhat
sd(Intercept)                     0.41      0.27     0.02     0.94 1.00
sd(Places)                        0.47      0.29     0.02     0.97 1.00
sd(Business)                      0.47      0.29     0.03     0.97 1.00
sd(GirlPrivilege)                 0.39      0.28     0.01     0.95 1.00
sd(WorkingBoy)                    0.56      0.27     0.04     0.98 1.00
sd(Weather)                       0.46      0.28     0.02

## Multilevel model of effect of social metrics on sentiment (random intercepts and random slopes)

$$valence_{ik} \sim \mathrm{Normal}(\mu_{ik}, \sigma)$$

$$\mu_{ik} = \beta_{0k} +
\beta_{1k}socialMetric.s_i $$

$$\beta_{0k} = \gamma_{00} + \eta_{0k}$$ 

$$\beta_{1k} = \gamma_{10} + \eta_{1k}$$

$$\left[ \begin{split}\eta_{0k} \\ \eta_{1k}\end{split} \right] \sim \mathrm{MVNormal}\left(\left[ \begin{split}0 \\ 0\end{split} \right], \Phi R \Phi\right)$$

$$\Phi = \begin{bmatrix}\phi_0&0 \\ 0&\phi_1\end{bmatrix}$$

$$\sigma \sim \mathrm{Uniform}(0,1)$$ 
$$\gamma_{00}, \gamma_{10} \sim \mathrm{Normal}(0,1)$$
$$\phi_0,\phi_1 \sim \mathrm{Uniform}(0,1)$$
$$R \sim \mathrm{LKJ}(2)$$

https://soci620.netlify.app/slides/soci620_20_randomSlopes.pdf,
https://soci620.netlify.app/slides/soci620_21_covariancePriors.pdf,
https://soci620.netlify.app/slides/soci620_22_twoLevelModels.pdf

### External Referencing (named person entities)

In [70]:
# Multilevel model of valence on mentsRate.s, with the intercept and the
# mentsRate.s slope varying by docauthorid (random intercepts and slopes).
# The brm() call that specifies the model is kept commented out for reference;
# the fitted object is read back from disk instead of being refitted.
# NOTE(review): `file` below has no ".rds" extension while readRDS() uses one;
# presumably brm() appends it -- confirm against the brms documentation.

#f6_valSMR_mis <- brm(
#    formula= valence ~ 1 +
#                   mentsRate.s + (1 +
#                   mentsRate.s | docauthorid),
#    family = 'gaussian',
#    prior=c(set_prior("normal(0,1)",class="Intercept"), # gamma_00
#            set_prior("normal(0,1)", class = "b"), # gamma_10
#            set_prior("uniform(0,1)",class="sd", ub=1), # phi_0, phi_1: SDs of the docauthorid-level effects
#            set_prior("uniform(0,1)",class="sigma", ub=1), # sigma: residual SD
#            set_prior("lkj(2)", class = "cor")), # R: correlation between the docauthorid-level effects
#    data=df,
#    chains=4, cores=4, iter=2000, warmup = 1000, seed = 4,
#    file = "20241031_PhD_diaChk_valSMR_mis",
#    control=list(adapt_delta=.99)
#)

# Load the stored fit.
f6_valSMR_mis <- readRDS("20241031_PhD_diaChk_valSMR_mis.rds")

# Show the model summary.
print(f6_valSMR_mis)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: valence ~ 1 + mentsRate.s + (1 + mentsRate.s | docauthorid) 
   Data: df (Number of observations: 1023) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~docauthorid (Number of levels: 4) 
                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                  0.43      0.20     0.16     0.89 1.00     1649
sd(mentsRate.s)                0.19      0.17     0.01     0.70 1.00      992
cor(Intercept,mentsRate.s)    -0.18      0.42    -0.87     0.66 1.00     3053
                           Tail_ESS
sd(Intercept)                  1892
sd(mentsRate.s)                1632
cor(Intercept,mentsRate.s)     2458

Regression Coefficients:
            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept       0.13      0.23    -0.33     0.63 1.00     1377     1806
mentsRate.s    -0.01      0

### Self-Referencing (1st person pronouns)

In [71]:
# Multilevel model of valence on fppAllRate.s, with the intercept and the
# fppAllRate.s slope varying by docauthorid (random intercepts and slopes).
# The brm() call that specifies the model is kept commented out for reference;
# the fitted object is read back from disk instead of being refitted.
# NOTE(review): `file` below has no ".rds" extension while readRDS() uses one;
# presumably brm() appends it -- confirm against the brms documentation.

#f8_valFPA_mis <- brm(
#    formula= valence ~ 1 +
#                   fppAllRate.s + (1 +
#                   fppAllRate.s | docauthorid),
#    family = 'gaussian',
#    prior=c(set_prior("normal(0,1)",class="Intercept"), # gamma_00
#            set_prior("normal(0,1)", class = "b"), # gamma_10
#            set_prior("uniform(0,1)",class="sd", ub=1), # phi_0, phi_1: SDs of the docauthorid-level effects
#            set_prior("uniform(0,1)",class="sigma", ub=1), # sigma: residual SD
#            set_prior("lkj(2)", class = "cor")), # R: correlation between the docauthorid-level effects
#    data=df,
#    chains=4, cores=4, iter=2000, warmup = 1000, seed = 4,
#    file = "20241031_PhD_diaChk_valFPA_mis",
#    control=list(adapt_delta=.99)
#)

# Load the stored fit.
f8_valFPA_mis <- readRDS("20241031_PhD_diaChk_valFPA_mis.rds")

# Show the model summary.
print(f8_valFPA_mis)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: valence ~ 1 + fppAllRate.s + (1 + fppAllRate.s | docauthorid) 
   Data: df (Number of observations: 1023) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~docauthorid (Number of levels: 4) 
                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                   0.43      0.20     0.15     0.92 1.00     1430
sd(fppAllRate.s)                0.21      0.17     0.02     0.70 1.00     1168
cor(Intercept,fppAllRate.s)     0.18      0.41    -0.65     0.87 1.00     2478
                            Tail_ESS
sd(Intercept)                   1661
sd(fppAllRate.s)                1523
cor(Intercept,fppAllRate.s)     2514

Regression Coefficients:
             Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept        0.14      0.23    -0.34     0.62 1.00     1258     1936
fppAllRate.s   

## Subjective Well Being Themes

In [72]:
# Multilevel model of valence on the six predictors SE, HF, SS, RC, GI and LN,
# with the intercept and every slope varying by docauthorid (random intercepts
# and slopes).
# The brm() call that specifies the model is kept commented out for reference;
# the fitted object is read back from disk instead of being refitted.
# NOTE(review): `file` below has no ".rds" extension while readRDS() uses one;
# presumably brm() appends it -- confirm against the brms documentation.

#f3_valTopSWB_mis <- brm(
#    formula= valence ~ 1 +
#                   SE +
#                   HF +
#                   SS +
#                   RC +
#                   GI +
#                   LN + (1 +
#                   SE +
#                   HF +
#                   SS +
#                   RC +
#                   GI +
#                   LN | docauthorid),
#    family = 'gaussian',
#    prior=c(set_prior("normal(0,1)",class="Intercept"), # population-level intercept
#            set_prior("normal(0,1)", class = "b"), # population-level slopes
#            set_prior("uniform(0,1)",class="sd", ub=1), # SDs of the docauthorid-level effects
#            set_prior("uniform(0,1)",class="sigma", ub=1), # sigma: residual SD
#            set_prior("lkj(2)", class = "cor")), # correlations among the docauthorid-level effects
#    data=df,
#    chains=4, cores=4, iter=2000, warmup = 1000, seed = 4,
#    file = "20241104_PhD_diaChk_valTopSWB_mis",
#    control=list(adapt_delta=.99)
#)

# Load the stored fit.
f3_valTopSWB_mis <- readRDS("20241104_PhD_diaChk_valTopSWB_mis.rds")

# Show the model summary.
print(f3_valTopSWB_mis)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: valence ~ 1 + SE + HF + SS + RC + GI + LN + (1 + SE + HF + SS + RC + GI + LN | docauthorid) 
   Data: df (Number of observations: 1023) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~docauthorid (Number of levels: 4) 
                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)         0.47      0.25     0.03     0.94 1.00     2942     2386
sd(SE)                0.49      0.29     0.02     0.98 1.00     6468     2810
sd(HF)                0.55      0.28     0.03     0.98 1.00     5309     2686
sd(SS)                0.45      0.28     0.02     0.96 1.00     6044     3238
sd(RC)                0.49      0.29     0.03     0.98 1.00     7722     2572
sd(GI)                0.54      0.27     0.03     0.98 1.00     3426     2575
sd(LN)                0.49      0.29     0.02     0.98 1.00     6060     27