In [1]:
# install.packages("markdown")
library(knitr)


# Loading
library(dplyr)

# to get R2 in lmer
# library(sjstats)
# library(sjPlot)


# fit and test linear mixed models
library(lme4)
library(lmerTest)

# contrasts
# install.packages("pbkrtest")
library(emmeans)

# function pvalue is here
library(scales)

# code to add *, ** and *** for significances
makeStars <- function(x){
  stars <- c("***", "**", "*", "")
  vec <- c(0, 0.001, 0.01, 0.05, 1.1)
  i <- findInterval(x, vec)
  stars[i]
}

# xlsx files
df <- read.csv("data/main.csv")

df <- df[df['engine']=='google',]


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix


Attaching package: ‘lmerTest’


The following object is masked from ‘package:lme4’:

    lmer


The following object is masked from ‘package:stats’:

    step




# Factorization

In [2]:
# factorize topics
df$region <- as.factor(df$region)
df$browser <- as.factor(df$browser)
df$id <- as.factor(df$id)

df$topic <- as.factor(df$topic)
df$trt <- factor(df$trt, levels = c('risks', 'benefits'))


# Full Analisys

In [3]:
options(width = 10000)

fit <- lmer(valence ~   trt * topic + region + browser + (day|id),  data = df)

print(anova(fit, type='II'))
print(summary(fit))


boundary (singular) fit: see help('isSingular')



Type II Analysis of Variance Table with Satterthwaite's method
          Sum Sq Mean Sq NumDF DenDF   F value    Pr(>F)    
trt       425.64  425.64     1 13314 1891.4837 < 2.2e-16 ***
topic     771.04  154.21     5 13314  685.2834 < 2.2e-16 ***
region      1.89    1.89     1 13314    8.4007  0.003757 ** 
browser     0.00    0.00     1 13314    0.0063  0.936556    
trt:topic 302.78   60.56     5 13314  269.1050 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: valence ~ trt * topic + region + browser + (day | id)
   Data: df

REML criterion at convergence: 18025.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7589 -0.7581 -0.1054  0.6550  2.1306 

Random effects:
 Groups   Name        Variance  Std.Dev.  Corr
 id       (Intercept) 0.000e+00 0.000e+00     
          day         2.077e-19 4.557e-10  NaN
 Residual             2.250e-01 4.744e-01 


Correlation matrix not shown by default, as p = 14 > 12.
Use print(summary(fit), correlation=TRUE)  or
    vcov(summary(fit))        if you need it




optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')



# Contrasts for Treatment: Risks vs Benefits

In [4]:
# asymptotic is used for approximations because other methods ("kenward-roger", "satterthwaite") are 
# computationally too expensive
# https://link.springer.com/article/10.3758/s13428-016-0809-y


the_means <- emmeans(fit, ~  trt |(topic), lmer.df = "asymptotic")
contrast_trt <- pairs(the_means)
# print(contrast_trt)


# create dataframe of contrasts
em <- pairs(the_means, interaction = "pairwise", infer = c(TRUE, TRUE)) %>%  rbind() 

# bonferroni method for 16 tests 
em

 topic            trt_pairwise     estimate     SE  df asymp.LCL asymp.UCL z.ratio p.value
 coffee           risks - benefits    0.170 0.0200 Inf     0.117    0.2225   8.463  <.0001
 covid vaccines   risks - benefits   -0.148 0.0206 Inf    -0.203   -0.0939  -7.197  <.0001
 cryptocurrencies risks - benefits   -0.709 0.0200 Inf    -0.762   -0.6560 -35.365  <.0001
 internet         risks - benefits   -0.546 0.0200 Inf    -0.599   -0.4935 -27.259  <.0001
 social media     risks - benefits   -0.300 0.0200 Inf    -0.353   -0.2471 -14.966  <.0001
 vaccines         risks - benefits   -0.600 0.0200 Inf    -0.653   -0.5471 -29.931  <.0001

Results are averaged over some or all of the levels of: region, browser 
Degrees-of-freedom method: asymptotic 
Confidence level used: 0.95 
Conf-level adjustment: bonferroni method for 6 estimates 
P value adjustment: bonferroni method for 6 tests 

In [5]:
print (the_means %>%  rbind() %>% as.data.frame() 
        %>% mutate(across(where(is.numeric), round, 4)))
     

[1m[22m[36mℹ[39m In argument: `across(where(is.numeric), round, 4)`.
[1m[22m[33m![39m The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))”


              topic      trt  emmean     SE  df asymp.LCL asymp.UCL
1            coffee    risks  0.3713 0.0142 Inf    0.3307    0.4120
2            coffee benefits  0.2017 0.0142 Inf    0.1610    0.2424
3    covid vaccines    risks  0.2963 0.0150 Inf    0.2535    0.3392
4    covid vaccines benefits  0.4446 0.0142 Inf    0.4039    0.4852
5  cryptocurrencies    risks -0.2072 0.0142 Inf   -0.2479   -0.1666
6  cryptocurrencies benefits  0.5017 0.0142 Inf    0.4610    0.5424
7          internet    risks -0.4840 0.0142 Inf   -0.5247   -0.4434
8          internet benefits  0.0624 0.0142 Inf    0.0218    0.1031
9      social media    risks -0.2983 0.0142 Inf   -0.3390   -0.2576
10     social media benefits  0.0017 0.0142 Inf   -0.0390    0.0424
11         vaccines    risks  0.1017 0.0142 Inf    0.0610    0.1424
12         vaccines benefits  0.7017 0.0142 Inf    0.6610    0.7424


In [6]:
# convert to data frame and sort
emdf <-  as.data.frame(em) %>% arrange(desc(topic))

# add significative starts
emdf['sig'] <- makeStars(emdf$p.value)

# restrict p-values decimals and add <0.0001 where correspond
emdf[,"p.value"] <- pvalue(emdf[,"p.value"], accuracy = 0.001)

# round all values to 4 decimals
embf <- emdf %>% mutate(across(where(is.numeric), round, 4))

print(embf)
     


             topic     trt_pairwise estimate     SE  df asymp.LCL asymp.UCL  z.ratio p.value sig
1         vaccines risks - benefits  -0.6000 0.0200 Inf   -0.6529   -0.5471 -29.9313  <0.001 ***
2     social media risks - benefits  -0.3000 0.0200 Inf   -0.3529   -0.2471 -14.9657  <0.001 ***
3         internet risks - benefits  -0.5464 0.0200 Inf   -0.5993   -0.4935 -27.2589  <0.001 ***
4 cryptocurrencies risks - benefits  -0.7089 0.0200 Inf   -0.7618   -0.6560 -35.3653  <0.001 ***
5   covid vaccines risks - benefits  -0.1482 0.0206 Inf   -0.2025   -0.0939  -7.1965  <0.001 ***
6           coffee risks - benefits   0.1696 0.0200 Inf    0.1168    0.2225   8.4627  <0.001 ***


# Contrasts for Treatment: Health vs Technology

In [7]:
# asymptotic is used for approximations because other methods ("kenward-roger", "satterthwaite") are 
# computationally too expensive
# https://link.springer.com/article/10.3758/s13428-016-0809-y


the_means <- emmeans(fit, ~  topic |(trt), lmer.df = "asymptotic")
contrast_trt <- pairs(the_means)
# print(contrast_trt)


# create dataframe of contrasts
em <- pairs(the_means, interaction = "pairwise", infer = c(TRUE, TRUE)) %>%  rbind() 

# bonferroni method for 16 tests 
em

 trt      topic_pairwise                    estimate     SE  df asymp.LCL asymp.UCL z.ratio p.value
 risks    coffee - covid vaccines             0.0750 0.0206 Inf   0.01025   0.13975   3.642  0.0081
 risks    coffee - cryptocurrencies           0.5786 0.0200 Inf   0.51555   0.64160  28.862  <.0001
 risks    coffee - internet                   0.8554 0.0200 Inf   0.79233   0.91838  42.670  <.0001
 risks    coffee - social media               0.6696 0.0200 Inf   0.60662   0.73267  33.405  <.0001
 risks    coffee - vaccines                   0.2696 0.0200 Inf   0.20662   0.33267  13.451  <.0001
 risks    covid vaccines - cryptocurrencies   0.5036 0.0206 Inf   0.43882   0.56832  24.451  <.0001
 risks    covid vaccines - internet           0.7804 0.0206 Inf   0.71561   0.84511  37.890  <.0001
 risks    covid vaccines - social media       0.5946 0.0206 Inf   0.52989   0.65939  28.873  <.0001
 risks    covid vaccines - vaccines           0.1946 0.0206 Inf   0.12989   0.25939   9.451  <.0001


In [8]:
print (the_means %>%  rbind() %>% as.data.frame() 
         %>% mutate(across(where(is.numeric), round, 4)))
     

        trt            topic  emmean     SE  df asymp.LCL asymp.UCL
1     risks           coffee  0.3713 0.0142 Inf    0.3307    0.4120
2     risks   covid vaccines  0.2963 0.0150 Inf    0.2535    0.3392
3     risks cryptocurrencies -0.2072 0.0142 Inf   -0.2479   -0.1666
4     risks         internet -0.4840 0.0142 Inf   -0.5247   -0.4434
5     risks     social media -0.2983 0.0142 Inf   -0.3390   -0.2576
6     risks         vaccines  0.1017 0.0142 Inf    0.0610    0.1424
7  benefits           coffee  0.2017 0.0142 Inf    0.1610    0.2424
8  benefits   covid vaccines  0.4446 0.0142 Inf    0.4039    0.4852
9  benefits cryptocurrencies  0.5017 0.0142 Inf    0.4610    0.5424
10 benefits         internet  0.0624 0.0142 Inf    0.0218    0.1031
11 benefits     social media  0.0017 0.0142 Inf   -0.0390    0.0424
12 benefits         vaccines  0.7017 0.0142 Inf    0.6610    0.7424


In [9]:
# convert to data frame and sort
emdf <-  as.data.frame(em) %>% arrange(trt)

# add significative starts
emdf['sig'] <- makeStars(emdf$p.value)

# restrict p-values decimals and add <0.0001 where correspond
emdf[,"p.value"] <- pvalue(emdf[,"p.value"], accuracy = 0.001)

# round all values to 4 decimals
embf <- emdf %>% mutate(across(where(is.numeric), round, 4))

print(embf)

        trt                    topic_pairwise estimate     SE  df asymp.LCL asymp.UCL  z.ratio p.value sig
1  benefits           coffee - covid vaccines  -0.2429 0.0200 Inf   -0.3059   -0.1798 -12.1151  <0.001 ***
2  benefits         coffee - cryptocurrencies  -0.3000 0.0200 Inf   -0.3630   -0.2370 -14.9657  <0.001 ***
3  benefits                 coffee - internet   0.1393 0.0200 Inf    0.0763    0.2023   6.9483  <0.001 ***
4  benefits             coffee - social media   0.2000 0.0200 Inf    0.1370    0.2630   9.9771  <0.001 ***
5  benefits                 coffee - vaccines  -0.5000 0.0200 Inf   -0.5630   -0.4370 -24.9428  <0.001 ***
6  benefits covid vaccines - cryptocurrencies  -0.0571 0.0200 Inf   -0.1202    0.0059  -2.8506   0.131    
7  benefits         covid vaccines - internet   0.3821 0.0200 Inf    0.3191    0.4452  19.0634  <0.001 ***
8  benefits     covid vaccines - social media   0.4429 0.0200 Inf    0.3798    0.5059  22.0922  <0.001 ***
9  benefits         covid vaccines - 