Code to replicate the results reported for Study 1'.

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
%load_ext rpy2.ipython

In [3]:
from __future__ import division
import numpy as np
import random
import statsmodels.stats.api as sms
from pyspan.ratings_task.analysis import *

In [4]:
# Indices are 0-based offsets; ix+79 is the item id used to look the word up
# in `antonyms`. A list comprehension (rather than filter) yields a real list
# on both Python 2 and Python 3, which the fancy indexing and the R hand-off
# further down rely on.
pos_ixs = [ix for ix in np.arange(20)
           if antonyms.loc[(ix+79,"valence")] == "POS"]
# Negative items are listed by hand (as item ids), ordered so that each column
# matches its partner antonym in pos_ixs, then converted to 0-based offsets.
neg_ixs = [90, 91, 92, 94, 95, 98, 79, 83, 86, 87]
neg_ixs = [ix-79 for ix in neg_ixs]

# Participants

In [5]:
# Ratings for the 20 items stored in columns "79".."98", one row per minidf row.
item_cols = [str(item) for item in range(79, 99)]
dat = deepcopy(minidf[item_cols].values)
is_dem = minidf.party == "Democrat"
is_rep = minidf.party == "Republican"
# ddat__ holds the rows contributed by Democrats
ddat__ = dat[is_dem, :]
# rdat__ holds the rows contributed by Republicans
rdat__ = dat[is_rep, :]

In [6]:
# Sanity check: (rows, items) for the full sample, Democrats and Republicans.
dat.shape, ddat__.shape, rdat__.shape

((147, 20), (61, 20), (38, 20))

In [7]:
# Mean age of Democrats and its standard error.
dem_age = minidf.loc[minidf.party == "Democrat", "age"]
np.mean(dem_age), stats.sem(dem_age)

(35.9344262295082, 1.3906142945108189)

In [8]:
# Gender breakdown among Democrats.
minidf.loc[minidf.party == "Democrat", "gender"].value_counts()

F    35
M    26
Name: gender, dtype: int64

In [9]:
# Mean age of Republicans and its standard error.
rep_age = minidf.loc[minidf.party == "Republican", "age"]
np.mean(rep_age), stats.sem(rep_age)

(40.10526315789474, 1.5736040455896694)

In [10]:
# Gender breakdown among Republicans.
minidf.loc[minidf.party == "Republican", "gender"].value_counts()

M    21
F    17
Name: gender, dtype: int64

# Results

In [11]:
# Split each party's ratings into the positive-word and negative-word columns.
# Democrats:
ddat_pos, ddat_neg = ddat__[:, pos_ixs], ddat__[:, neg_ixs]
# Republicans:
rdat_pos, rdat_neg = rdat__[:, pos_ixs], rdat__[:, neg_ixs]

Calculate means and standard errors, using the method provided by Arai (2011).

In [12]:
%%R
# Load the clustering helpers; the functions defined below call `mclx`, which is
# presumably defined in this file -- verify the relative path from the notebook's directory.
source("../../conditional-variation/clmclx.R")

In [13]:
%%R
# Grand mean of a ratings matrix and its standard error as reported by mclx,
# with row (participant) and column (item) indices passed as the two clusters.
#
# x: ratings matrix, one row per participant and one column per item;
#    NA marks a missing rating.
# Returns a length-2 numeric vector: the mean, shifted by +1, followed by the
# second value of the mclx result (taken here to be the standard error).
descr.stats <- function(x) {
    n <- nrow(x)
    k <- ncol(x)
    # Flatten row by row so that each participant's k ratings are contiguous.
    y <- c(t(x))
    answered <- !is.na(y)
    # Cluster labels are kept only for the ratings that are present, so they
    # line up with the observations lm retains under the default NA handling.
    idxn <- rep(1:n, each=k)[answered]
    idxk <- rep(1:k, n)[answered]
    # Intercept-only model: the single coefficient is the grand mean.
    fit <- lm(y ~ 1)
    res <- mclx(fit, 1, idxn, idxk)
    # The data are coded 0--5 but the paper describes them on a 1--6 scale,
    # hence the +1 on the estimate.
    res[1] <- res[1] + 1
    res[1:2]
}

In [14]:
%%R -i ddat_pos -o d_pos_mu,se_dp
# Democrats, positive words: descr.stats returns (mean on the 1--6 scale, SE).
ds <- descr.stats(ddat_pos)
d_pos_mu <- ds[1]
se_dp <- ds[2]

In [15]:
# Democrats, positive words: mean rating and its standard error, as exported from R.
print(d_pos_mu, se_dp)

(array([3.24590164]), array([0.1696003]))


In [16]:
%%R -i ddat_neg -o d_neg_mu,se_dn
# Democrats, negative words: descr.stats returns (mean on the 1--6 scale, SE).
ds <- descr.stats(ddat_neg)
d_neg_mu <- ds[1]
se_dn <- ds[2]

In [17]:
# Democrats, negative words: mean rating and its standard error, as exported from R.
print(d_neg_mu, se_dn)

(array([3.77011494]), array([0.20958466]))


In [18]:
%%R -i rdat_pos -o r_pos_mu,se_rp
# Republicans, positive words: descr.stats returns (mean on the 1--6 scale, SE).
ds <- descr.stats(rdat_pos)
r_pos_mu <- ds[1]
se_rp <- ds[2]

In [19]:
# Republicans, positive words: mean rating and its standard error, as exported from R.
print(r_pos_mu, se_rp)

(array([3.81530343]), array([0.17491346]))


In [20]:
%%R -i rdat_neg -o r_neg_mu,se_rn
# Republicans, negative words: descr.stats returns (mean on the 1--6 scale, SE).
ds <- descr.stats(rdat_neg)
r_neg_mu <- ds[1]
se_rn <- ds[2]

In [21]:
# Republicans, negative words: mean rating and its standard error, as exported from R.
print(r_neg_mu, se_rn)

(array([2.87631579]), array([0.11242717]))


A series of $t$-tests (with standard errors clustered by participant and by item).

In [22]:
%%R
# Print small p-values in fixed rather than scientific notation.
options(scipen=999)

# Two-group comparison of ratings via OLS on a 0/1 group dummy, with the test
# statistic taken from mclx using participant and item indices as clusters.
#
# Arguments:
#   a, b                  ratings matrices (rows = participants, columns =
#                         items, NA = missing) with the same number of columns.
#                         Group a is coded 0 and group b is coded 1, so a
#                         positive t means ratings in b exceed ratings in a.
#   matched.participants  TRUE when row i of a and row i of b come from the
#                         same participant (requires equal row counts); both
#                         rows then share one participant cluster.
#   matched.items         TRUE when column j of a and column j of b are the
#                         same item; both columns then share one item cluster.
#   alternative           how the p-value reported by mclx is made one-sided:
#                         "observed" (default; the original behaviour) always
#                         halves it, which is only valid when the effect lies
#                         in the hypothesised direction; "greater" tests
#                         b > a and "less" tests b < a, returning 1 - p/2 when
#                         the observed t has the opposite sign.
#
# Returns c(t statistic, residual df of the OLS fit, one-sided p-value).
clustered.t.test <- function (a, b, matched.participants=FALSE, matched.items=FALSE,
                              alternative=c("observed", "greater", "less")) {
    alternative <- match.arg(alternative)
    # Flatten row by row: all of a's ratings first, then all of b's.
    y <- c(t(a), t(b))
    answered <- !is.na(y)
    n.a <- dim(a)[1]
    n.b <- dim(b)[1]
    k.a <- dim(a)[2]
    k.b <- dim(b)[2]
    stopifnot(k.a == k.b)
    # Participant cluster labels: shared across a and b only when matched.
    if (matched.participants) {
        stopifnot(n.a == n.b)
        idxn <- c(rep(1:n.a, each=k.a), rep(1:n.b, each=k.b))
    } else {
        idxn <- c(rep(1:n.a, each=k.a), rep((n.a+1):(n.a+n.b), each=k.b))
    }
    # Item cluster labels: shared across a and b only when matched.
    if (matched.items) {
        idxk <- rep(1:k.a, n.a+n.b)
    } else {
        idxk <- c(rep(1:k.a, n.a), rep((k.a+1):(k.a+k.b), n.b))
    }
    # Keep labels only for the ratings that are present, so they line up with
    # the observations lm retains under the default NA handling.
    idxn <- idxn[answered]
    idxk <- idxk[answered]
    groups <- c(rep(0, n.a*k.a), rep(1, n.b*k.b))
    fit <- lm(y ~ groups)
    resid.df <- df.residual(fit)
    res <- mclx(fit, 1, idxn, idxk)
    # Row 2 is the `groups` coefficient; columns 3 and 4 are read as its
    # t statistic and p-value.
    t.stat <- res[2,3]
    p.halved <- res[2,4] / 2
    p.one.sided <- switch(alternative,
        observed = p.halved,
        greater  = if (t.stat >= 0) p.halved else 1 - p.halved,
        less     = if (t.stat <= 0) p.halved else 1 - p.halved)
    return (c(t.stat, resid.df, p.one.sided))
}

Hypothesis: Republicans' judgments on the positive words are higher than Democrats' judgments on the positive words.

In [23]:
%%R
# a = Democrats (coded 0), b = Republicans (coded 1): the hypothesis predicts t > 0.
clustered.t.test(ddat_pos, rdat_pos, matched.items=TRUE)

[1]   1.96647385 987.00000000   0.02476189


Hypothesis: Republicans' judgments on the negative words are lower than Democrats' judgments on the negative words.

In [24]:
%%R -i ddat_neg,rdat_neg
# a = Democrats (coded 0), b = Republicans (coded 1): the hypothesis predicts t < 0.
clustered.t.test(ddat_neg, rdat_neg, matched.items=TRUE)

[1]  -3.5838832047 987.0000000000   0.0001775382


Hypothesis: Republicans' judgments on the positive words are higher than Republicans' judgments on the negative words.

In [25]:
%%R -i rdat_pos,rdat_neg
# a = negative words (coded 0), b = positive words (coded 1), same Republicans
# in both matrices: the hypothesis predicts t > 0.
clustered.t.test(rdat_neg, rdat_pos, matched.participants=TRUE)

[1]   4.312877856039 757.000000000000   0.000009123888


Hypothesis: Democrats' judgments on the negative words are higher than Democrats' judgments on the positive words.

In [26]:
%%R -i ddat_pos,ddat_neg
# a = positive words (coded 0), b = negative words (coded 1), same Democrats
# in both matrices: the hypothesis predicts t > 0.
clustered.t.test(ddat_pos, ddat_neg, matched.participants=TRUE)

[1]    2.0329980 1217.0000000    0.0211347


## Mixed effects model

In [27]:
# Recode gender numerically for the mixed model: M -> 1, F -> -1.
gender_codes = {"M": 1, "F": -1}
minidf["gender"] = minidf["gender"].replace(gender_codes)

In [28]:
%%R -i minidf,pos_ixs,neg_ixs
# https://www.r-bloggers.com/how-to-do-repeated-measures-anovas-in-r/
library(lmerTest)
library(tidyverse)

options(contrasts=c("contr.sum","contr.poly"))

# Names of the positive- and negative-word rating columns.
# NOTE(review): columns are looked up by position (offset + 79), which assumes
# a particular column layout in minidf -- confirm.
pos.idx <- colnames(minidf)[pos_ixs+79]
neg.idx <- colnames(minidf)[neg_ixs+79]
idx <- c(pos.idx, neg.idx)

# Long format: one row per (participant, item) rating, Democrats and
# Republicans only.
df <- minidf %>%
    as_tibble() %>%
    filter(party %in% c("Democrat","Republican")) %>%
    select(all_of(idx), "age", "gender", "party", "party_identity", "political_engagement") %>%
    mutate(ID=1:nrow(.)) %>%
    # all_of() marks idx as an external character vector, which avoids the
    # tidyselect "external vector in selections is ambiguous" message.
    pivot_longer(all_of(idx), names_to="item", values_to="rating") %>%
    # condition is TRUE for positive-word items.
    mutate(condition=item %in% pos.idx) %>%
    # party and condition are coded +sqrt(.5) (Republican / positive word) and
    # -sqrt(.5) (Democrat / negative word); age is shifted by -18 and the two
    # questionnaire scores by +3.
    mutate(party=ifelse(party=="Republican", sqrt(.5), -sqrt(.5)),
           condition=ifelse(condition, sqrt(.5), -sqrt(.5)),
           age=age-18,
           party_identity=party_identity+3,
           political_engagement=political_engagement+3
          )

# Crossed random intercepts for participant (ID) and item.
fit <- lmer(rating ~ condition * party + (1|ID) + (1|item), data = df)
summary(fit)

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(idx)` instead of `idx` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: rating ~ condition * party + (1 | ID) + (1 | item)
   Data: df

REML criterion at convergence: 6663.1

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82978 -0.70284 -0.01842  0.68744  2.90831 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.09968  0.3157  
 item     (Intercept) 0.10051  0.3170  
 Residual             1.59320  1.2622  
Number of obs: 1978, groups:  ID, 99; item, 20

Fixed effects:
                  Estimate Std. Error         df t value            Pr(>|t|)
(Intercept)        2.42660    0.08332   25.00413  29.126 <0.0000000000000002
condition          0.14638   

In [29]:
%%R
# Interval of +- 2 SE around the 4th fixed effect of `fit`
# (for rating ~ condition * party, the condition:party interaction).
fixed.effects <- summary(fit)$coefficients
coef.interact <- fixed.effects[4,1]
se.interact <- fixed.effects[4,2]
print(coef.interact + c(-2, 2) * se.interact)

[1] 0.6142657 0.8477162


In [30]:
%%R
# Likelihood-ratio test of the condition x party interaction: the null model
# keeps both main effects and the same random intercepts but drops the
# interaction term.
# NOTE(review): both models are fit by REML here; lme4's anova is documented
# to refit with ML by default before comparing -- confirm for the installed version.
null.fit <- lmer(rating ~ condition + party + (1|ID) + (1|item), data = df)
anova(null.fit, fit, test="LRT")

Data: df
Models:
null.fit: rating ~ condition + party + (1 | ID) + (1 | item)
fit: rating ~ condition * party + (1 | ID) + (1 | item)
         npar    AIC    BIC  logLik deviance  Chisq Df            Pr(>Chisq)
null.fit    6 6812.4 6845.9 -3400.2   6800.4                                
fit         7 6663.7 6702.8 -3324.8   6649.7 150.69  1 < 0.00000000000000022
            
null.fit    
fit      ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [31]:
%%R
# Same random-effects structure as before, with age, gender, party identity
# and political engagement added as moderators.
# NOTE: this reassigns `fit`, so cells that read `fit` give different results
# depending on whether they run before or after this one.
fit <- lmer(
    rating ~ condition * party +
        condition * age +
        condition * gender +
        condition * party * party_identity +
        condition * party * political_engagement +
        (1|ID) + (1|item),
    data = df
)
summary(fit)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: rating ~ condition * party + condition * age + condition * gender +  
    condition * party * party_identity + condition * party *  
    political_engagement + (1 | ID) + (1 | item)
   Data: df

REML criterion at convergence: 6706.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.78563 -0.72061 -0.00114  0.68640  3.06830 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.09209  0.3035  
 item     (Intercept) 0.10053  0.3171  
 Residual             1.58701  1.2598  
Number of obs: 1978, groups:  ID, 99; item, 20

Fixed effects:
                                         Estimate   Std. Error           df
(Intercept)                             2.6864690    0.1573231   99.5348576
condition                              -0.0686692    0.1683306  104.2694736
party                                  -0.2159460    0.1768180   91.0387607
age             