Analyses pre-registered for in-lab version of Study 3'.

Pre-registration: https://osf.io/de935

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from __future__ import division
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import re
from scipy import stats
import statsmodels.stats.api as sms
from pyspan.utils import *
from pyspan.plurals.analysis import *
assert not mturk
from pyspan.plurals.utils import *

# Testing the PM hypothesis

## Participants

In [3]:
len(valence)

172

In [4]:
# Keep only participants who passed the attention check.
# Comment out the next line to include participants who failed it.
valence = valence.loc[valence.atc_passed]
len(valence)

155

In [5]:
demographic_info(valence)

Age: 24.3096774194 (SE = 0.715181459241)
Gender: [('Both', 1), ('Female', 105), ('Male', 49)]


In [6]:
len(valence.loc[valence.Condition == "POSITIVE"]), len(valence.loc[valence.Condition == "NEGATIVE"])

(82, 73)

## Results

### Logistic regression

Selection of plural ~ Valence of item + Condition + Valence of item * Condition + Dummy indicating whether or not this was the first survey the participant took (including participant-level effects)

We hypothesize that the coefficient on Valence of item * Condition will be positive. If the order dummy has a non-zero coefficient, we commit to discarding all valence-survey data from participants who did not take that survey first.

In [7]:
# Build the dummy-coded frame and outcome vector for the valence survey.
# NOTE(review): `dummy` and `words` are not defined in this notebook (presumably
# they come from the pyspan star imports); confirm how `classes` and `sets` are
# encoded before relying on the sign of any coefficient.
vdummied, Y = dummy(valence, classes = [ "POSITIVE", "NEGATIVE" ],
                    sets = np.stack((words["large"], words["small"])))

In [8]:
@np.vectorize
def valence_condition_interaction(v, c):
    """Interaction term: `v` times the condition recoded to +1 / -1.

    A condition value equal to 1 is kept as +1; any other value is
    treated as -1. The function is wrapped with np.vectorize, so it is
    applied element-wise when given array-like inputs.
    """
    sign = 1 if c == 1 else -1
    return v * sign
# Assemble the regression inputs. Column 2 is given as the tuple
# (0, 1, valence_condition_interaction) -- presumably df_to_matrix builds it by
# applying that function to columns 0 and 1 (TODO confirm against pyspan).
X, Y = df_to_matrix(vdummied, Y,
                    columns = { 0: "valence", 1: "condition", 
                                2: (0,1,valence_condition_interaction), 
                                3: "order" })

In [9]:
# Fit the regression, then show the first four coefficients (the columns passed
# to df_to_matrix: valence, condition, valence x condition, order) and the AUC.
logit = SparseLR(Y, X)
# Parenthesized single-argument print behaves the same under Python 2 and 3,
# unlike the bare `print x` statement.
print(logit.coef[:4])
print(logit.auc)

[0.06618132 0.         0.41462573 0.        ]
0.721042266440916


### t-tests

For each participant, compute the proportion of positive items for which the participant chose the pluralized form of the word, the proportion of neutral items for which the participant chose the pluralized form of the word, and the proportion of negative items for which the participant chose the pluralized form of the word.

In [10]:
# Per-participant summary: the Condition column plus one proportion column per
# item valence (positive / neutral / negative).
# .copy() makes vsummary an independent frame, so the column assignments below
# are not chained assignments on a selection of `valence`.
vsummary = valence[["Condition"]].copy()
dat = valence[ixs].values
# get_prop is applied to each row of `dat` (axis 1), i.e. once per participant
vsummary["pos_lg"] = np.apply_along_axis(get_prop, 1, dat, pos_lg, pos_sm)
vsummary["neu_lg"] = np.apply_along_axis(get_prop, 1, dat, neu_lg, neu_sm)
vsummary["neg_lg"] = np.apply_along_axis(get_prop, 1, dat, neg_lg, neg_sm)
# One row per participant; Condition + three proportion columns
assert vsummary.values.shape == (len(valence), 4)

### Positive condition

Hypothesis: mean(% pluralized positive items chosen) - mean(% pluralized neutral items chosen) > 0

In [11]:
# Paired comparison within the POSITIVE condition: positive vs. neutral items
pos_cond = vsummary[vsummary["Condition"] == "POSITIVE"]
a = pos_cond["pos_lg"].values
b = pos_cond["neu_lg"].values
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(7.096091377075791, 2.198359595619331e-10)

Hypothesis: mean(% pluralized negative items chosen) - mean(% pluralized neutral items chosen) < 0

In [12]:
# Paired comparison within the POSITIVE condition: negative vs. neutral items
pos_cond = vsummary[vsummary["Condition"] == "POSITIVE"]
a = pos_cond["neg_lg"].values
b = pos_cond["neu_lg"].values
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(-13.12335225566968, 4.76886084819926e-22)

### Negative condition

Hypothesis: mean(% pluralized negative items chosen) - mean(% pluralized neutral items chosen) > 0

In [13]:
# Paired comparison within the NEGATIVE condition: negative vs. neutral items
neg_cond = vsummary[vsummary["Condition"] == "NEGATIVE"]
a = neg_cond["neg_lg"].values
b = neg_cond["neu_lg"].values
# Keep only the participants for whom both proportions are defined
idx = np.flatnonzero(~np.isnan(a) & ~np.isnan(b))
a, b = a[idx], b[idx]
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(9.189678704932744, 4.636418107307109e-14)

Hypothesis: mean(% pluralized positive items chosen) - mean(% pluralized neutral items chosen) < 0

In [14]:
# Paired comparison within the NEGATIVE condition: positive vs. neutral items.
# Rows are dropped when EITHER proportion is NaN (not just pos_lg), matching the
# pairwise filter used by the other NaN-aware t-test cells, so ttest_rel never
# receives a NaN in either column.
neg_rows = vsummary.loc[vsummary["Condition"] == "NEGATIVE",
                        ["pos_lg", "neu_lg"]].dropna()
a = neg_rows["pos_lg"].values
b = neg_rows["neu_lg"].values
delta = np.mean(a) - np.mean(b)
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(-6.319004005662674, 1.0140719475598405e-08)

### Combining conditions

Recode observations in the negative condition to be the opposite of what participants chose.

In [15]:
# Split by condition, flip the NEGATIVE-condition proportions (p -> 1 - p) and
# stack the two groups back together.
vpos = vsummary.loc[vsummary["Condition"] == "POSITIVE"]
# .copy() so the recoding below modifies an independent frame rather than a
# selection of vsummary (vsummary itself is left untouched).
vneg = vsummary.loc[vsummary["Condition"] == "NEGATIVE"].copy()
vneg["pos_lg"] = 1 - vneg["pos_lg"]
vneg["neu_lg"] = 1 - vneg["neu_lg"]
vneg["neg_lg"] = 1 - vneg["neg_lg"]
vrecoded = pd.concat([ vpos, vneg ])
# No rows lost or duplicated by the concatenation
assert len(vrecoded) == len(vpos) + len(vneg)

Hypothesis: mean(% pluralized positive items chosen) - mean(% pluralized neutral items chosen) > 0

In [16]:
# Paired comparison on the recoded data: positive vs. neutral items.
# Rows are dropped when EITHER proportion is NaN (not just pos_lg), matching the
# pairwise filter used for the negative-vs-neutral comparison, so ttest_rel
# never receives a NaN in either column.
paired = vrecoded[["pos_lg", "neu_lg"]].dropna()
a = paired["pos_lg"].values
b = paired["neu_lg"].values
delta = np.mean(a) - np.mean(b)
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(9.514100524414973, 1.8940458669883742e-17)

Compute degrees of freedom.

In [17]:
len(a) - 1

153

In [18]:
# Standard error of the mean paired difference (a - b), using the sample
# variance (ddof = 1); the interval shown is delta +/- 2 SE (approximately 95%).
# `delta`, `a` and `b` are the values left by the preceding t-test cell.
se_delta = np.sqrt(np.var(a - b, ddof = 1) / len(a))
# Under Python 2 this prints a single tuple: (delta, lower, upper)
print(delta, delta - 2*se_delta, delta + 2*se_delta)

(0.1362258953168044, 0.10758926383131123, 0.16486252680229757)


In [19]:
np.mean(a), stats.sem(a), np.mean(b), stats.sem(b)

(0.6553384494293585,
 0.01982430182991738,
 0.5191125541125541,
 0.01838467428028642)

Hypothesis: mean(% pluralized negative items chosen) - mean(% pluralized neutral items chosen) < 0

In [20]:
# Paired comparison on the recoded data: negative vs. neutral items
a = vrecoded["neg_lg"].values
b = vrecoded["neu_lg"].values
# Keep only the participants for whom both proportions are defined
idx = np.flatnonzero(~np.isnan(a) & ~np.isnan(b))
a, b = a[idx], b[idx]
delta = a.mean() - b.mean()
t, p = stats.ttest_rel(a, b)
# ttest_rel reports a two-sided p-value; halving it gives the one-sided value,
# which is only valid when t has the hypothesized sign
t, p/2

(-15.532868816081432, 1.2306885474711773e-33)

Compute degrees of freedom.

In [21]:
len(a) - 1

154

In [22]:
# Standard error of the mean paired difference (a - b), using the sample
# variance (ddof = 1); the interval shown is delta +/- 2 SE (approximately 95%).
# `delta`, `a` and `b` are the values left by the preceding t-test cell.
se_delta = np.sqrt(np.var(a - b, ddof = 1) / len(a))
# Under Python 2 this prints a single tuple: (delta, lower, upper)
print(delta, delta - 2*se_delta, delta + 2*se_delta)

(-0.365505376344086, -0.41256756178734066, -0.3184431909008313)


In [23]:
np.mean(a), stats.sem(a), np.mean(b), stats.sem(b)

(0.15025806451612903,
 0.015071739122996372,
 0.515763440860215,
 0.018570179483636654)

# Testing the generalizability of the AVB to gender identity

## Participants

In [24]:
len(gender)

172

In [25]:
# Keep only participants who passed the attention check.
# Comment out the next line to include participants who failed it.
gender = gender.loc[gender.atc_passed]
len(gender)

156

In [26]:
# Keep only participants identifying as male or female and upper-case the
# labels, so `ident` can be compared directly with the "MALE"/"FEMALE" values
# used for Condition in the t-test cells.
# The upper-case labels are accepted by the filter as well, so re-running this
# cell after the recode does not empty the frame.
gender = gender.loc[gender["ident"].isin(("Male", "Female", "MALE", "FEMALE"))]
# replace() returns a new frame instead of mutating a .loc selection in place
gender = gender.replace({ "Male": "MALE", "Female": "FEMALE" })
len(gender)

155

As for Study 3, we pre-registered three different criteria for operationalizing which word in a given pair is considered the more positively-valenced choice. The submitted manuscript reports results for the within-subjects analysis.

## Results

### 1. Between subjects analysis: 60% item selection threshold

Considers a word to be positive when at least 60% of participants judged it to be more positively-valenced than its accompanying word.

In [27]:
positive60, negative60 = agreement_items(valence, [ "POSITIVE", "NEGATIVE" ], .6)

#### Logistic regression

Selection of positive word ~ Participant's gender identity + Condition + Participant's gender identity * Condition + Dummy indicating whether or not this was the first survey the participant took (including participant-level effects)

Selection of positive word ~ Participant's gender identity + Condition + Participant's gender identity * Condition + Dummy indicating whether or not this was the first survey the participant took (including participant-level effects)

In [28]:
gdummied, Y = dummy(gender, [ "MALE", "FEMALE" ],
                    sets = np.stack((positive60, 
                                     negative60)))
X, Y = df_to_matrix(gdummied, Y, columns = { 0: "id",
                                             1: "condition",
                                             2: (0,1), 
                                             3: "order" })

In [29]:
# Fit the regression, print the first four coefficients and display the AUC.
logit = SparseLR(Y, X)
# Parenthesized single-argument print behaves the same under Python 2 and 3,
# unlike the bare `print x` statement.
print(logit.coef[:4])
logit.auc

[0. 0. 0. 0.]


0.5

#### t-tests

In [30]:
# Per-participant summary for the 60% threshold: condition, gender identity and
# the proportion returned by get_prop for the positive60 / negative60 item sets.
# .copy() makes gsummary an independent frame, so adding the "ppos" column is
# not a chained assignment on a selection of `gender`.
gsummary = gender[["Condition", "ident"]].copy()
dat = gender[ixs].values
# get_prop is applied to each row of `dat` (axis 1), i.e. once per participant
props = np.apply_along_axis(get_prop, 1, dat, positive60,
                            negative60)
gsummary["ppos"] = props
# One row per participant; Condition, ident and ppos columns
assert gsummary.values.shape == (len(gender), 3)

In [31]:
def one_sided_ttest(a, b):
    """One-sided two-sample t-test of mean(a) > mean(b) with unequal variances.

    Both samples are wrapped in statsmodels DescrStatsW objects and compared
    with CompareMeans.ttest_ind(usevar="unequal", alternative="larger").

    Returns whatever ttest_ind returns: per the statsmodels documentation,
    the t statistic, the p-value and the degrees of freedom.
    """
    comparison = sms.CompareMeans(sms.DescrStatsW(a), sms.DescrStatsW(b))
    return comparison.ttest_ind(alternative = "larger", usevar = "unequal")

For participants in the male condition, the difference in means mean(% positive words chosen amongst males) - mean(% positive words chosen amongst females) > 0.

In [32]:
# MALE condition: male-identifying (a) vs. female-identifying (b) participants
male_cond = gsummary[gsummary["Condition"] == "MALE"]
a = male_cond.loc[male_cond["ident"] == "MALE", "ppos"].values
b = male_cond.loc[male_cond["ident"] == "FEMALE", "ppos"].values
one_sided_ttest(a, b)

(0.7988883330067161, 0.2139218038024439, 54.177569989625546)

For participants in the female condition, the difference in means mean(% positive words chosen amongst females) - mean(% positive words chosen amongst males) > 0.

In [33]:
# FEMALE condition: female-identifying (a) vs. male-identifying (b) participants
female_cond = gsummary[gsummary["Condition"] == "FEMALE"]
a = female_cond.loc[female_cond["ident"] == "FEMALE", "ppos"].values
b = female_cond.loc[female_cond["ident"] == "MALE", "ppos"].values
one_sided_ttest(a, b)

(0.9717382056565919, 0.16791583362166612, 50.289463635242306)

The difference in means mean(% positive words chosen amongst those whose condition matched their gender identity) - mean(% positive words chosen amongst those whose condition didn't match their gender identity) > 0

In [34]:
# Participants whose condition matches their gender identity (a) vs. the rest (b)
matched = gsummary["Condition"] == gsummary["ident"]
a = gsummary.loc[matched, "ppos"].values
b = gsummary.loc[~matched, "ppos"].values
one_sided_ttest(a, b)

(1.0433841682429088, 0.1492089860460073, 152.8333811065961)

### 2. Between subjects analysis: 80% item selection threshold

Considers a word to be positive when at least 80% of participants judged it to be more positively-valenced than its accompanying word.

In [35]:
positive80, negative80 = agreement_items(valence, [ "POSITIVE", "NEGATIVE" ], .8)

#### Logistic regression

In [36]:
gdummied, Y = dummy(gender, sets = np.stack((positive80, negative80)),
                    classes = [ "MALE", "FEMALE" ])

In [37]:
X, Y = df_to_matrix(gdummied, Y, columns = { 0: "id",
                                             1: "condition",
                                             2: (0,1), 
                                             3: "order" })

In [38]:
# Fit the regression, print the first four coefficients and display the AUC.
logit = SparseLR(Y, X)
# Parenthesized single-argument print behaves the same under Python 2 and 3,
# unlike the bare `print x` statement.
print(logit.coef[:4])
logit.auc

[0. 0. 0. 0.]


0.5

#### t-tests

In [39]:
# Per-participant summary for the 80% threshold: condition, gender identity and
# the proportion returned by get_prop for the positive80 / negative80 item sets.
# .copy() makes gsummary an independent frame, so adding the "ppos" column is
# not a chained assignment on a selection of `gender`.
gsummary = gender[["Condition", "ident"]].copy()
dat = gender[ixs].values
# get_prop is applied to each row of `dat` (axis 1), i.e. once per participant
props = np.apply_along_axis(get_prop, 1, dat, positive80,
                            negative80)
gsummary["ppos"] = props
# One row per participant; Condition, ident and ppos columns
assert gsummary.values.shape == (len(gender), 3)

For participants in the male condition, the difference in means mean(% positive words chosen amongst males) - mean(% positive words chosen amongst females) > 0.

In [40]:
# MALE condition: male-identifying (a) vs. female-identifying (b) participants
male_cond = gsummary[gsummary["Condition"] == "MALE"]
a = male_cond.loc[male_cond["ident"] == "MALE", "ppos"].values
b = male_cond.loc[male_cond["ident"] == "FEMALE", "ppos"].values
one_sided_ttest(a, b)

(0.9204290847258171, 0.18168055035012598, 36.68991183224334)

For participants in the female condition, the difference in means mean(% positive words chosen amongst females) - mean(% positive words chosen amongst males) > 0.

In [41]:
# FEMALE condition: female-identifying (a) vs. male-identifying (b) participants
female_cond = gsummary[gsummary["Condition"] == "FEMALE"]
a = female_cond.loc[female_cond["ident"] == "FEMALE", "ppos"].values
b = female_cond.loc[female_cond["ident"] == "MALE", "ppos"].values
one_sided_ttest(a, b)

(1.3148706515920061, 0.09749007297910296, 46.65425970753836)

The difference in means mean(% positive words chosen amongst those whose condition matched their gender identity) - mean(% positive words chosen amongst those whose condition didn't match their gender identity) > 0

In [42]:
# Participants whose condition matches their gender identity (a) vs. the rest (b);
# a and b are passed on as pandas Series here
matched = gsummary["Condition"] == gsummary["ident"]
a = gsummary.loc[matched, "ppos"]
b = gsummary.loc[~matched, "ppos"]
one_sided_ttest(a, b)

(2.376005410057496, 0.009395561857951163, 146.88517373416838)

### 3. Within-subjects analysis

Considers a word to be the more positive one when the participant themselves selected it as more positively-valenced in Survey 1.

#### Participant info

In [43]:
# Despite the name this is a row filter, not a merge: keep the gender-survey
# rows whose index also appears in `valence` (which, at this point, holds only
# the participants retained by the attention-check filter applied earlier).
genderInnerJoinValence = gender.loc[gender.index.isin(valence.index)]
len(genderInnerJoinValence)

135

In [44]:
demographic_info(genderInnerJoinValence.loc[genderInnerJoinValence.ident == "FEMALE"])

Age: 23.5714285714 (SE = 0.869527941928)
Gender: [('Female', 91)]


In [45]:
demographic_info(genderInnerJoinValence.loc[genderInnerJoinValence.ident == "MALE"])

Age: 25.1818181818 (SE = 1.58223245529)
Gender: [('Male', 44)]


In [46]:
# Group sizes as (Condition, ident), in the order
# (MALE, MALE), (MALE, FEMALE), (FEMALE, MALE), (FEMALE, FEMALE)
tuple(
    len(genderInnerJoinValence.loc[(genderInnerJoinValence.Condition == cond) &
                                   (genderInnerJoinValence.ident == ident)])
    for cond in ("MALE", "FEMALE")
    for ident in ("MALE", "FEMALE")
)

(21, 54, 23, 37)

#### Logistic regression

In [47]:
# Pre-registered: "The within-subject analyses would be run
# both for items within each pre-specified valence category
# (25 positive items, 25 negative items and 25 neutral
# items), and using all 75 non-distractor items."
#
# Candidate stimulus index ranges. Change the key used on the
# last line to restrict the subset of stims used for analysis.
stim_subsets = {
    "positive": np.arange(100, 125), # Positive items
    "negative": np.arange(125, 150), # Negative items
    "neutral": np.arange(150, 175), # Neutral items
    "all": np.arange(100, 175), # Positive, negative and neutral items
}
# Select explicitly instead of relying on the last of several overwriting
# assignments being the one that takes effect.
ixs_ = stim_subsets["all"]

In [48]:
gdummied_ws, Y = dummy(gender, 
                       classes = [ "MALE", "FEMALE" ], 
                       within = True, 
                       ixs = ixs_)
X, Y = df_to_matrix(gdummied_ws, Y, 
                    columns = { 0: "id", 1: "condition", 
                                2: (0,1), 3: "order" }, 
                    ixs = ixs_)

In [49]:
# Fit the regression, print the first four coefficients and display the AUC.
logit = SparseLR(Y, X)
# Parenthesized single-argument print behaves the same under Python 2 and 3,
# unlike the bare `print x` statement.
print(logit.coef[:4])
logit.auc

[0.         0.         0.02428076 0.        ]


0.5236100053163959

#### t-tests

In [50]:
gsummary_ws = summarize(gdummied_ws, ixs_)

For participants in the male condition, the difference in means mean(% positive words chosen amongst males) - mean(% positive words chosen amongst females) > 0.

In [51]:
# Condition == 1 rows: ident == 1 (a) vs. ident == 0 (b).
# Per the accompanying text, 1 codes MALE and 0 codes FEMALE here.
cond_one = gsummary_ws[gsummary_ws["Condition"] == 1]
a = cond_one.loc[cond_one["ident"] == 1, "p"]
b = cond_one.loc[cond_one["ident"] == 0, "p"]
one_sided_ttest(a, b)

(0.2423521225440973, 0.40507171508210915, 30.241311563158497)

For participants in the female condition, the difference in means mean(% positive words chosen amongst females) - mean(% positive words chosen amongst males) > 0.

In [52]:
# Condition == 0 rows: ident == 0 (a) vs. ident == 1 (b).
# Per the accompanying text, 0 codes FEMALE and 1 codes MALE here.
cond_zero = gsummary_ws[gsummary_ws["Condition"] == 0]
a = cond_zero.loc[cond_zero["ident"] == 0, "p"]
b = cond_zero.loc[cond_zero["ident"] == 1, "p"]
one_sided_ttest(a, b)

(2.304410814138132, 0.013409988019385932, 37.598218286572525)

The difference in means mean(% positive words chosen amongst those whose condition matched their gender identity) - mean(% positive words chosen amongst those whose condition didn't match their gender identity) > 0

In [53]:
# Participants whose condition matches their gender identity (a) vs. the rest (b).
# Only b has its NaN entries removed; a is used as-is.
matched = gsummary_ws["Condition"] == gsummary_ws["ident"]
a = gsummary_ws.loc[matched, "p"]
b = gsummary_ws.loc[~matched, "p"].dropna()

In [54]:
np.mean(a), stats.sem(a)

(0.5731461876009749, 0.018095610520860856)

In [55]:
np.mean(b), stats.sem(b)

(0.5205808485248917, 0.014891747193851126)

In [56]:
# Same unequal-variance, one-sided ("larger") test that one_sided_ttest(a, b)
# performs, written out here so that dsw_a, dsw_b and cm stay available for the
# difference-in-means interval computed in the next cell.
dsw_a = sms.DescrStatsW(a)
dsw_b = sms.DescrStatsW(b)
cm = sms.CompareMeans(dsw_a, dsw_b)
cm.ttest_ind(usevar="unequal", alternative = "larger")

(2.2429943236336713, 0.013288259844192529, 131.0486954062288)

In [57]:
# Difference in group means and the interval delta +/- 2 SE (approximately 95%).
# NOTE(review): std_meandiff_separatevar is taken to be the standard error of
# the mean difference under separate (unequal) variances -- confirm against the
# statsmodels CompareMeans documentation.
delta = dsw_a.mean - dsw_b.mean
se_delta = cm.std_meandiff_separatevar
# Under Python 2 this prints a single tuple: (delta, lower, upper)
print(delta, delta - 2*se_delta, delta + 2*se_delta)

(0.052565339076083184, 0.005694655078161283, 0.09943602307400509)
