# Supplementary Results 1

In [1]:
import os
import numpy as np
from pandas import read_csv

## Section 1: Participant Demographics

#### 1.1 Load and prepare data

In [2]:
## Load metadata.
metadata = read_csv(os.path.join('data', 'metadata.csv'))

## Restrict to first time participants.
prev_complete = metadata.loc[metadata['prev_complete']=="Yes"]
metadata = metadata.loc[metadata['prev_complete']=="No"].reset_index(drop=True)

## Print number of repeat-participants.
print(prev_complete.platform.value_counts())

mturk       19
prolific     1
Name: platform, dtype: int64


#### 1.2 Platform composition

In [3]:
metadata.platform.value_counts()

prolific    200
mturk       186
Name: platform, dtype: int64

#### 1.3 Age composition

In [4]:
from scipy.stats import ttest_ind

## Bin ages.
metadata['age_bin'] = np.digitize(metadata.age, [25.5,35.5,45.5,55.5])
metadata['age_bin'] = metadata.age_bin.replace({0:'18-25',1:'26-35',2:'36-45',3:'46-55',4:'55+'})

## Construct pivot table.
age = metadata.groupby(['platform','age_bin']).subject.count().reset_index()
age['%'] = age.groupby(['platform'], group_keys=False).subject.apply(lambda x: x / x.sum()).round(3) * 100
age = age.pivot_table(['age_bin','%'], 'age_bin', 'platform')
age = age.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False])

## Compute two-samples t-test.
mu = metadata.query('platform=="mturk"').age.values.mean() - metadata.query('platform=="prolific"').age.values.mean()
tval, pval = ttest_ind(
    metadata.query('platform=="mturk"').age.values, 
    metadata.query('platform=="prolific"').age.values
)

## Compute effect size.
dval = tval * np.sqrt(np.sum(1 / metadata.platform.value_counts()))

## Compute confidence interval. 
lb = mu - 1.96 * (mu / tval)
ub = mu + 1.96 * (mu / tval)

## Report statistics.
print('mu = %0.1f, t = %0.3f, p = %0.3f, d = %0.3f, 95%% CI = [%0.1f, %0.1f]' %(mu, tval, pval, dval, lb, ub))

## Display age composition.
age

mu = 7.7, t = 6.567, p = 0.000, d = 0.669, 95% CI = [5.4, 10.0]


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
age_bin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
18-25,16.0,8.6,78.0,39.0
26-35,76.0,40.9,69.0,34.5
36-45,46.0,24.7,31.0,15.5
46-55,22.0,11.8,13.0,6.5
55+,26.0,14.0,9.0,4.5


#### 1.4 Gender composition

In [5]:
from statsmodels.stats.proportion import proportions_ztest, proportion_effectsize, confint_proportions_2indep

## Construct pivot table.
gender = metadata.groupby(['platform','gender-categorical']).subject.count().reset_index()
gender['%'] = gender.groupby(['platform'], group_keys=False).subject.apply(lambda x: x / x.sum()).round(3) * 100
gender = gender.pivot_table(['subject','%'], 'gender-categorical', 'platform')
gender = gender.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Extract observations.
k1, k2 = 103, 85
n1, n2 = 186, 200
p1, p2 = k1 / n1, k2 / n2

## Compute 2-sample proportions test.
zval, pval = proportions_ztest([k1, k2], [n1, n2], alternative='two-sided')

## Compute effect size.
hval = proportion_effectsize(p1, p2)

## Compute confidence interval.
lb, ub = confint_proportions_2indep(k1, n1, k2, n2, compare='diff', method='wald')

## Report statistics.
print('MTurk:    %s / %s (%0.3f)' %(k1, n1, p1))
print('Prolific:  %s / %s (%0.3f)' %(k2, n2, p2))
print('z(%s) = %0.3f, p = %0.3f, h = %0.3f, 95%% CI = [%0.3f, %0.3f]' %((n1+n2-2), zval, pval, hval, lb, ub))

## Print gender composition.
gender

MTurk:    103 / 186 (0.554)
Prolific:  85 / 200 (0.425)
z(384) = 2.529, p = 0.011, h = 0.258, 95% CI = [0.030, 0.228]


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
gender-categorical,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Female,83.0,44.6,112.0,56.0
Male,103.0,55.4,85.0,42.5
Other,0.0,0.0,3.0,1.5


#### Ethnicity composition

In [6]:
from statsmodels.stats.proportion import proportions_ztest

## Construct pivot table.
ethnicity = metadata.groupby(['platform','ethnicity']).subject.count().reset_index()
ethnicity['%'] = ethnicity.groupby(['platform']).subject.apply(lambda x: x / x.sum()).round(3) * 100
ethnicity = ethnicity.pivot_table(['subject','%'], 'ethnicity', 'platform')
ethnicity = ethnicity.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)


## Perform 2-sample proportion test.
zval, pval = proportions_ztest([174.0, 184.0], [186, 200])
print('z = %0.3f, p = %0.3f' %(tval, pval))

## Print ethnic composition.
ethnicity

z = 6.567, p = 0.558


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
ethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Hispanic or Latino,15.0,8.1,10.0,5.0
Not Hispanic or Latino,168.0,90.3,183.0,91.5
Rather not say,2.0,1.1,7.0,3.5
Unknown,1.0,0.5,0.0,0.0


#### Race composition

In [7]:
## Define compression function.
f = lambda x: eval(x)[0] if len(eval(x)) == 1 else 'Multiracial'
metadata['race_compress'] = metadata.race.apply(f)

## Construct pivot table.
race = metadata.groupby(['platform','race_compress']).subject.count().reset_index()
race['%'] = race.groupby(['platform']).subject.apply(lambda x: x / x.sum()).round(3) * 100
race = race.pivot_table(['subject','%'], 'race_compress', 'platform')
race = race.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print racial composition.
race

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
race_compress,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
American Indian/Alaska Native,2.0,1.1,2.0,1.0
Asian,5.0,2.7,53.0,26.5
Black or African American,21.0,11.3,7.0,3.5
Multiracial,6.0,3.2,4.0,2.0
Native Hawaiian or other Pacific Islander,0.0,0.0,1.0,0.5
Rather not say,1.0,0.5,12.0,6.0
White,151.0,81.2,121.0,60.5


#### Platform use

In [8]:
## Construct pivot table.
platform = metadata.groupby(['platform','other-platform']).subject.count().reset_index()
platform['%'] = platform.groupby(['platform']).subject.apply(lambda x: x / x.sum()).round(3) * 100
platform = platform.pivot_table(['subject','%'], 'other-platform', 'platform')
platform = platform.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print other platform use.
platform

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
other-platform,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
No,115,61.8,172,86.0
Yes,71,38.2,28,14.0
