# Results 01

In [1]:
import os
import numpy as np
from pandas import read_csv

## Section 1: Participant Demographics

#### 1.1 Load and prepare data

In [2]:
## Load metadata.
metadata = read_csv(os.path.join('data', 'metadata.csv'))

## Restrict to first time participants.
metrics = read_csv(os.path.join('data', 'metrics.csv'))
metadata = metadata[metadata.subject.isin(metrics.subject)].reset_index(drop=True)

#### 1.2 Platform composition

In [3]:
metadata.platform.value_counts()

prolific    200
mturk       193
Name: platform, dtype: int64

#### 1.3 Age composition

In [5]:
from scipy.stats import ttest_ind

## Bin ages.
metadata['age_bin'] = np.digitize(metadata.age, [25.5,35.5,45.5,55.5])
metadata['age_bin'] = metadata.age_bin.replace({0:'18-25',1:'26-35',2:'36-45',3:'46-55',4:'55+'})

## Construct pivot table.
age = metadata.groupby(['platform','age_bin']).subject.count().reset_index()
age['%'] = age.groupby(['platform'], group_keys=False).subject.apply(lambda x: x / x.sum()).round(3) * 100
age = age.pivot_table(['age_bin','%'], 'age_bin', 'platform')
age = age.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False])

## Perform 2-sample t-test.
mu = metadata.query('platform=="mturk"').age.mean() - metadata.query('platform=="prolific"').age.mean()
tval, pval = ttest_ind(
    metadata.query('platform=="mturk"').age.dropna().values, 
    metadata.query('platform=="prolific"').age.dropna().values
)

## Compute effect size.
dval = tval * np.sqrt(np.sum(1 / metadata.platform.value_counts()))

## Compute confidence interval. 
lb = mu - 1.96 * (mu / tval)
ub = mu + 1.96 * (mu / tval)

## Report statistics.
print('mu = %0.1f, t = %0.3f, p = %0.3f, d = %0.3f, 95%% CI = [%0.1f, %0.1f]' %(mu, tval, pval, dval, lb, ub))

## Display age composition.
age

mu = 4.9, t = 4.248, p = 0.000, d = 0.429, 95% CI = [2.7, 7.2]


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
age_bin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
18-25,11.0,5.7,47.0,23.5
26-35,71.0,36.8,76.0,38.0
36-45,60.0,31.1,41.0,20.5
46-55,29.0,15.0,22.0,11.0
55+,22.0,11.4,14.0,7.0


#### 1.4 Gender composition

In [8]:
from statsmodels.stats.proportion import proportions_ztest, proportion_effectsize, confint_proportions_2indep

## Construct pivot table.
gender = metadata.groupby(['platform','gender']).subject.count().reset_index()
gender['%'] = gender.groupby(['platform'], group_keys=False).subject.apply(lambda x: x / x.sum()).round(3) * 100
gender = gender.pivot_table(['subject','%'], 'gender', 'platform')
gender = gender.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Extract observations.
k1, k2 = 124, 73
n1, n2 = 193, 200
p1, p2 = k1 / n1, k2 / n2

## Compute 2-sample proportions test.
zval, pval = proportions_ztest([k1, k2], [n1, n2], alternative='two-sided')

## Compute effect size.
hval = proportion_effectsize(p1, p2)

## Compute confidence interval.
lb, ub = confint_proportions_2indep(k1, n1, k2, n2, compare='diff', method='wald')

## Report statistics.
print('MTurk:    %s / %s (%0.3f)' %(k1, n1, p1))
print('Prolific:  %s / %s (%0.3f)' %(k2, n2, p2))
print('z(%s) = %0.3f, p = %0.3f, h = %0.3f, 95%% CI = [%0.3f, %0.3f]' %((n1+n2-2), zval, pval, hval, lb, ub))

## Print gender composition.
gender

MTurk:    124 / 193 (0.642)
Prolific:  73 / 200 (0.365)
z(391) = 5.500, p = 0.000, h = 0.562, 95% CI = [0.182, 0.372]


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Female,68,35.2,122,61.0
Male,124,64.2,73,36.5
Other,1,0.5,5,2.5


#### Ethnicity composition

In [6]:
from statsmodels.stats.proportion import proportions_ztest

## Construct pivot table.
ethnicity = metadata.groupby(['platform','ethnicity']).subject.count().reset_index()
ethnicity['%'] = ethnicity.groupby(['platform']).subject.apply(lambda x: x / x.sum()).round(3) * 100
ethnicity = ethnicity.pivot_table(['subject','%'], 'ethnicity', 'platform')
ethnicity = ethnicity.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Perform 2-sample proportion test.
zval, pval = proportions_ztest([177, 180], [193, 200])
print('z = %0.3f, p = %0.3f' %(tval, pval))

## Print ethnic composition.
ethnicity

z = 4.248, p = 0.557


platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
ethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Hispanic or Latino,16.0,8.3,12.0,6.0
Not Hispanic or Latino,177.0,91.7,180.0,90.0
Rather not say,0.0,0.0,8.0,4.0


#### Race composition

In [7]:
## Define compression function.
f = lambda x: eval(x)[0] if len(eval(x)) == 1 else 'Multiracial'
metadata['race_compress'] = metadata.race.apply(f)

## Construct pivot table.
race = metadata.groupby(['platform','race_compress']).subject.count().reset_index()
race['%'] = race.groupby(['platform']).subject.apply(lambda x: x / x.sum()).round(3) * 100
race = race.pivot_table(['subject','%'], 'race_compress', 'platform')
race = race.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print racial composition.
race

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
race_compress,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
American Indian/Alaska Native,2.0,1.0,0.0,0.0
Asian,15.0,7.8,41.0,20.5
Black or African American,13.0,6.7,12.0,6.0
Multiracial,6.0,3.1,10.0,5.0
Rather not say,1.0,0.5,4.0,2.0
White,156.0,80.8,133.0,66.5
