In [2]:
import ast
import json
import os
from os.path import join

import numpy as np
from pandas import read_csv

## Read metadata.csv from the data directory into a DataFrame.
metadata_path = join('data', 'metadata.csv')
metadata = read_csv(metadata_path)

#### Total sample

In [2]:
## Show how many rows of the metadata belong to each platform.
platform_counts = metadata['platform'].value_counts()
print(platform_counts)

mturk       205
prolific    201
Name: platform, dtype: int64


#### Age composition

In [3]:
## Bin ages.
## The edges sit halfway between integer ages, so np.digitize returns
## 0 below 25.5, 1 for 26-35, 2 for 36-45, 3 for 46-55 and 4 above 55.5.
## NOTE(review): the last bin therefore starts at age 56 even though it is
## labelled '55+'; the label is kept unchanged so existing tables still match.
metadata['age_bin'] = np.digitize(metadata.age, [25.5,35.5,45.5,55.5])
metadata['age_bin'] = metadata.age_bin.replace({0:'18-25',1:'26-35',2:'36-45',3:'46-55',4:'55+'})

## Construct pivot table.
## Count subjects for every (platform, age bin) combination.
age = metadata.groupby(['platform','age_bin']).subject.count().reset_index()
## Percentage within each platform. transform('sum') returns the platform
## total aligned to the rows of `age`; a transform-like groupby.apply()
## prepends the group key to the index on pandas >= 2.0, which makes the
## column assignment fail.
age['%'] = (age.subject / age.groupby('platform').subject.transform('sum')).round(3) * 100
## The pivoted values are the counts and percentages ('subject' and '%'),
## with 'age_bin' only serving as the row index.
age = age.pivot_table(values=['subject','%'], index='age_bin', columns='platform')
## Put platform on the outer column level with 'subject' shown before '%'.
## Bins that are empty for a platform become 0 instead of NaN, as in the
## other composition tables.
age = age.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print age composition.
age

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
age_bin,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
18-25,16,7.8,79,39.3
26-35,86,42.0,69,34.3
36-45,49,23.9,31,15.4
46-55,26,12.7,13,6.5
55+,28,13.7,9,4.5


#### Gender composition

In [4]:
## Construct pivot table.
## Count subjects for every (platform, gender) combination.
gender = metadata.groupby(['platform','gender-categorical']).subject.count().reset_index()
## Percentage within each platform. transform('sum') returns the platform
## total aligned to the rows of `gender`; a transform-like groupby.apply()
## prepends the group key to the index on pandas >= 2.0, which makes the
## column assignment fail.
gender['%'] = (gender.subject / gender.groupby('platform').subject.transform('sum')).round(3) * 100
gender = gender.pivot_table(values=['subject','%'], index='gender-categorical', columns='platform')
## Put platform on the outer column level with 'subject' shown before '%';
## categories absent from a platform become 0 instead of NaN.
gender = gender.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print gender composition.
gender

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
gender-categorical,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Female,86.0,42.0,112.0,55.7
Male,119.0,58.0,86.0,42.8
Other,0.0,0.0,3.0,1.5


#### Ethnicity composition

In [5]:
## Construct pivot table.
## Count subjects for every (platform, ethnicity) combination.
ethnicity = metadata.groupby(['platform','ethnicity']).subject.count().reset_index()
## Percentage within each platform. transform('sum') returns the platform
## total aligned to the rows of `ethnicity`; a transform-like groupby.apply()
## prepends the group key to the index on pandas >= 2.0, which makes the
## column assignment fail.
ethnicity['%'] = (ethnicity.subject / ethnicity.groupby('platform').subject.transform('sum')).round(3) * 100
ethnicity = ethnicity.pivot_table(values=['subject','%'], index='ethnicity', columns='platform')
## Put platform on the outer column level with 'subject' shown before '%';
## categories absent from a platform become 0 instead of NaN.
ethnicity = ethnicity.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print ethnic composition.
ethnicity

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
ethnicity,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Hispanic or Latino,27.0,13.2,10.0,5.0
Not Hispanic or Latino,174.0,84.9,184.0,91.5
Rather not say,2.0,1.0,7.0,3.5
Unknown,2.0,1.0,0.0,0.0


#### Race composition

In [6]:
## Define compression function.
def f(x):
    """Collapse a stringified collection of race selections into one label.

    Parameters
    ----------
    x : str
        Text of a Python literal collection, e.g. "['White']".

    Returns
    -------
    The single element when the collection holds exactly one entry,
    otherwise the string 'Multiracial'.

    Raises
    ------
    ValueError or SyntaxError if `x` is not a valid Python literal.
    """
    ## The text comes from a CSV file, so it is parsed with ast.literal_eval,
    ## which only accepts literals and cannot execute code the way eval() can.
    ## Parsing once also avoids evaluating every entry twice.
    selections = ast.literal_eval(x)
    return selections[0] if len(selections) == 1 else 'Multiracial'
## Reduce each subject's race entry to a single category using `f`.
metadata['race_compress'] = metadata.race.apply(f)

## Construct pivot table.
## Count subjects for every (platform, race category) combination.
race = metadata.groupby(['platform','race_compress']).subject.count().reset_index()
## Percentage within each platform. transform('sum') returns the platform
## total aligned to the rows of `race`; a transform-like groupby.apply()
## prepends the group key to the index on pandas >= 2.0, which makes the
## column assignment fail.
race['%'] = (race.subject / race.groupby('platform').subject.transform('sum')).round(3) * 100
race = race.pivot_table(values=['subject','%'], index='race_compress', columns='platform')
## Put platform on the outer column level with 'subject' shown before '%';
## categories absent from a platform become 0 instead of NaN.
race = race.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print racial composition.
race

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
race_compress,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
American Indian/Alaska Native,2.0,1.0,2.0,1.0
Asian,5.0,2.4,53.0,26.4
Black or African American,30.0,14.6,7.0,3.5
Multiracial,6.0,2.9,4.0,2.0
Native Hawaiian or other Pacific Islander,0.0,0.0,1.0,0.5
Rather not say,1.0,0.5,12.0,6.0
White,161.0,78.5,122.0,60.7


#### Platform use

In [7]:
## Construct pivot table.
## Count subjects for every (platform, other-platform answer) combination.
platform = metadata.groupby(['platform','other-platform']).subject.count().reset_index()
## Percentage within each platform. transform('sum') returns the platform
## total aligned to the rows of `platform`; a transform-like groupby.apply()
## prepends the group key to the index on pandas >= 2.0, which makes the
## column assignment fail.
platform['%'] = (platform.subject / platform.groupby('platform').subject.transform('sum')).round(3) * 100
platform = platform.pivot_table(values=['subject','%'], index='other-platform', columns='platform')
## Put platform on the outer column level with 'subject' shown before '%';
## answers absent from a platform become 0 instead of NaN.
platform = platform.reorder_levels([1,0],axis=1).sort_index(axis=1, ascending=[True,False]).fillna(0)

## Print other platform use.
platform

platform,mturk,mturk,prolific,prolific
Unnamed: 0_level_1,subject,%,subject,%
other-platform,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
No,115,56.1,173,86.1
Yes,90,43.9,28,13.9
