In [371]:
import pandas as pd

## Creating counterbalanced stimulus lists for novel rating study

In [372]:
# read stimuli
df = pd.read_csv('data/novel_stimuli_plus_prasada_data.csv')
df = df.sort_values(['subject', 'type']).reset_index(drop=True)
df.property = ' ' + df.property
display(df)

Unnamed: 0,type,item,subject,is_are,property,origin,Truth.judgment,Cue.validity.estimate,Prevalence.estimate,in.general,by.virtue.of,causal.essence,statistical,formal,should
0,principled,americans live in america,Americans,are they,live in America,novel,,,,,,,,,
1,statistical,americans are brunettes,Americans,are they,are brunettes,2013,0.21,0.21,49.84,,,,,,
2,principled,brazilians are south american,Brazilians,are they,are South American,novel,,,,,,,,,
3,statistical,brazilians like soccer,Brazilians,are they,like soccer,2006,,,71.79,5.72,2.89,2.06,2.89,4.50,3.33
4,principled,canadians live north of the us,Canadians,are they,live north of the US,novel,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,statistical,wallets are made of leather,wallets,are they,are made of leather,2006,,,63.53,5.67,2.11,1.72,3.06,4.22,4.67
204,principled,winters are cold,winters,are they,are cold,novel,,,,,,,,,
205,statistical,winters are snowy,winters,are they,are snowy,2006 & 2013,1.29,1.58,72.39,6.00,4.50,2.28,4.72,4.11,3.06
206,principled,worms live in the ground,worms,are they,live in the ground,novel,,,,,,,,,


In [373]:
# generate bare generic, by-virtue-of, prevalence, and cue-validity queries
df['generic'] = df.subject.str.capitalize() + df.property + '.'
df['by_virtue_of'] = '<i>Because</i> ' + (
    df.is_are
    .str.replace('are they', 'they are')
    .str.replace('is it', 'it is')
) + ' ' + df.subject + ', ' + df.subject + df.property + '.'
df['prevalence'] = 'Think about ' + df.subject + ', how likely ' + df.is_are + ' to' + (
    df.property
    .str.replace(' are ', ' be ')
    .str.replace(" don’t ", ' not ')
    .str.replace(' can ', ' be able to ')
    .str.replace(' is ', ' be ')
) + '?'
df['cue_validity'] = 'You learn that [unknown thing/people]' + df.property + ', how likely is it to be ' + df.subject + '?'
display(df)

Unnamed: 0,type,item,subject,is_are,property,origin,Truth.judgment,Cue.validity.estimate,Prevalence.estimate,in.general,by.virtue.of,causal.essence,statistical,formal,should,generic,by_virtue_of,prevalence,cue_validity
0,principled,americans live in america,Americans,are they,live in America,novel,,,,,,,,,,Americans live in America.,"<i>Because</i> they are Americans, Americans l...","Think about Americans, how likely are they to ...",You learn that [unknown thing/people] live in ...
1,statistical,americans are brunettes,Americans,are they,are brunettes,2013,0.21,0.21,49.84,,,,,,,Americans are brunettes.,"<i>Because</i> they are Americans, Americans a...","Think about Americans, how likely are they to ...",You learn that [unknown thing/people] are brun...
2,principled,brazilians are south american,Brazilians,are they,are South American,novel,,,,,,,,,,Brazilians are South American.,"<i>Because</i> they are Brazilians, Brazilians...","Think about Brazilians, how likely are they to...",You learn that [unknown thing/people] are Sout...
3,statistical,brazilians like soccer,Brazilians,are they,like soccer,2006,,,71.79,5.72,2.89,2.06,2.89,4.50,3.33,Brazilians like soccer.,"<i>Because</i> they are Brazilians, Brazilians...","Think about Brazilians, how likely are they to...",You learn that [unknown thing/people] like soc...
4,principled,canadians live north of the us,Canadians,are they,live north of the US,novel,,,,,,,,,,Canadians live north of the US.,"<i>Because</i> they are Canadians, Canadians l...","Think about Canadians, how likely are they to ...",You learn that [unknown thing/people] live nor...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,statistical,wallets are made of leather,wallets,are they,are made of leather,2006,,,63.53,5.67,2.11,1.72,3.06,4.22,4.67,Wallets are made of leather.,"<i>Because</i> they are wallets, wallets are m...","Think about wallets, how likely are they to be...",You learn that [unknown thing/people] are made...
204,principled,winters are cold,winters,are they,are cold,novel,,,,,,,,,,Winters are cold.,"<i>Because</i> they are winters, winters are c...","Think about winters, how likely are they to be...",You learn that [unknown thing/people] are cold...
205,statistical,winters are snowy,winters,are they,are snowy,2006 & 2013,1.29,1.58,72.39,6.00,4.50,2.28,4.72,4.11,3.06,Winters are snowy.,"<i>Because</i> they are winters, winters are s...","Think about winters, how likely are they to be...",You learn that [unknown thing/people] are snow...
206,principled,worms live in the ground,worms,are they,live in the ground,novel,,,,,,,,,,Worms live in the ground.,"<i>Because</i> they are worms, worms live in t...","Think about worms, how likely are they to live...",You learn that [unknown thing/people] live in ...


In [374]:
# random check of the counterbalancing and queries for a few stimulus subjects
df.query('subject == "milk"')

Unnamed: 0,type,item,subject,is_are,property,origin,Truth.judgment,Cue.validity.estimate,Prevalence.estimate,in.general,by.virtue.of,causal.essence,statistical,formal,should,generic,by_virtue_of,prevalence,cue_validity
128,principled,milk is white,milk,is it,is white,2006,,,92.12,5.06,4.83,3.0,5.17,4.17,5.39,Milk is white.,"<i>Because</i> it is milk, milk is white.","Think about milk, how likely is it to be white?",You learn that [unknown thing/people] is white...
129,statistical,milk comes from cows,milk,is it,comes from cows,novel,,,,,,,,,,Milk comes from cows.,"<i>Because</i> it is milk, milk comes from cows.","Think about milk, how likely is it to comes fr...",You learn that [unknown thing/people] comes fr...


In [375]:
df.query('subject == "spandex"')

Unnamed: 0,type,item,subject,is_are,property,origin,Truth.judgment,Cue.validity.estimate,Prevalence.estimate,in.general,by.virtue.of,causal.essence,statistical,formal,should,generic,by_virtue_of,prevalence,cue_validity
174,principled,spandex is stretchy,spandex,is it,is stretchy,2006,,,94.1,4.94,5.78,3.0,4.83,4.78,6.67,Spandex is stretchy.,"<i>Because</i> it is spandex, spandex is stret...","Think about spandex, how likely is it to be st...",You learn that [unknown thing/people] is stret...
175,statistical,spandex is thin,spandex,is it,is thin,novel,,,,,,,,,,Spandex is thin.,"<i>Because</i> it is spandex, spandex is thin.","Think about spandex, how likely is it to be thin?","You learn that [unknown thing/people] is thin,..."


In [376]:
df.query('subject == "grass"')

Unnamed: 0,type,item,subject,is_are,property,origin,Truth.judgment,Cue.validity.estimate,Prevalence.estimate,in.general,by.virtue.of,causal.essence,statistical,formal,should,generic,by_virtue_of,prevalence,cue_validity
106,principled,grass is green,grass,is it,is green,2006,,,94.47,5.06,5.06,3.0,4.33,5.11,5.72,Grass is green.,"<i>Because</i> it is grass, grass is green.","Think about grass, how likely is it to be green?",You learn that [unknown thing/people] is green...
107,statistical,grass grows on lawns,grass,is it,grows on lawns,novel,,,,,,,,,,Grass grows on lawns.,"<i>Because</i> it is grass, grass grows on lawns.","Think about grass, how likely is it to grows o...",You learn that [unknown thing/people] grows on...


In [377]:
# create fillers and catch trials; allocate blocks
block_allocation = 2 * [
    'generic',
    'by_virtue_of',
    'prevalence',
    'cue_validity',
]

fillers = [
    'Committing murders for fun is morally wrong.',
    'Stealing toys from little children is a bad thing to do.',
    'One cent is a lot of money.',
    'The tallest man alive is five feet tall.',
    'The heaviest man alive weighs twelve pounds.',
    'Tornadoes are dangerous.',
    'Getting struck by lightning is bad for your health.',
    'Two plus one is equal to six.',
]
filler_answers = [
    'completely true',
    'completely true',
    'completely false',
    'completely false',
    'completely false',
    'completely true',
    'completely true',
    'completely false',
]
fillers = pd.DataFrame({'stimulus': fillers, 'answer': filler_answers, 'question_type': block_allocation})
fillers['prompt'] = 'How true is the above statement?'
fillers['catch'] = 'filler'
fillers['choices'] = 'completely false,,,neither true nor false,,,completely true'

catch = 4 * [
    'Please select <i>completely true</i>.',
    'Please select <i>completely false</i>.',
]
catch_answers = 4 * [
    'completely true',
    'completely false',
]
catch = pd.DataFrame({'stimulus': catch, 'answer': catch_answers, 'question_type': block_allocation})
catch['prompt'] = ''
catch['catch'] = 'catch'
catch['choices'] = 'completely false,,,neither true nor false,,,completely true'

In [378]:
# create blocks for each of the four stimulus types
generic = df[['item', 'type', 'subject', 'property', 'generic']].copy()
generic['question_type'] = 'generic'
generic = generic.rename(columns={'generic': 'stimulus'})
generic['prompt'] = 'How true is the above statement?'
generic['choices'] = 'completely false,,,neither true nor false,,,completely true'

by_virtue_of = df[['item', 'type', 'subject', 'property', 'by_virtue_of']].copy()
by_virtue_of['question_type'] = 'by_virtue_of'
by_virtue_of = by_virtue_of.rename(columns={'by_virtue_of': 'stimulus'})
by_virtue_of['prompt'] = 'How true is the above statement?'
by_virtue_of['choices'] = 'completely false,,,neither true nor false,,,completely true'

prevalence = df[['item', 'type', 'subject', 'property', 'prevalence']].copy()
prevalence['question_type'] = 'prevalence'
prevalence = prevalence.rename(columns={'prevalence': 'stimulus'})
prevalence['prompt'] = ''
prevalence['choices'] = 'very unlikely,,,neither unlikely nor likely,,,very likely'

cue_validity = df[['item', 'type', 'subject', 'property', 'cue_validity']].copy()
cue_validity['question_type'] = 'cue_validity'
cue_validity = cue_validity.rename(columns={'cue_validity': 'stimulus'})
cue_validity['prompt'] = ''
cue_validity['choices'] = 'very unlikely,,,neither unlikely nor likely,,,very likely'

trials = pd.concat([in_general, by_virtue_of, normative, generic, prevalence, cue_validity])
display(trials)

Unnamed: 0,item,type,subject,property,stimulus,question_type,prompt,choices
0,americans live in america,principled,Americans,live in America,"<i>In general,</i> Americans live in America.",in_general,How true is the above statement?,"completely false,,,neither true nor false,,,co..."
1,americans are brunettes,statistical,Americans,are brunettes,"<i>In general,</i> Americans are brunettes.",in_general,How true is the above statement?,"completely false,,,neither true nor false,,,co..."
2,brazilians are south american,principled,Brazilians,are South American,"<i>In general,</i> Brazilians are South American.",in_general,How true is the above statement?,"completely false,,,neither true nor false,,,co..."
3,brazilians like soccer,statistical,Brazilians,like soccer,"<i>In general,</i> Brazilians like soccer.",in_general,How true is the above statement?,"completely false,,,neither true nor false,,,co..."
4,canadians live north of the us,principled,Canadians,live north of the US,"<i>In general,</i> Canadians live north of the...",in_general,How true is the above statement?,"completely false,,,neither true nor false,,,co..."
...,...,...,...,...,...,...,...,...
203,wallets are made of leather,statistical,wallets,are made of leather,You learn that [unknown thing/people] are made...,cue_validity,,"very unlikely,,,neither unlikely nor likely,,,..."
204,winters are cold,principled,winters,are cold,You learn that [unknown thing/people] are cold...,cue_validity,,"very unlikely,,,neither unlikely nor likely,,,..."
205,winters are snowy,statistical,winters,are snowy,You learn that [unknown thing/people] are snow...,cue_validity,,"very unlikely,,,neither unlikely nor likely,,,..."
206,worms live in the ground,principled,worms,live in the ground,You learn that [unknown thing/people] live in ...,cue_validity,,"very unlikely,,,neither unlikely nor likely,,,..."


In [379]:
# create sixteen counterbalanced list versions
subsets = []

for i in range(16):
    subset = pd.concat([
        
        generic.iloc[i % 16::16],
        generic.iloc[(i + 9) % 16::16],
        
        by_virtue_of.iloc[(i + 2) % 16::16],
        by_virtue_of.iloc[(i + 11) % 16::16],
        
        prevalence.iloc[(i + 4) % 16::16],
        prevalence.iloc[(i + 13) % 16::16],
        
        cue_validity.iloc[(i + 6) % 16::16],
        cue_validity.iloc[(i + 15) % 16::16],
        
        catch,
        fillers,
    ])
    subset['list_number'] = i
    subsets.append(subset)
    subset.to_csv(f'trials/trials_{i}.csv', index=False)
    
subsets = pd.concat(subsets)

In [380]:
# double check the counterbalancing scheme by counting occurrences of each item and each question type
# there should be 8 occurrences of each item, 2 per query type
subsets = subsets[subsets.item.notna()]
display(subsets.groupby('item').count())
display(subsets.groupby(['item', 'question_type']).count())

Unnamed: 0_level_0,type,subject,property,stimulus,question_type,prompt,choices,answer,catch,list_number
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
airplanes have passengers,8,8,8,8,8,8,8,0,0,8
airplanes have wings,8,8,8,8,8,8,8,0,0,8
ambulances have seatbelts,8,8,8,8,8,8,8,0,0,8
ambulances have sirens,8,8,8,8,8,8,8,0,0,8
americans are brunettes,8,8,8,8,8,8,8,0,0,8
...,...,...,...,...,...,...,...,...,...,...
wallets hold money,8,8,8,8,8,8,8,0,0,8
winters are cold,8,8,8,8,8,8,8,0,0,8
winters are snowy,8,8,8,8,8,8,8,0,0,8
worms are used as bait,8,8,8,8,8,8,8,0,0,8


Unnamed: 0_level_0,Unnamed: 1_level_0,type,subject,property,stimulus,prompt,choices,answer,catch,list_number
item,question_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
airplanes have passengers,by_virtue_of,2,2,2,2,2,2,0,0,2
airplanes have passengers,cue_validity,2,2,2,2,2,2,0,0,2
airplanes have passengers,generic,2,2,2,2,2,2,0,0,2
airplanes have passengers,prevalence,2,2,2,2,2,2,0,0,2
airplanes have wings,by_virtue_of,2,2,2,2,2,2,0,0,2
...,...,...,...,...,...,...,...,...,...,...
worms are used as bait,prevalence,2,2,2,2,2,2,0,0,2
worms live in the ground,by_virtue_of,2,2,2,2,2,2,0,0,2
worms live in the ground,cue_validity,2,2,2,2,2,2,0,0,2
worms live in the ground,generic,2,2,2,2,2,2,0,0,2


Looks good!