In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

In [2]:
# Load the raw stimulus table and keep only the columns used downstream.
stimuli_columns = ['set_nr', 'specificity', 'word1', 'word2']
stimuli = pd.read_csv('../../stimuli_specificity_2023-05-25.csv')[stimuli_columns]

Extract the option text for each specificity level: `word2` for high and low, `word1 word2` for mid

In [3]:
# Reshape the raw table into long format: one row per (set, specificity level)
# with a single `option` column holding the text shown for that level.
#   high / low -> `word2` on its own
#   mid        -> `word1` and `word2` joined by a space
# NOTE: `stimuli` is rebound to the long-format frame at the end of this cell,
# so the cell cannot be re-run without first re-running the cell that loads
# the raw table (the `word1`/`word2` columns are gone afterwards).
long_columns = ['set_nr', 'specificity', 'option']

high = (
    stimuli[stimuli['specificity'] == 'high']
    .assign(option=lambda frame: frame['word2'])
    [long_columns]
)
mid = (
    stimuli[stimuli['specificity'] == 'mid']
    .assign(option=lambda frame: frame['word1'] + ' ' + frame['word2'])
    [long_columns]
)
low = (
    stimuli[stimuli['specificity'] == 'low']
    .assign(option=lambda frame: frame['word2'])
    [long_columns]
)

# Stack the three levels; the original row index is kept.
stimuli = pd.concat([high, mid, low])
stimuli

Unnamed: 0,set_nr,specificity,option
0,1,high,kangaroo
3,2,high,snail
6,3,high,cardinal
9,4,high,kingfisher
12,5,high,ostrich
...,...,...,...
290,97,low,vessel
293,98,low,vessel
296,99,low,wine
299,100,low,food


Build pairs of options

In [6]:
# Build every pairwise combination of the three options in a set.
# Rows are shuffled first with a fixed seed, so which specificity level comes
# first in a pair is randomised but reproducible.
options_new = []
specificity_new = []
set_nr_new = []
stimuli_shuffled = shuffle(stimuli, random_state=42)  # shuffle everything

# sort=False iterates the sets in order of first appearance in the shuffled
# frame, and rows inside each group keep their shuffled order.
for i_set, stimuli_subset in stimuli_shuffled.groupby('set_nr', sort=False):
    # Positions (within the shuffled set) of the two options forming a pair.
    for combination in [[0, 1], [1, 2], [0, 2]]:
        pair = stimuli_subset.iloc[combination]
        options_new.append(pair['option'].tolist())
        specificity_new.append(pair['specificity'].tolist())
        set_nr_new.append(i_set)

# Shape (n_pairs, 2) -> (2, n_pairs): row 0 is the first option, row 1 the second.
options_new = np.array(options_new).transpose()
stimuli_new = pd.DataFrame({'set_nr': set_nr_new,
                            'specificity': specificity_new,
                            'option1': options_new[0],
                            'option2': options_new[1]})

# A stable sort keeps the three pairs of a set in the order they were built.
# The default quicksort gives no such guarantee for tied keys, so the item
# numbers assigned within a set would otherwise not be reproducible.
stimuli_new = stimuli_new.sort_values(by='set_nr', kind='stable').reset_index(drop=True)
stimuli_new['item_nr'] = stimuli_new.index + 1  # 1-based running item number
stimuli_new = stimuli_new[['item_nr', 'set_nr', 'specificity', 'option1', 'option2']]
stimuli_new

Unnamed: 0,item_nr,set_nr,specificity,option1,option2
0,1,79,"[mid, high]",digging tool,spade
1,2,79,"[high, low]",spade,tool
2,3,79,"[mid, low]",digging tool,tool
3,4,27,"[low, mid]",cat,striped cat
4,5,27,"[mid, high]",striped cat,tiger
...,...,...,...,...,...
298,299,89,"[low, mid]",vegetable,starchy vegetable
299,300,89,"[high, mid]",potato,starchy vegetable
300,301,51,"[high, low]",grape,fruit
301,302,51,"[low, mid]",fruit,purple fruit


Make Gorilla spreadsheets: counterbalance whether the less or the more specific item in a pair is the correct choice (one CSV per condition)

In [5]:
# Write one spreadsheet per counterbalancing condition. In the 'less' file the
# correct answer is the side holding the less specific option; in the 'more'
# file it is the side holding the more specific option. `option1` counts as
# 'left' and `option2` as 'right'.
specificity_mapping = {'low': 1, 'mid': 2, 'high': 3}

# Work on a copy so that `stimuli_new` is not modified by this cell.
stimuli_new_with_answers = stimuli_new.copy()

# True where the first (left) option is less specific than the second (right).
# Plain dict indexing is used so an unknown specificity label raises KeyError.
left_is_less = np.array(
    [specificity_mapping[pair[0]] < specificity_mapping[pair[1]]
     for pair in stimuli_new_with_answers['specificity']],
    dtype=bool,
)

for answer in ['less', 'more']:
    # 'less': choose left when left is less specific; 'more': the opposite side.
    choose_left = left_is_less if answer == 'less' else ~left_is_less
    stimuli_new_with_answers['answer'] = np.where(choose_left, 'left', 'right')
    # `index=False` omits the row index from the file (`index_col` is a
    # read_csv argument and is rejected by to_csv).
    stimuli_new_with_answers.to_csv(f'stimuli_gorilla_choose-{answer}.csv', index=False)