In [1]:
import numpy as np
import pandas as pd
import random
from itertools import chain
from pathlib import Path


np.random.seed(0)

In [2]:
# root = Path.cwd() / 'stim_set_final_by_cat'
root = Path('/Users/mn/Desktop/g_images/stim_set_final_by_cat')
searchterm_paths = sorted([f for f in root.iterdir() if f.is_dir()])
search_terms = [p.name for p in searchterm_paths]

assert len(search_terms) == 24
print(search_terms)

['airplane', 'bat', 'boat', 'broccoli', 'cat', 'cow', 'elephant', 'face child', 'face female', 'face male', 'fish', 'flower', 'garden tools', 'gymnast', 'hammer', 'hand tools', 'houseplant', 'monkey', 'pineapple', 'surfer', 'tennis player', 'tomato', 'train', 'tree']


In [3]:
# copy all images to one folder

# import shutil
# out = '/Users/mn/Desktop/g_images/stim_set_final_dedup'
# jpegs = [r for r in root.glob('*/*.jpg')]

# for j in jpegs:
#     shutil.copy(j, out)

In [4]:
# class - anim x inanim

classification = {'animate': ['bat', 'cat', 'cow', 'elephant', 'face child', 'face female', 
                        'face male', 'fish', 'gymnast', 'monkey', 'surfer', 'tennis player']}
classification['inanimate'] = [cat for cat in search_terms if cat not in classification['animate']] # rest

assert len(classification['animate']) == len(classification['inanimate'])


# object category

object_categories = {
    "body (human)":     ['gymnast', 'surfer', 'tennis player'],
    "face (human)":     ['face child', 'face female', 'face male'],
    "body (animal)":    ['bat', 'elephant', 'fish'],
    "face (animal)":    ['cow', 'cat', 'monkey'],
    "plant":            ['flower', 'houseplant', 'tree'],
    "fruit / vegetable":['broccoli', 'tomato', 'pineapple'],
    "tool":             ['garden tools', 'hammer', 'hand tools'],
    "vehicle":          ['airplane', 'boat', 'train'],
}

assert len(list(chain(*object_categories.values()))) == 24

In [5]:
image_dict = {}

for i, category in enumerate(search_terms):
    image_dict[category] = sorted([str(imgpath).split("/")[-1] for imgpath in searchterm_paths[i].glob('*jpg') 
                                        if str(imgpath).split("/")[-1][:3] != 'dis'], 
                                        key=lambda x: int(''.join(filter(str.isdigit, str(x))))
                                        )

assert all([len(image_dict[k]) >= 30 for k in image_dict.keys()])

In [6]:
# overall - 120 imgs / 8 categories = 15 imgs / category
from copy import deepcopy
images_by_categories = deepcopy(object_categories)

for category in images_by_categories:
    for i, search_term in enumerate(images_by_categories[category]):
        images_by_categories[category][i] = image_dict[search_term]

In [7]:
# convert to df for easier manipulation

object_categories_astuples = []

for category in object_categories:
    for search_term in object_categories[category]:
        object_categories_astuples.append((category, search_term))

columns = pd.MultiIndex.from_tuples(sorted(object_categories_astuples))

images_df = pd.DataFrame.from_dict(image_dict, orient='index').T.dropna(how='any')
images_df = images_df[[c[1] for c in columns]]
images_df.columns = columns

assert len(images_df.index) == 30

#TODO: save df to csv
images_df.to_csv('images.csv', index=False)
images_df.head()

Unnamed: 0_level_0,body (animal),body (animal),body (animal),body (human),body (human),body (human),face (animal),face (animal),face (animal),face (human),...,fruit / vegetable,plant,plant,plant,tool,tool,tool,vehicle,vehicle,vehicle
Unnamed: 0_level_1,bat,elephant,fish,gymnast,surfer,tennis player,cat,cow,monkey,face child,...,tomato,flower,houseplant,tree,garden tools,hammer,hand tools,airplane,boat,train
0,bat_0.jpg,elephant_0.jpg,fish_2.jpg,gymnast_0.jpg,surfer_0.jpg,tennis player_6.jpg,cat_0.jpg,cow_0.jpg,monkey_9.jpg,face_new_b_0.jpg,...,tomato_1.jpg,flower_0.jpg,houseplant_4.jpg,tree_1.jpg,rake_0.jpg,hammer_1.jpg,hand tools_1.jpg,airplane_0.jpg,boat_2.jpg,train_0.jpg
1,bat_3.jpg,elephant_1.jpg,fish_3.jpg,gymnast_9.jpg,surfer_2.jpg,tennis player_11.jpg,cat_2.jpg,cow_2.jpg,monkey_43.jpg,baby's face_4.jpg,...,tomato_2.jpg,flower_12.jpg,houseplant_7.jpg,tree_4.jpg,rake_6.jpg,hammer_2.jpg,hand tools_3.jpg,plane_1.jpg,boat_3.jpg,train_2.jpg
2,bat_5.jpg,elephant_2.jpg,fish_8.jpg,gymnast_15.jpg,surfer_6.jpg,tennis_player_20.jpg,cat_4b.jpg,cow_3.jpg,monkey_49.jpg,face_new_b_5.jpg,...,tomato_3.jpg,flower_14.jpg,houseplant_8.jpg,tree_8.jpg,rake_8.jpg,hammer_3.jpg,hand tools_11.jpg,plane_5.jpg,boat_10.jpg,train_4.jpg
3,bat_6.jpg,elephant_5.jpg,fish_14.jpg,gymnast_25.jpg,surfer_12.jpg,tennis player_22.jpg,cat_4.jpg,cow_4.jpg,monkey_67.jpg,baby's face_8.jpg,...,tomato_15.jpg,flower_18.jpg,houseplant_15.jpg,tree_10.jpg,rake_10.jpg,hammer_9.jpg,wrench_18.jpg,airplane_8.jpg,boat_11.jpg,train_6.jpg
4,bat_7.jpg,elephant_6.jpg,fish_15.jpg,gymnast_28.jpg,surfer_13.jpg,tennis player_23.jpg,cat_8.jpg,cow_5.jpg,monkey_97.jpg,face_new_c_9.jpg,...,tomato_20.jpg,flower_22.jpg,houseplant_22.jpg,tree_10b.jpg,rake_11.jpg,hammer_2_0.jpg,hand tools_20.jpg,airplane_12.jpg,boat_12.jpg,train_11.jpg


In [8]:
list(images_df['body (animal)'].items())

[('bat',
  0       bat_0.jpg
  1       bat_3.jpg
  2       bat_5.jpg
  3       bat_6.jpg
  4       bat_7.jpg
  5       bat_8.jpg
  6       bat_9.jpg
  7      bat_11.jpg
  8      bat_15.jpg
  9      bat_19.jpg
  10     bat_21.jpg
  11     bat_24.jpg
  12     bat_34.jpg
  13     bat_40.jpg
  14     bat_41.jpg
  15     bat_49.jpg
  16     bat_61.jpg
  17     bat_65.jpg
  18     bat_94.jpg
  19    bat_121.jpg
  20    bat_125.jpg
  21    bat_199.jpg
  22    bat_204.jpg
  23    bat_241.jpg
  24    bat_245.jpg
  25    bat_246.jpg
  26    bat_247.jpg
  27    bat_250.jpg
  28    bat_253.jpg
  29    bat_259.jpg
  Name: bat, dtype: object),
 ('elephant',
  0       elephant_0.jpg
  1       elephant_1.jpg
  2       elephant_2.jpg
  3       elephant_5.jpg
  4       elephant_6.jpg
  5      elephant_10.jpg
  6      elephant_12.jpg
  7      elephant_14.jpg
  8      elephant_19.jpg
  9      elephant_21.jpg
  10     elephant_24.jpg
  11     elephant_27.jpg
  12    elephant_33b.jpg
  13     elephant_33.jp

In [9]:
stimA

NameError: name 'stimA' is not defined

In [None]:
# per one stim set - 120 imgs / 8 categories = 15 imgs / category
# => split into 6 sets every 5 rows

# NEW - per stim set - 60 imgs / 8 categories = 7 or 8 per category
# start w 6 per category

stimA = images_df[:2]
stimB = images_df[2:4]
stimC = images_df[4:6]
stimD = images_df[6:8]
stimE = images_df[8:10]
stimF = images_df[10:12]
stimG = images_df[12:14]
stimH = images_df[14:16]
stimI = images_df[16:18]
stimJ = images_df[18:20]
stimK = images_df[20:22]
stimL = images_df[22:24]

stim_rest = images_df[24:]
stim_rest

Unnamed: 0_level_0,body (animal),body (animal),body (animal),body (human),body (human),body (human),face (animal),face (animal),face (animal),face (human),...,fruit / vegetable,plant,plant,plant,tool,tool,tool,vehicle,vehicle,vehicle
Unnamed: 0_level_1,bat,elephant,fish,gymnast,surfer,tennis player,cat,cow,monkey,face child,...,tomato,flower,houseplant,tree,garden tools,hammer,hand tools,airplane,boat,train
24,bat_245.jpg,elephant_76.jpg,fish_126.jpg,gymnast_178.jpg,surfer_51.jpg,tennis_player_153.jpg,cat_97.jpg,cow_92.jpg,monkey_253.jpg,face_new_c_33.jpg,...,tomato_118.jpg,flower_85.jpg,houseplant_153.jpg,tree_89b.jpg,rake_145.jpg,hammer_2_10.jpg,hand tools_103.jpg,plane_469.jpg,boat_73.jpg,train_181.jpg
25,bat_246.jpg,elephant_77.jpg,fish_127.jpg,gymnast_186.jpg,surfer_52.jpg,tennis player_179.jpg,cat_124.jpg,cow_100.jpg,monkey_254.jpg,face_new_c_36.jpg,...,tomato_119.jpg,flower_87.jpg,houseplant_154.jpg,tree_107.jpg,rake_148.jpg,hammer_2_11.jpg,hand tools_109.jpg,plane_484.jpg,boat_84.jpg,train_184.jpg
26,bat_247.jpg,elephant_84.jpg,fish_129b.jpg,gymnast_190.jpg,surfer_55.jpg,tennis player_197.jpg,cat_129.jpg,cow_108.jpg,monkey_259.jpg,face_new_c_39.jpg,...,tomato_133.jpg,flower_97.jpg,houseplant_157.jpg,tree_111.jpg,rake_149.jpg,hammer_2_12.jpg,hand tools_118.jpg,plane_497.jpg,boat_85.jpg,train_185.jpg
27,bat_250.jpg,elephant_86.jpg,fish_129.jpg,gymnast_192.jpg,surfer_59.jpg,tennis player_204.jpg,cat_135.jpg,cow_112.jpg,monkey_260.jpg,face_new_c_43.jpg,...,tomato_136.jpg,flower_105.jpg,houseplant_158.jpg,tree_115.jpg,rake_166.jpg,hammer_2_13.jpg,hand tools_124.jpg,plane_518.jpg,boat_96.jpg,train_186.jpg
28,bat_253.jpg,elephant_91.jpg,fish_156.jpg,gymnast_193.jpg,surfer_66.jpg,tennis player_210.jpg,cat_143.jpg,cow_114.jpg,monkey_270.jpg,face_new_c_45.jpg,...,tomato_143.jpg,flower_122.jpg,houseplant_167.jpg,tree_118.jpg,garden tools_192.jpg,hammer_2_15.jpg,hand tools_137.jpg,plane_533.jpg,boat_107.jpg,train_219.jpg
29,bat_259.jpg,elephant_94.jpg,fish_175.jpg,gymnast_200.jpg,surfer_110.jpg,tennis player_213.jpg,cat_158.jpg,cow_136.jpg,monkey_272.jpg,baby's face_167.jpg,...,tomato_146.jpg,flower_126.jpg,houseplant_168.jpg,tree_128.jpg,garden tools_194.jpg,hammer_2_16.jpg,hand tools_139.jpg,plane_540.jpg,boat_155.jpg,train_247.jpg


In [None]:
# split rest of images into even and odd columns
stim_rest = images_df[24:]
evens = stim_rest.iloc[:, ::2].reset_index(drop=True)
odds = stim_rest.iloc[:, 1::2].reset_index(drop=True)

stimA = pd.concat([stimA, evens.loc[[0]]], ignore_index=True)
stimB = pd.concat([stimB, evens.loc[[1]]], ignore_index=True)
stimC = pd.concat([stimC, evens.loc[[2]]], ignore_index=True)
stimD = pd.concat([stimD, evens.loc[[3]]], ignore_index=True)
stimE = pd.concat([stimE, evens.loc[[4]]], ignore_index=True)
stimF = pd.concat([stimF, evens.loc[[5]]], ignore_index=True)
stimG = pd.concat([stimG, odds.loc[[0]]], ignore_index=True)
stimH = pd.concat([stimH, odds.loc[[1]]], ignore_index=True)
stimI = pd.concat([stimI, odds.loc[[2]]], ignore_index=True)
stimJ = pd.concat([stimJ, odds.loc[[3]]], ignore_index=True)
stimK = pd.concat([stimK, odds.loc[[4]]], ignore_index=True)
stimL = pd.concat([stimL, odds.loc[[5]]], ignore_index=True)

stimL

Unnamed: 0_level_0,body (animal),body (animal),body (animal),body (human),body (human),body (human),face (animal),face (animal),face (animal),face (human),...,fruit / vegetable,plant,plant,plant,tool,tool,tool,vehicle,vehicle,vehicle
Unnamed: 0_level_1,bat,elephant,fish,gymnast,surfer,tennis player,cat,cow,monkey,face child,...,tomato,flower,houseplant,tree,garden tools,hammer,hand tools,airplane,boat,train
0,bat_204.jpg,elephant_69.jpg,fish_118b.jpg,gymnast_167.jpg,surfer_48.jpg,tennis_player_146.jpg,cat_82.jpg,cow_75.jpg,monkey_249.jpg,baby's face_27.jpg,...,tomato_105.jpg,flower_79.jpg,houseplant_134.jpg,tree_85.jpg,garden tools_122.jpg,hammer_55.jpg,hand tools_98.jpg,plane_448.jpg,boat_58.jpg,train_178.jpg
1,bat_241.jpg,elephant_70.jpg,fish_118.jpg,gymnast_169.jpg,surfer_50.jpg,tennis_player_148.jpg,cat_93.jpg,cow_78.jpg,monkey_250.jpg,face_new_c_31.jpg,...,tomato_106.jpg,flower_82.jpg,houseplant_150.jpg,tree_89.jpg,rake_136.jpg,hammer_120.jpg,hand tools_101.jpg,plane_466.jpg,boat_72.jpg,train_180.jpg
2,,elephant_94.jpg,,gymnast_200.jpg,,tennis player_213.jpg,,cow_136.jpg,,baby's face_167.jpg,...,,flower_126.jpg,,tree_128.jpg,,hammer_2_16.jpg,,plane_540.jpg,,train_247.jpg


In [None]:
def partition(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i+n]
        

def reshape_multiindex(stim_df: pd.DataFrame) -> pd.DataFrame:
    stim_df.columns = stim_df.columns.get_level_values(0)
    s = stim_df.columns.to_series()
    stim_df.columns = [stim_df.columns, s.groupby(s).cumcount()]
    stim_df = (stim_df
                    .stack()
                    # .sample(frac=1, axis=0)
                    .apply(np.random.permutation, axis=0)
                    .reset_index(drop=True)
                )
    return stim_df


def list_blocks(stim_df_reshaped: pd.DataFrame) -> list[list[str]]:
    # takes df with 60 imgs
    # split into blocks -> 6 imgs per block (3 anim, 3 inanim)
    animate = "body (animal),body (human),face (animal),face (human)".split(',')
    inanimate = "fruit / vegetable,plant,tool,vehicle".split(',')

    animates = list(filter(lambda x: x == x, chain(*stim_df_reshaped[animate].values)))
    inanimates = list(filter(lambda x: x == x, chain(*stim_df_reshaped[inanimate].values)))

    assert len(animates) == len(inanimates)

    blocks_animate = list(partition(animates, 3))
    blocks_inanimate = list(partition(inanimates, 3))

    blocks = [imglist_a + imglist_i for imglist_a, imglist_i in zip(blocks_animate, blocks_inanimate)]

    # randomize order
    for block in blocks: np.random.shuffle(block)
    
    return blocks



# def list_blocks(stim_df_reshaped: pd.DataFrame) -> list[list[str]]:
#     # split last 3 rows by animate/inanimate
#     animate = "body (animal),body (human),face (animal),face (human)".split(',')
#     inanimate = "fruit / vegetable,plant,tool,vehicle".split(',')
#     rem = stim_df_reshaped.iloc[-3:]
#     rem_anim = list(rem[animate].iloc[0]) + list(rem[animate].iloc[1]) + list(rem[animate].iloc[2])
#     rem_inanim = list(rem[inanimate].iloc[0]) + list(rem[inanimate].iloc[1]) + list(rem[inanimate].iloc[2])
    
#     # split into blocks -> 10 imgs per block (5 anim, 5 inanim), 1 per category (with repeats)
#     blocks = []
#     for i in range(12):
#         blocks.append(
#             list(stim_df_reshaped.iloc[i]) + [rem_anim[i]] + [rem_inanim[i]]
#         )
#     # randomize order
#     for block in blocks: np.random.shuffle(block)
    
#     return blocks


def from_stim_set_blocks(stim_df: pd.DataFrame) -> list[list[str]]:
    reshaped_df = reshape_multiindex(stim_df)
    blocks = list_blocks(reshaped_df)

    return blocks

In [None]:
def get_key(item: str, d: dict) -> str:
    categories = filter(lambda category: item in d[category], d)
    return categories.__next__()

def assign_category(image: str) -> tuple[str, str]:
    term = get_key(image, image_dict)
    category = get_key(term, object_categories)

    return (image, category)

In [None]:
def create_blocks_final(blocks: list[list[str]]) -> list[list[str]]:
    for block in blocks:
        for i, image in enumerate(block):
            block[i] = assign_category(image)

    prefix_add = lambda t: ("dis_" + t[0], t[1])
    prefix_strip = lambda s: s if s[:3] != 'dis' else s[4:]

    for block in blocks:
        post_imgs = deepcopy(block)
        dis_imgs = [prefix_add(item) for item in block]
        np.random.shuffle(post_imgs)
        np.random.shuffle(dis_imgs)
        block.extend(dis_imgs + post_imgs)

    for block in blocks:
        for i in range(len(block) - 2):
            if prefix_strip(block[i][0]) == prefix_strip(block[i+1][0]):
                # print(block[i][0], block[i+1][0])
                block[i+1], block[i+2] = block[i+2], block[i+1]

    return blocks

In [None]:
# blocks = from_stim_set_blocks(stimA)

# # map images to category (i.e. reverse dict lookup: img -> search_term -> category)
# for block in blocks:
#     for i, image in enumerate(block):
#         block[i] = assign_category(image)

# prefix_add = lambda x: ("dis_" + x[0], x[1])

# for block in blocks:
#     post_imgs = deepcopy(block)
#     dis_imgs = [prefix_add(item) for item in block]
#     np.random.shuffle(post_imgs)
#     np.random.shuffle(dis_imgs)
#     block.extend(dis_imgs + post_imgs)

In [None]:
def create_stim_spreadsheet(blocks: list[list[str]]) -> pd.DataFrame:

    df = pd.DataFrame(chain(*blocks), columns=['image', 'answer'])

    # f = lambda l, r, t: np.tile(np.repeat(np.array(np.arange(1, l)), r), t)
    display = pd.Series(['pre', 'disambig', 'post']).repeat(10).reset_index(drop=True)
    randomise_trials = pd.Series(range(1,37)).repeat(10).reset_index(drop=True)
    randomise_blocks = pd.Series(range(1,13)).repeat(30).reset_index(drop=True)
    display_col = pd.concat([display] * 12).reset_index(drop=True)
    randomise_trials_col = pd.concat([randomise_trials] * 12).reset_index(drop=True)

    df['display'] = display_col
    df['progress'] = 1
    df['randomise_trials'] = randomise_trials_col
    df['randomise_blocks'] = randomise_blocks
    df['ShowProgressBar'] = np.nan
    df.astype({'randomise_blocks': 'int32', 'randomise_trials': 'int32', 'progress': 'int32'})

    # add block separators
    idxs = np.arange(-0.5, 330, 30)

    for i, idx in enumerate(idxs):
        df.loc[idx, 'display'] = 'block'
        df.loc[idx, 'text'] = f'Block {i+1} of 12'
    df = (df
          .sort_index()
          .reset_index(drop=True)
          .astype({'randomise_blocks': 'Int64', 'randomise_trials': 'Int64', 'progress': 'Int64'})
        )

    return df[['randomise_blocks', 'display', 'image', 
              'answer', 'text', 'progress', 'ShowProgressBar']]


In [None]:
stim_sets = stimA, stimB, stimC, stimD, stimE, stimF, stimG, stimH, stimI, stimJ, stimK, stimL

blocks_short = [from_stim_set_blocks(stim) for stim in stim_sets]
blocks_long = [create_blocks_final(blocks) for blocks in blocks_short]
spreadsheets = [create_stim_spreadsheet(blocks) for blocks in blocks_long]

# for i, sheet in enumerate(spreadsheets):
#     sheet.to_csv(f'spreadsheet{i+1}.csv', index=False)

In [None]:
spreadsheets[0]

Unnamed: 0,randomise_blocks,display,image,answer,text,progress,ShowProgressBar
0,,block,,,Block 1 of 12,,
1,1,pre,gymnast_9.jpg,body (human),,1,
2,1,pre,hammer_1.jpg,tool,,1,
3,1,pre,tomato_2.jpg,fruit / vegetable,,1,
4,1,pre,face_new_m_0.jpg,face (human),,1,
...,...,...,...,...,...,...,...
187,,block,,,Block 8 of 12,,
188,,block,,,Block 9 of 12,,
189,,block,,,Block 10 of 12,,
190,,block,,,Block 11 of 12,,


In [None]:
f = lambda x: x if x[:3] != 'dis' else x[4:]

for n, stim in enumerate(blocks_long):
    for m, block in enumerate(stim):
        for i in range(len(block) - 2):
            if f(block[i][0]) == f(block[i+1][0]):
                print(n, m)
                print(block[i][0], block[i+1][0])

In [None]:
# idxs = np.arange(-0.5, 330, 30)

# for i, idx in enumerate(idxs):
#     df.loc[idx, 'display'] = 'block'
#     df.loc[idx, 'text'] = f'Block {i+1} of 12'
# df = df.sort_index().reset_index(drop=True)
# df.head(90)