In [128]:
import numpy as np
import pandas as pd
import random
from itertools import chain
from pathlib import Path


# Seed NumPy's global RNG so that the np.random.permutation / np.random.shuffle
# calls further down give the same result on every fresh run.
# NOTE(review): the stdlib `random` module imported above is not seeded here;
# seed it as well if anything later draws from it.
np.random.seed(0)

In [129]:
# Each sub-directory of the stimulus folder is treated as one search term;
# plain files sitting next to the directories are ignored.
root = Path.cwd().joinpath('stim_set_final_by_cat')
searchterm_paths = sorted(entry for entry in root.iterdir() if entry.is_dir())
search_terms = [directory.name for directory in searchterm_paths]

# The rest of the notebook is laid out for exactly 24 search terms.
assert len(search_terms) == 24
print(search_terms)

['airplane', 'bat', 'boat', 'broccoli', 'cat', 'cow', 'elephant', 'face child', 'face female', 'face male', 'fish', 'flower', 'garden tools', 'gymnast', 'hammer', 'hand tools', 'houseplant', 'monkey', 'pineapple', 'surfer', 'tennis player', 'tomato', 'train', 'tree']


In [130]:
# class - anim x inanim
# The animate search terms are listed by hand; every other search term found
# on disk is classified as inanimate.

classification = {
    'animate': [
        'bat', 'cat', 'cow', 'elephant',
        'face child', 'face female', 'face male',
        'fish', 'gymnast', 'monkey', 'surfer', 'tennis player',
    ],
}
classification['inanimate'] = [
    term for term in search_terms if term not in classification['animate']
]

# Both classes must contain the same number of search terms.
assert len(classification['animate']) == len(classification['inanimate'])


# object category
# Eight object categories, each made up of three search terms.

object_categories = {
    'body (human)': ['gymnast', 'surfer', 'tennis player'],
    'face (human)': ['face child', 'face female', 'face male'],
    'body (animal)': ['bat', 'elephant', 'fish'],
    'face (animal)': ['cow', 'cat', 'monkey'],
    'plant': ['flower', 'houseplant', 'tree'],
    'fruit/vegetable': ['broccoli', 'tomato', 'pineapple'],
    'tool': ['garden tools', 'hammer', 'hand tools'],
    'vehicle': ['airplane', 'boat', 'train'],
}

# Across all categories there must be 24 search terms in total.
assert sum(map(len, object_categories.values())) == 24

In [131]:
# Per search term: the names of the files matching '*jpg' in its folder,
# skipping names that start with 'dis', ordered by the integer formed from all
# digits that occur in the file name.
image_dict = {}

for category, category_dir in zip(search_terms, searchterm_paths):
    # Path.name is the last path component on every platform; splitting
    # str(path) on "/" only works where "/" is the path separator.
    filenames = (imgpath.name for imgpath in category_dir.glob('*jpg'))
    image_dict[category] = sorted(
        (name for name in filenames if not name.startswith('dis')),
        key=lambda name: int(''.join(filter(str.isdigit, name))),
    )

# Every search term must provide at least 30 images.
assert all(len(images) >= 30 for images in image_dict.values())

In [132]:
# overall - 120 imgs / 8 categories = 15 imgs / category
from copy import deepcopy
# Work on a deep copy so that object_categories keeps its search-term names,
# then swap every search term in the copy for that term's list of image names.
images_by_categories = deepcopy(object_categories)

for members in images_by_categories.values():
    members[:] = [image_dict[search_term] for search_term in members]

In [133]:
# convert to df for easier manipulation

# (category, search term) pairs; sorted, they become the two-level column index.
object_categories_astuples = [
    (category, search_term)
    for category, members in object_categories.items()
    for search_term in members
]
columns = pd.MultiIndex.from_tuples(sorted(object_categories_astuples))

# One column per search term. dropna(how='any') removes every row in which at
# least one search term has no image; the columns are then put into the same
# order as `columns` before the two-level labels are attached.
images_df = (
    pd.DataFrame.from_dict(image_dict, orient='index')
    .T
    .dropna(how='any')
    .loc[:, [pair[1] for pair in columns]]
)
images_df.columns = columns

# The split into stimulus sets relies on exactly 30 complete rows.
assert len(images_df.index) == 30
images_df.head()

Unnamed: 0_level_0,body (animal),body (animal),body (animal),body (human),body (human),body (human),face (animal),face (animal),face (animal),face (human),...,fruit/vegetable,plant,plant,plant,tool,tool,tool,vehicle,vehicle,vehicle
Unnamed: 0_level_1,bat,elephant,fish,gymnast,surfer,tennis player,cat,cow,monkey,face child,...,tomato,flower,houseplant,tree,garden tools,hammer,hand tools,airplane,boat,train
0,bat_0.jpg,elephant_0.jpg,fish_2.jpg,gymnast_0.jpg,surfer_0.jpg,tennis player_6.jpg,cat_0.jpg,cow_0.jpg,monkey_9.jpg,face_new_b_0.jpg,...,tomato_1.jpg,flower_0.jpg,houseplant_4.jpg,tree_1.jpg,rake_0.jpg,hammer_1.jpg,hand tools_1.jpg,airplane_0.jpg,boat_2.jpg,train_0.jpg
1,bat_3.jpg,elephant_1.jpg,fish_3.jpg,gymnast_9.jpg,surfer_2.jpg,tennis player_11.jpg,cat_2.jpg,cow_2.jpg,monkey_43.jpg,baby's face_4.jpg,...,tomato_2.jpg,flower_12.jpg,houseplant_7.jpg,tree_4.jpg,rake_6.jpg,hammer_2.jpg,hand tools_3.jpg,plane_1.jpg,boat_3.jpg,train_2.jpg
2,bat_5.jpg,elephant_2.jpg,fish_8.jpg,gymnast_15.jpg,surfer_6.jpg,tennis_player_20.jpg,cat_4b.jpg,cow_3.jpg,monkey_49.jpg,face_new_b_5.jpg,...,tomato_3.jpg,flower_14.jpg,houseplant_8.jpg,tree_8.jpg,rake_8.jpg,hammer_3.jpg,hand tools_11.jpg,plane_5.jpg,boat_10.jpg,train_4.jpg
3,bat_6.jpg,elephant_5.jpg,fish_14.jpg,gymnast_25.jpg,surfer_12.jpg,tennis player_22.jpg,cat_4.jpg,cow_4.jpg,monkey_67.jpg,baby's face_8.jpg,...,tomato_15.jpg,flower_18.jpg,houseplant_15.jpg,tree_10.jpg,rake_10.jpg,hammer_9.jpg,wrench_18.jpg,airplane_8.jpg,boat_11.jpg,train_6.jpg
4,bat_7.jpg,elephant_6.jpg,fish_15.jpg,gymnast_28.jpg,surfer_13.jpg,tennis player_23.jpg,cat_8.jpg,cow_5.jpg,monkey_97.jpg,face_new_c_9.jpg,...,tomato_20.jpg,flower_22.jpg,houseplant_22.jpg,tree_10b.jpg,rake_11.jpg,hammer_2_0.jpg,hand tools_20.jpg,airplane_12.jpg,boat_12.jpg,train_11.jpg


In [134]:
# per one stim set - 120 imgs / 8 categories = 15 imgs / category
# => split into 6 sets every 5 rows

stimA = images_df.iloc[0:5]
stimB = images_df.iloc[5:10]
stimC = images_df.iloc[10:15]
stimD = images_df.iloc[15:20]
stimE = images_df.iloc[20:25]
# Sixth set: needs its own name, otherwise rows 25-29 replace stimD's rows 15-19.
stimF = images_df.iloc[25:30]

In [137]:
def reshape_multiindex(stim_df: pd.DataFrame) -> pd.DataFrame:
    """Fold the columns that share a level-0 label into one shuffled column each.

    The columns of ``stim_df`` are relabelled with their level-0 value plus a
    running number (0, 1, 2, ...) per repeated label. That running number is
    then stacked into the rows, so the result has one column per distinct
    level-0 label. Finally every column is permuted independently and the
    index is replaced by a fresh 0..n-1 range.

    Args:
        stim_df: Frame whose column level 0 holds the label to group by
            (the object category in this notebook). It is not modified.

    Returns:
        A new frame with one column per distinct level-0 label; each column
        holds that label's values in random order.

    Note:
        Draws from NumPy's global RNG via ``np.random.permutation``, so the
        result depends on the current ``np.random`` state.
    """
    categories = stim_df.columns.get_level_values(0)
    # Number the repeated labels so that stack() has a unique inner level to
    # move into the rows.
    labels = categories.to_series()
    occurrence = labels.groupby(labels).cumcount()

    # Relabel a copy: assigning to stim_df.columns directly would overwrite the
    # caller's column labels as a side effect.
    relabelled = stim_df.copy()
    relabelled.columns = [categories, occurrence]

    return (
        relabelled
        .stack()
        # Permute per column (not whole rows) so that the images of different
        # categories are paired up at random.
        .apply(np.random.permutation, axis=0)
        .reset_index(drop=True)
    )


def create_blocks(stim_df_reshaped: pd.DataFrame) -> list[list[str]]:
    """Turn a reshaped stimulus frame into 12 shuffled blocks of 10 images.

    Rows 0-11 each contribute one image per column. The last three rows are
    used as fillers: their animate columns and their inanimate columns are each
    flattened row by row into 12 values, and block ``i`` receives the ``i``-th
    animate and the ``i``-th inanimate filler. Each block is then shuffled in
    place with ``np.random.shuffle``.

    Args:
        stim_df_reshaped: Frame containing the four animate and the four
            inanimate category columns named below; it needs at least 12 rows.

    Returns:
        A list of 12 blocks, each a list of image names in random order.
    """
    animate_cols = ["body (animal)", "body (human)", "face (animal)", "face (human)"]
    inanimate_cols = ["fruit/vegetable", "plant", "tool", "vehicle"]

    # split last 3 rows by animate/inanimate (row-major: row 0 first, then 1, then 2)
    leftover = stim_df_reshaped.iloc[-3:]
    extra_animate = [
        img for row in range(3) for img in leftover[animate_cols].iloc[row]
    ]
    extra_inanimate = [
        img for row in range(3) for img in leftover[inanimate_cols].iloc[row]
    ]

    # split into blocks -> 10 imgs per block (5 anim, 5 inanim), 1 per category (with repeats)
    blocks = [
        [*stim_df_reshaped.iloc[i], extra_animate[i], extra_inanimate[i]]
        for i in range(12)
    ]

    # randomize order
    for block in blocks:
        np.random.shuffle(block)

    return blocks


def from_stim_set_blocks(stim_df: pd.DataFrame) -> list[list[str]]:
    """Build the shuffled image blocks for one stimulus set.

    Passes ``stim_df`` through ``reshape_multiindex`` and hands the result to
    ``create_blocks``.

    Args:
        stim_df: One stimulus set, i.e. a row slice of the image frame.

    Returns:
        The list of blocks produced by ``create_blocks``.
    """
    return create_blocks(reshape_multiindex(stim_df))

In [138]:
# Build and display the blocks for stimulus set A (the first five image rows).
from_stim_set_blocks(stimA)

[['train_2.jpg',
  'fish_2.jpg',
  'broccoli_38.jpg',
  'flower_18.jpg',
  'cow_5.jpg',
  'elephant_0.jpg',
  'face_new_m_2.jpg',
  'pineapple_22.jpg',
  'gymnast_25.jpg',
  'hammer_1.jpg'],
 ['rake_8.jpg',
  'houseplant_4.jpg',
  'bat_0.jpg',
  'boat_2.jpg',
  'tennis_player_20.jpg',
  'pineapple_47b.jpg',
  'cat_4.jpg',
  'face_new_b_5.jpg',
  'gymnast_28.jpg',
  'tree_8.jpg'],
 ['hand tools_3.jpg',
  'bat_5.jpg',
  'cow_4.jpg',
  'faces_new_2.jpg',
  'airplane_12.jpg',
  'hand tools_1.jpg',
  'flower_0.jpg',
  'cow_3.jpg',
  'gymnast_15.jpg',
  'broccoli_35.jpg'],
 ['surfer_6.jpg',
  'elephant_6.jpg',
  'plane_5.jpg',
  'face_new_f_10.jpg',
  'tree_1.jpg',
  'monkey_43.jpg',
  'broccoli_41.jpg',
  'face_new_c_9.jpg',
  'hammer_9.jpg',
  'boat_3.jpg'],
 ['train_11.jpg',
  'tree_10.jpg',
  'pineapple_44.jpg',
  'hammer_2_0.jpg',
  'surfer_12.jpg',
  'face_new_m_4.jpg',
  'cow_0.jpg',
  'tomato_1.jpg',
  'elephant_1.jpg',
  'fish_8.jpg'],
 ['houseplant_15.jpg',
  'face_new_m_0.jpg',
  