<a href="https://colab.research.google.com/github/kirawc/semantic-distance/blob/main/createStimuliLists.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# set up; import; define

In [None]:
# Import everything
import os, json
import pandas as pd
import random
import copy
import sqlite3
import sys
import numpy as np

In [None]:
# Define things
# Placeholder stimulus labels: ten per category, numbered 1 through 10.
stimDict = {
    "animal": [f"a{k}" for k in range(1, 11)],
    "instruments": [f"i{k}" for k in range(1, 11)],
    "tools": [f"t{k}" for k in range(1, 11)],
}

# create final splits

In [None]:
# Run functions
# (the cells defining makeTrios and makeSplits have to be run before this one)

# every prompt together with each pair of the remaining stimuli in its category
trioDict = makeTrios(stimDict)
# the same trials dealt out into 5 splits
allSplits = makeSplits(trioDict, nSplit=5)



[['a', 'a1', ('a4', 'a9')],
 ['a', 'a1', ('a3', 'a9')],
 ['a', 'a1', ('a5', 'a6')],
 ['a', 'a1', ('a2', 'a8')],
 ['a', 'a1', ('a4', 'a8')],
 ['a', 'a1', ('a8', 'a9')],
 ['a', 'a1', ('a5', 'a9')],
 ['a', 'a2', ('a1', 'a7')],
 ['a', 'a2', ('a5', 'a9')],
 ['a', 'a2', ('a3', 'a7')],
 ['a', 'a2', ('a3', 'a6')],
 ['a', 'a2', ('a3', 'a4')],
 ['a', 'a2', ('a5', 'a8')],
 ['a', 'a2', ('a3', 'a8')],
 ['a', 'a3', ('a1', 'a9')],
 ['a', 'a3', ('a4', 'a9')],
 ['a', 'a3', ('a2', 'a4')],
 ['a', 'a3', ('a10', 'a4')],
 ['a', 'a3', ('a4', 'a7')],
 ['a', 'a3', ('a8', 'a9')],
 ['a', 'a3', ('a5', 'a7')],
 ['a', 'a4', ('a5', 'a9')],
 ['a', 'a4', ('a3', 'a5')],
 ['a', 'a4', ('a7', 'a8')],
 ['a', 'a4', ('a6', 'a7')],
 ['a', 'a4', ('a10', 'a7')],
 ['a', 'a4', ('a10', 'a6')],
 ['a', 'a4', ('a1', 'a2')],
 ['a', 'a5', ('a1', 'a2')],
 ['a', 'a5', ('a4', 'a8')],
 ['a', 'a5', ('a6', 'a7')],
 ['a', 'a5', ('a3', 'a8')],
 ['a', 'a5', ('a3', 'a6')],
 ['a', 'a5', ('a3', 'a4')],
 ['a', 'a5', ('a10', 'a7')],
 ['a', 'a6', ('a

# create combinations


In [None]:
def makeTrios(dict):

  """
  Expand a {category: [stim, ...]} mapping into every prompt / option-pair combination.

  Within a category each stimulus is used in turn as the prompt and is matched
  with every unordered pair of the *other* stimuli of that category.

  Parameters
  ----------
  dict : mapping of category name -> iterable of stimulus labels.
      Labels must be hashable and mutually orderable (e.g. all strings).
      The parameter name hides the builtin `dict` inside this function; it is
      kept so existing callers keep working.

  Returns
  -------
  {CATEGORY1: {STIM1: [pair1A, pair1B, ...], STIM2: [pair2A, ...]}, CATEGORY2: ...}
  Each pair is a 2-tuple in sorted order and each list of pairs is itself in
  sorted order, so the output is identical from run to run. A category with
  no stimuli maps to an empty dict.
  """
  # Function-scope import so this cell does not depend on the import cell.
  from itertools import combinations

  resdict = {}

  for cat, stims in dict.items():  # Loop through each category

    stimlist = list(stims)
    uniqueStims = set(stimlist)  # repeated labels must not create repeated pairs
    promptDict = {}

    for prompt in stimlist:  # Loop through all prompts
      # Sorting first means combinations() yields each pair already in sorted
      # order, and yields the pairs themselves in a stable order.
      others = sorted(uniqueStims - {prompt})
      promptDict[prompt] = list(combinations(others, 2))

    # Stored once per category (not once per prompt) so that a category
    # without any stimuli is still present in the result.
    resdict[cat] = promptDict

  return resdict

In [None]:
def removeDuplicates(lst):
  """
  Drop pairs that hold the same items as an earlier pair, ignoring item order.

  Every element of `lst` is sorted and converted to a tuple, so ("b", "a") and
  ("a", "b") count as the same pair. One copy of each pair is returned, in
  order of first appearance.

  The result used to be built straight from a set, whose iteration order for
  strings can differ between interpreter sessions; keeping first-appearance
  order makes the output reproducible.
  """
  seen = set()
  res = []

  for sub in lst:
    key = tuple(sorted(sub))
    if key not in seen:
      seen.add(key)
      res.append(key)

  return res

# create randomized splits

In [None]:
# Preview a 5-way split of the trials (the trials are re-shuffled on every call)
makeSplits(trioDict, nSplit=5)

In [None]:
def makeSplits(dict, nSplit, modalities=None):

  """
  (Pseudo) randomize which trials [prompt & option pairs] a participant will see.
  1 participant = 1 split
  ? splits = 1 set

  Pseudo random because, for every category, modality and prompt, that prompt's
  option pairs are shuffled and then cut into nSplit consecutive chunks, one
  chunk per split. Every split therefore receives trials from every category
  and every prompt.
  NOTE(review): the chunks come from split_list, so they are the same size
  only when the number of pairs per prompt is divisible by nSplit.

  Parameters
  ----------
  dict : {category: {prompt1: [pairA, pairB, ...], prompt2: [...]}, ...}
      (the structure returned by makeTrios). The name hides the builtin
      `dict` inside this function; it is kept for existing callers.
  nSplit : number of splits to produce.
  modalities : optional sequence of modality labels; every trial is generated
      once per label. Defaults to ["a", "v"].

  Returns
  -------
  A list of nSplit lists; each inner list holds trials of the form
  [modality, prompt, (opt1, opt2)].
  """

  if modalities is None:
    modalities = ["a", "v"]

  resSplits = [[] for _ in range(nSplit)]

  for catDict in dict.values():  # loop through outer categories

    for modality in modalities:

      for prompt, optpairs in catDict.items():

        trios = [[modality, prompt, pair] for pair in optpairs]
        # Sampling the full length yields a shuffled copy; `trios` is untouched.
        shuffledTrios = random.sample(trios, len(trios))
        splitList = split_list(shuffledTrios, nSplit)

        for n in range(nSplit):
          resSplits[n].extend(splitList[n])

  return resSplits

In [None]:
def split_list(alist, wanted_parts):
    """
    Cut `alist` into `wanted_parts` consecutive slices and return them as a list.

    Slice boundaries are i * len(alist) // wanted_parts, so when the length is
    not evenly divisible the sizes differ by at most one element. Nothing is
    dropped or duplicated; a non-positive `wanted_parts` gives an empty list.
    """
    total = len(alist)
    parts = []
    for idx in range(wanted_parts):
        start = idx * total // wanted_parts
        stop = (idx + 1) * total // wanted_parts
        parts.append(alist[start:stop])
    return parts

In [None]:
##### JUNK
# NOTE(review): this fragment looks like an early draft of the splitting loop
# and cannot run as written:
#   - it is indented although no definition encloses it in this cell;
#   - `categories` and `optPairs` are never assigned in this cell (the names
#     assigned here are `categoryList` and `optionPairs`);
#   - `dict` and `nSplit` are only assigned further down in this cell;
#   - `promptDict[ckey]` indexes promptDict with a value that was just read
#     out of promptDict.

  categoryList = list(dict.keys())

  # loop through each category
  for cat in categories: 
    promptDict = dict[cat];
    prompts = promptDict.keys();

    for prompt in prompts:
      
      ckey = promptDict[prompt];
      optionPairs = promptDict[ckey]; # presumably meant to be every option pair for this prompt -- TODO confirm
      nEach = len(list(optPairs))/nSplit; # pairs per split; true division, so this may be a float
      shuffledPairs = random.sample(optionPairs,len(optionPairs)) # shuffle the full list
      print(shuffledPairs)
      counter = 0;      


# Scratch check: hand-build trials from the option pairs of the first prompt
# of the first category and print them.
# trioDict is read directly instead of being rebound to the name `dict`;
# rebinding would hide the builtin for every cell run afterwards.
categoryList = list(trioDict.keys())

nSplit = 2

promptDict = trioDict[categoryList[0]]
prompts = list(promptDict.keys())
optionPairs = list(promptDict[prompts[0]])
nPairs = len(optionPairs)

#print(optionPairs)

# Sampling the full length returns a shuffled copy and leaves optionPairs as is.
shuffledOptPairs = random.sample(optionPairs, nPairs)
shuffledOptPairs2 = random.sample(optionPairs, nPairs)

# NOTE(review): the prompt label is hard-coded to "a3" although the pairs
# were taken from prompts[0] -- confirm which prompt was intended.
finalList = [["a3", pair] for pair in shuffledOptPairs]

print(finalList)