## Depth Duration: Stimulus Counterbalancing

### Sequence Class 

Creates a sequence such that each block is balanced in terms of depth and duration/



In [1]:
import os, json
import pandas as pd
import random 
import copy 
import sqlite3
import sys
import numpy

In [138]:
sequence_count = 0
#encapsulates all data points for an object (i.e. image) --> all image characteristics are accessible
class Observation:
    def __init__(self, parent, subdir, depth):
        self.parent = parent
        self.subdir = subdir
        self.depth = depth
    def getParent(self):
        return self.parent
    def getSubdir(self):
        return self.subdir
    def getDepth(self):
        return self.depth 

class Observation_bins: #4 bins organized by depth (1-2m, 2-3m, 3-4m, 4-5m)
    def __init__(self):
        self.bins = [[], [], [], []] #structure that holds organized bins 
        self.blocks = 4
        self.stims_per_block = 48 
        self.num_bins = 4
        
    def addObservation(self, obs): #puts image observations in the correct bin by depth 
        depth = obs.getDepth()
        if depth >= 1 and depth < 2:
            self.bins[0].append(obs)               
        elif depth >= 2 and depth < 3:
            self.bins[1].append(obs)
        elif depth >= 3 and depth < 4:
            self.bins[2].append(obs)
        elif depth >= 4 and depth < 5:
            self.bins[3].append(obs)
        else:
            sys.exit("Depth out of bounds") #flag for if depth is less than 1 or greater than 5
            
    #randomly selects an image from a specified bin based on length of bin 
    def getObservation(self, bin_number): 
        if len(self.bins[bin_number]) == 0:
            sys.exit("Bin is empty --> pseudo random solution failed, try again")

        random_number = int(random.random() * len(self.bins[bin_number]))
#         print("Length of bin" + str(bin_number)+":",len(self.bins[bin_number]) )
#         print("Random number", random_number)

        return self.bins[bin_number][random_number]
    
    #removes all observations with the same parent (prevents preview effect)
    def _deleteParent(self, sample_parent): #_ i.e. protocol; not called independently 
        deleted = 0
        for i in range(len(self.bins)):  
            for j in reversed(range (len(self.bins[i]))): #reversed because length of list shrinks as elements are deleted
                parent = self.bins[i][j].getParent() 
                if parent == sample_parent:

                    self.bins[i].pop(j)
                    deleted += 1
                    
        #print(deleted, "deleted")
        
    
    #takes in the image name and returns the depth of the target in that image by accessing the object instance associated with that image    
    def findObservationDepth(self, stim):
        for i in range(len(self.bins)):
            for j in reversed(range (len(self.bins[i]))): #reversed because length of list shrinks as elements are deleted
                img_subdir = self.bins[i][j].getSubdir()
                if img_subdir == stim:
                    img_depth = self.bins[i][j].getDepth()
                    return img_depth
                    
                
    
    
    #generates image sequence 
    def makeSequence(self): 
        # IMPORTANT: This is a member function. It is called on an instance of the class (clear because self is passed)
        # So for example, when x.makeSequence() is called this function is operating within the instance of the class, which is x
        # If I want to call other member functions, within this function, they should be called on self NOT x 
     
        
        #can add can option to create a folder with these images (self, destination aka name for sequence folder)
        #if destination == "" then don't create sequence folder 
        # https://docs.python.org/2/library/copy.html; Need an immutable copy function, i.e. deepcopy 
        bins_backup = copy.deepcopy(self.bins)
        s1_stim = []
        for i in range(self.blocks): #creates 2d list based on num of blocks
            s1_stim.append([])

        check_dict = {}
        img_duration = {} #key = image, value = assigned duration
        #count variables verify that there are 64 images per duration total 
        count250 = 0
        count500 = 0
        count750 = 0
        count1000 = 0
        for block in range(self.blocks): #4 blocks in the experiment
            for stim_num in range(int(self.stims_per_block/self.num_bins)): # 64 images in each block
                for bin_num in range(self.num_bins):
                    #randomly sampled observation 
                    sample_obs = self.getObservation(bin_num)
                    sample_depth = sample_obs.getDepth()
                    sample_parent = sample_obs.getParent()
                    #call a function to delete that parent from the list of images to prevent duplicates 
                    self._deleteParent(sample_parent)
                    sample_image = sample_obs.getSubdir()
                    #adds image filename to sequence list 
                    s1_stim[block].append(sample_image)
                    #add image names to dictionary to ensure no duplicate images are added 
                    if sample_image not in check_dict:
                        check_dict[sample_image] = 1
                    else:
                        sys.exit("Duplicate found: " + sample_image)
                        
                    # Duration sequence: 16 images per bin per block per duration 
                    # Ex. in block 1 there are 16 images from bin1 @ 250 ms 
                    # Ideally should not be hard coded ... 
                    
                    #first 16 images (stim_num 0-3)
                    if stim_num <= 3: 
                        img_duration[sample_image] = 250
                        count250 += 1
                    #second 16 images (stim_num 4-7)
                    elif stim_num > 3 and stim_num <= 7:
                        img_duration[sample_image] = 500
                        count500 += 1
                    #third 16 images (stim_num 8-11)
                    elif stim_num > 7 and stim_num <= 11:
                        img_duration[sample_image] = 750
                        count750 +=1
                    #last 16 images (stim_num 12-15)
                    elif stim_num > 11 and stim_num <= 15:
                        img_duration[sample_image] = 1000
                        count1000 += 1
                        

        #randomly shuffle the elements of each block 
        for block in s1_stim:
            random.shuffle(block)
            
        #generate duration sequence based on the SHUFFLED order of selected images 
        s1_duration_seq = []
        for i in range(self.blocks): #creates 2d list based on num of blocks
            s1_duration_seq.append([])
            
        for block in range(self.blocks):
            for img in s1_stim[block]:
                duration = img_duration[img]
                s1_duration_seq[block].append(duration)
                
        self.bins = bins_backup #resets the main bins list back to the original for the next sequence 
        
                
        # Database structure 

        # sequence_A = [(sequence_A, img1, bin1-2, 500), (sequence_A, img2, bin4-5, 250)...]
        # TUPLE = (sequence name, img name, duration, order of presentation = 1 if the first image, depth )
        entry = []
        for block in s1_stim:
            for stim in block:
                 ## finding the number of presentation of the image in the overall sequence ##
                block_index = s1_stim.index(block)
                index_in_block = block.index(stim) + 1 #plus one so that indices start at 1 not zero 
                overall_index = len(block)*block_index + index_in_block
                
                #indexes to find corresponding duration for the image 
                duration = s1_duration_seq[block_index][index_in_block-1]    
                
                depth = self.findObservationDepth(stim)
                img_list = ['placeholder_sequence_name', stim, duration, overall_index, depth]
                #convert into tuple later once sequence name is known
                entry.append(img_list)
                
        
        # Removes images that were sampled in s1_stim (current sequence)
        count = 0
        for i in range(len(self.bins)):
            for j in reversed(range (len(self.bins[i]))): #reversed because length of list shrinks as elements are deleted
                subdir = self.bins[i][j].getSubdir()                    
                for block in s1_stim:
                    if subdir in block:
                        self.bins[i].pop(j)
                        count += 1
                        
        #print([count250, count500, count750, count1000])
        
        return s1_stim, s1_duration_seq, entry
    
               
def getTargetInfo(directory):
    """
    Indexes into the json file of each image and recursively extracts image characteristics
    All object (target image) instances are added to obs_bins (main list of all images)
    Args: 
        directory = path to cleaned stimuli folder
    Returns:
        obs_bins = instance of Observation_bins class that has depth_ob (instance) for every target image
    """
#     print("running")
    
    obs_list = [] #[parent, filepath, depth] for all of the images 
    obs_bins = Observation_bins() #instance of the class 
    for subdir, dirs, files in os.walk(directory): #recursively goes through all the folders within ltq
        for file in files:
            filepath = subdir + os.sep + file
            if ".ipynb_checkpoints" not in str(subdir):
                if filepath.endswith(".json"):
                    output_json = json.load(open(filepath)) #loads each data.json file from ltq
                    objects = output_json['objects'] 
                    for obj in objects:
                        cp = obj["crossing_point"]
                        cp = cp[0] #indexes to the dict
                        depth = cp['depth']
                        
                        elem_split = subdir.split("/")
                        img = elem_split[-1] #image folder is the last element of the path
                        img_split = img.split("_")
                        parent = img_split[0]
                        depth_ob = Observation(parent, subdir, depth) #creating an instance
                        obs_bins.addObservation(depth_ob) #adding one object to another (an observation to the bins list)
    return obs_bins        
        
# obs_bins = getTargetInfo() #called on directory where images are stored 

obs_bins = getTargetInfo("/Users/prachi/Documents/depth_duration/mar3_depthDuration_stimuli/targetImages_kinect2data_subset")

#### MAIN PROGRAM BEGINS HERE #### 
  
def generateFourSequences(obs_bins, mother_group):
    """
    After a sequence is made, those target images (not parent) are removed from the master list 
    Once 4 sequences can be made in this way because of stimuli constraints
    NOTE: sometimes 4 sequences might not work because of the variability of random sampling - in that case exception is thrown
    """
    #list of the names of the sequences 
    #Mother group is used now since sequences are made in groups of 4
    #seq_names = [mother_group+"1", mother_group+"2", mother_group+"3", mother_group+"4"]
    seq_names = [mother_group+"1"]

    lst_seq = []
    for i in range(len(seq_names)): #generate 4 sequences, add entry to list of sequences
        output = obs_bins.makeSequence()
        entry = output[2]
        lst_seq.append(entry)

    entry_list = [] #list of image tuples for all trials in all sequences
    for seq in lst_seq:
        for item in seq:
            index = lst_seq.index(seq)
            item[0] = "sequence_" + seq_names[index] #replaces "placeholder sequence name" with actual name 
            item = tuple(item) #converts list to tuple so it can be added to the database 
            entry_list.append(item)
                        
            
    return entry_list


class Group:
    def __init__(self, sequences):
        self.sequences = sequences #listofGroupSequences --> [(), (), ...] --> tuples for the trials for all four sequences of that group
    
    def getNumParents(self):
        """
        Member function of Group class 
        Returns number of unique parents in the group 
        """
        
        lst_group_parents = []
        for trial in self.sequences:
            folder_path = trial[1] #path to parent folder
            fp_split = folder_path.split("/")
            folder = fp_split[-1]
            folder_split = folder.split("_")
            parent = folder_split[0] #isolated parent image name
            lst_group_parents.append(parent)
                
#         print("Parent List length: ", len(lst_group_parents)) #should be 256 * 4 = 1024
        
        set_groupParents = set(lst_group_parents) #convert to set so duplicates are removed
#         print("Parent Set length: ", len(set_groupParents)) 

        return len(set_groupParents) #total number of unique parents in the group
    
    def returnGroup(self):
        """
        Member function of Group class 
        Returns the group (4 sequences) in its original form 
        """
        return self.sequences 
        

def findBestGroup(stimuli_path):
    """
    Generates groups of 4 sequences 
    Returns the group that has the maximum amount of unique parent images 

    """
    print("Testing group seq generation")
    
    complete_entry = []
        
    group_names = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L" ,"M", 
                   "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]    
    
    
    for name in group_names:
        obs_bins = getTargetInfo(stimuli_path) #resets obs_bins after 4 sequences are made 
        listofGroupSequences = generateFourSequences(obs_bins, name) #1D list of tuples 
        group_entry = Group(listofGroupSequences) #Group is a class 
        # group_entry is an instance of the class 
        complete_entry.append(group_entry) #list of group objects 
    
    #https://www.agnosticdev.com/content/how-sort-objects-custom-property-python
    #sort complete_entry based on number of parents in each group 
    #getNumParents() is a member function
    # lambda specifies that I am running a function on each element
    # it loops through all elements (groups) of complete_entry (element = x)
    # reverse = True --> maximum to minimum 
    
    complete_entry.sort(key=lambda x: x.getNumParents(), reverse=True)
    
    for group in complete_entry:
        print("Number of Unique Parents in Group:" + str(group.getNumParents()))
        
    max_group = complete_entry[0].returnGroup() #0 because complete_entry is ordered max --> min
        
    return max_group 

def foo(stimulus_path):
    """
    Restarts findBestGroup after exception (random sequence solution fails)
    Args:
        stimulus_path = SUN-RGBD cleaned stimuli folder 
    Returns:
        Group of sequences that has the maximum number of parent images
        - list of trial tuples for four sequences (ex. a1, a2, a3, a4)
        - this list should be inputted to database 
    """
    while True:
        try:
            bestGroup = findBestGroup(stimulus_path)
            return bestGroup
        except:
            pass
        else:
            break
            
path = "/Users/prachi/Documents/depth_duration/mar3_depthDuration_stimuli/targetImages_kinect2data_subset"
Group_max_parents = foo(path)
# print(Group_max_parents)



Testing group seq generation
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Parents in Group:192
Number of Unique Pare

#### List of cleaned target stimuli (Used to select a group of balanced 192 images)

Criterion
- good depth tagging 
- minimal camera rotation 
- target placement is not ambiguous 
- no people in the scene 


In [139]:
cleaned_target_stim = []

for folder in os.listdir(path):
    if folder != '.DS_Store':
        cleaned_target_stim.append(folder)
    
print(len(cleaned_target_stim))

1405


## Optmized Sequence Pipeline 

### List of images: even spread of depth

[stimuli ordered from 1m to 5m] = 192 total images 

In [140]:
import numpy as np

depth_ob_output = Group_max_parents

stimuli = [[trial[1], trial[4]] for trial in depth_ob_output]


In [2]:
# stimuli

#### Image Folders include in the depth_ob sequence

Select 4 images for the practice trials (3) and example image that are NOT in the list below

In [189]:
folders_num = [trial[0].split('/')[-1].split('_')[0] for trial in stimuli]

folders_num.sort() 
# folders_num

#### Create folder with target image folders selected by depth_ob code

In [141]:
import os 
import shutil 

dest = '/Users/prachi/Documents/depth_duration/mar3_depthDuration_stimuli/final_stimuli'

for stim in stimuli:
    folderpath = stim[0]
    dest_folder = dest  + '/' + stim[0].split('/')[-1]
    destination = shutil.copytree(folderpath, dest_folder)  

In [142]:
# confirm that there are 192 images 

count = 0
for folder in os.listdir(dest):
    count += 1
print("Number of images: ", count)

Number of images:  192


#### Sort trials by depth

In [144]:
def Sort_Depth(sub_li):
    """
    Sort the tuples using the second element
    Inplace way to sort using sort()
    """
    # reverse = None (Sorts in Ascending order)
    # key is set to sort using second element of
    # sublist lambda has been used
    sub_li.sort(key = lambda x: x[1])
    return sub_li

# conditions = random permutations list flattened so that each elem has a numerical assignment which will evenly distribute the durations
# PERMUTATIONS should be 0, 1, 2, 3
# then put them into blocks strategically by sampling one at a time and adding to array
sorted_stimuli_depth = Sort_Depth(stimuli)


In [145]:
sorted_stimuli_depth[0]

['/Users/prachi/Documents/depth_duration/mar3_depthDuration_stimuli/targetImages_kinect2data_subset/002272_2014-06-28_18-53-56_260595134347_rgbf000067-resize_2',
 1.138]

### Assign duration condition in block structure to create sequence 

48 images in each block --> 12 images at each duration


In [146]:
import itertools

def get_permutations():
    permutations = list(itertools.permutations([250, 500, 750, 1000])) * 2
    random.shuffle(permutations)

    # list of duration conditions 
    permutations = [item for tup in permutations for item in tup] 
    
    return permutations

def Sort_Duration(sub_li):
    """
    Sort the tuples using the third element
    Inplace way to sort using sort()
    """
    # reverse = None (Sorts in Ascending order)
    # key is set to sort using second element of
    # sublist lambda has been used
    sub_li.sort(key = lambda x: x[2])
    return sub_li

def sequence(sorted_stimuli):
    """
    Args:
        Stimuli sorted by DEPTH 
    Returns:
        Balanced sequence by depth and duration
    """
    permutations = get_permutations()
    
    stim_depth_dur = copy.deepcopy(sorted_stimuli_depth)
    for i in range(len(stim_depth_dur)):
        stim_depth_dur[i].append(permutations[i])

    sorted_stim_depth_dur = Sort_Duration(stim_depth_dur)
    
    test_seq = [[], [], [], []]
    test = copy.deepcopy(sorted_stim_depth_dur)
    
    for i in range(4):
        # i = index for duration
        for j in range(4):
            # j = index for depth bin
            # trials at duration i, in depth bin j --> e.g. all trials at 250 ms with depths between 1-2m
            temp = copy.deepcopy(test[48*i:48*(i+1)][12*j:12*(j+1)])
            random.shuffle(temp)
            # split list into 4 for each block
            # 3 images for for each depth bin for each duration 
            temp = [temp[0:3], temp[3:3*2], temp[3*2:3*3], temp[3*3:3*4]]   
            random.shuffle(temp)
            # add the split lists to the sequence blocks 
            for k in range(4):
                for elem in temp[k]:
                    test_seq[k].append(elem)

    # randomly shuffle trials in each block

    for block in test_seq:
        random.shuffle(block)
    
    return test_seq


#### Create 48 sequences using different duration permutations

In [147]:
def main(sorted_stimuli, num):
    """
    Args:
        sorted_stimuli = Stimuli sorted by DEPTH 
        num = number of sequences to be generated
    Returns:
        list of sequences 
    """
    list_sequences = {}
    label = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
              'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah', 'ai', 'aj', 'ak', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'av', 'aw', 'ax', 'ay', 'az']
    for i in range(num):
        # sequence seperated by block
        seq = sequence(sorted_stimuli)
        # flattens sequence 
        seq = [item for sublist in seq for item in sublist]
        list_sequences[label[i]] = seq
    
    return list_sequences


In [148]:
sequences_48 = main(sorted_stimuli_depth, 48)

In [149]:
# sequences_48.keys()

len(sequences_48['a'])

192

In [192]:
def create_sequence_jsons(list_of_sequences, destination):

    sequence_dictionaries = []
    for key in list_of_sequences.keys():
        seq = [] 
        sequence_name = key
        num = 0
        for trial in list_of_sequences[key]:
            dict_trial = {}
            dict_trial["sequence"] = sequence_name
            dict_trial["image"] = trial[0]
            dict_trial["duration"] = trial[2]
            dict_trial["num"] = num
            dict_trial["depth"] = trial[1]
            local_imgpath = trial[0]
            img_num = local_imgpath.split("/")[-1]
            # this has to be the path on the server
            ogimg_path = "depth_duration_stimuli/" + img_num + '/' + img_num +'-original.jpg'
            targetimg = "depth_duration_stimuli/" + img_num + '/' + img_num + '-target.png'
            dict_trial["image_path"] = ogimg_path
            dict_trial["image_path_target"] = targetimg
            dict_trial["mask_path"] = "masks/mask_" + str(num) + ".jpg"
            dict_trial["fixation_path"] = "fixation.jpg"

            seq.append(dict_trial)
            num += 1

        sequence_dictionaries.append(seq)
    
    for sequence in sequence_dictionaries:
        name = sequence[0]["sequence"]
        path = destination + '/' + name + '.json'
        #creates json file for the sequence 
        with open(path, 'w') as f:
            json.dump(sequence , f)

    

#### Create a json file for each sequence

In [193]:
jsons_destination = '/Users/prachi/Documents/depth_duration/SUN-RGBD_stimuli_prep/V2_sequences'

create_sequence_jsons(sequences_48, jsons_destination)

#### Create Duration Rotated Sequences 

- goal is for each image to be seen at each duration across all participants 
- duration effects for individual target images can then be analyzed

In [194]:
import json 

def load_master_sequence(jsonpath):
    return json.load(open(jsonpath))

def rotate_sequence(previous_seq):
    """
    Rotates each trial's duration assignment based on previous sequence 
    250 --> 500
    500 --> 750
    750 --> 1000
    1000 --> 250
    """
    rotated = previous_seq
    for i in range(len(previous_seq)):
        duration = previous_seq[i]['duration']
        if duration == 1000:
            new_duration = 250
        else:
            new_duration = duration + 250
        rotated[i]['duration'] = new_duration
        
    return rotated

def create_duration_rotations(jsonpath, exit, name):
    """
    Args:
        jsonpath = path to master json created through sequence pipeline
        exit = destination path for new jsons 
        name = i.e. V1 
    
    Creates sequences rotated by duration so that all images in the master sequence are seen at each duration
    (across participants)
        
    """
    master = load_master_sequence(jsonpath)
    
#     r0 = master 
#     r0_path = exit + '/' + name + '_0.json'
#     #creates json file for the sequence 
#     with open(r0_path, 'w') as f:
#         json.dump(r0 , f)
        
#     r1 = rotate_sequence(r0)
    r1 = rotate_sequence(master)
    r1_path = exit + '/' + name + '_1.json'
    #creates json file for the sequence 
    with open(r1_path, 'w') as f:
        json.dump(r1 , f)
        
    r2 = rotate_sequence(r1)
    r2_path = exit + '/' + name + '_2.json'
    #creates json file for the sequence 
    with open(r2_path, 'w') as f:
        json.dump(r2 , f)
        
    r3 = rotate_sequence(r2)
    r3_path = exit + '/' + name + '_3.json'
    #creates json file for the sequence 
    with open(r3_path, 'w') as f:
        json.dump(r3 , f)    

        
def main_seq_rotations(json_folderpath, exit):
    """
    Create rotated sequence for each sequence in the folder
    """
    for file in os.listdir(json_folderpath):
        name = file.split(".")[0]
        jsonpath = json_folderpath + "/" + file
        try:
            create_duration_rotations(jsonpath, exit, name)
        except:
            print("Failed to create json rotations for: ", file)

In [195]:
jsons_location = '/Users/prachi/Documents/depth_duration/SUN-RGBD_stimuli_prep/V2_sequences'
jsons_destination = '/Users/prachi/Documents/depth_duration/SUN-RGBD_stimuli_prep/V2_sequences'

main_seq_rotations(jsons_location, jsons_destination)

Failed to create json rotations for:  .DS_Store
Failed to create json rotations for:  .ipynb_checkpoints
