In [1]:
import numpy as np
import PIL
import matplotlib.pyplot as plt
import pandas as pd
import os
import copy
import scipy.io
import sys
sys.path.append('/user_data/mmhender/texture_synthesis/code/')
import things_utils

import warnings
warnings.filterwarnings('ignore')

In [2]:
things_root = '/user_data/mmhender/things/'

In [3]:
project_root = '/user_data/mmhender/featsynth/'

In [4]:
import dropbox

def init_dropbox():
    with open(os.path.join(project_root, 'expt_design/dbx_token.txt')) as f:
        token = f.readlines()[0]
    dbx = dropbox.Dropbox(token)
    return dbx

dbx = init_dropbox()

def get_shared_url(path, dbx=None):
 
    if dbx==None:
        dbx = init_dropbox()
    result = dbx.sharing_get_shared_links(path)
    if len(result.links)==0:
        meta = dbx.sharing_create_shared_link_with_settings(path)
        result = dbx.sharing_get_shared_links(path)
    url = result.links[0].url
    url = url.replace('www.dropbox', 'dl.dropboxusercontent')
    
    return url

In [5]:
def make_trial_info(rndseed = None):

    concepts_filename = os.path.join(things_root, 'concepts_use.npy')
    concepts_use = np.load(concepts_filename,allow_pickle=True).item()
    categ_names = concepts_use['categ_names']
    concept_names = concepts_use['concept_names_subsample']
    image_names = concepts_use['image_names']

    concepts_all = np.concatenate(concept_names)
    categ_all = np.repeat(categ_names, [len(cc) for cc in concept_names])

    n_categ_use = 10
    # n_categ_use = 4
    n_concepts_use = 6
    n_ex_use = 4;
    assert(np.mod(n_ex_use,4)==0)
    n_resp_conds = 2;

    # image types are original or "synth" from different DNN layers
    n_layers=4
    n_image_types = n_layers+1 
    
    n_trials_total = n_categ_use * n_concepts_use * n_ex_use * n_image_types
 
    n_runs = 12;
    assert(np.mod(n_trials_total, n_runs)==0)
    n_trials_per_run = int(n_trials_total/n_runs);
    
    # randomly selecting the subset of categories and concepts that will be used here
    if rndseed is None:
        rndseed = 456466
    np.random.seed(rndseed)
    # categ_inds_use = np.arange(n_categ_use)
    categ_inds_use = np.random.choice(np.arange(len(categ_names)), n_categ_use, replace=False)
    categ_use = np.array(categ_names)[categ_inds_use]
    print(categ_use)
    concepts_use = [np.random.choice(concept_names[ca], n_concepts_use, replace=False) for ca in categ_inds_use]
    # image_names_use = [[np.random.choice(image_names[co], n_ex_use, replace=False) for co in conc] \
    #                    for conc in concepts_use]
    image_names_use = [[image_names[co][0:n_ex_use] for co in conc] \
                       for conc in concepts_use]

    trial_info = pd.DataFrame({'trial_num_overall': np.zeros((n_trials_total,)), 
                               'categ_ind': np.zeros((n_trials_total,)),
                              'concept_ind': np.zeros((n_trials_total,)),
                              'super_name': np.zeros((n_trials_total,)),
                              'basic_name': np.zeros((n_trials_total,)),
                              'ex_num': np.zeros((n_trials_total,)),
                              'image_type_num': np.zeros((n_trials_total,)),
                              'image_name': np.zeros((n_trials_total,)),
                              'dropbox_url': np.zeros((n_trials_total,)),
                              'target_present': np.zeros((n_trials_total,)),
                              'cue_level': np.zeros((n_trials_total,)),
                              'cue_name': np.zeros((n_trials_total,)),
                              })

    layer_names = [None, 'pool1','pool2','pool3','pool4']

    tt=-1
    for ca in range(n_categ_use):
        for co in range(n_concepts_use):
            for ex in range(n_ex_use):
                for typ in range(n_image_types):
                    tt+=1
                    trial_info['trial_num_overall'].iloc[tt] = tt
                    trial_info['categ_ind'].iloc[tt] = ca
                    trial_info['concept_ind'].iloc[tt] = co
                    trial_info['super_name'].iloc[tt] = categ_use[ca]
                    trial_info['basic_name'].iloc[tt] = concepts_use[ca][co]
                    trial_info['ex_num'].iloc[tt] = ex
                    trial_info['image_type_num'].iloc[tt] = typ

                    name_raw = image_names_use[ca][co][ex].split('.jpg')[0]
                    if layer_names[typ] is None:
                        # trial_info['image_name'].iloc[tt] = os.path.join(name_raw,'orig.png')
                        trial_info['image_name'].iloc[tt] = os.path.join('%s_orig.png'%name_raw)
                        dbxpath = '/images/'+name_raw+'/orig.png'
                        trial_info['dropbox_url'].iloc[tt] = get_shared_url(dbxpath, dbx)
                    else:
                        # trial_info['image_name'].iloc[tt] = os.path.join(name_raw,
                        #                                              'grid5_1x1_upto_%s.png'%layer_names[typ])
                        trial_info['image_name'].iloc[tt] = os.path.join('%s_grid5_1x1_upto_%s.png'%\
                                                                         (name_raw,layer_names[typ]))
                        dbxpath = '/images/'+name_raw+'/grid5_1x1_upto_%s.png'%layer_names[typ]
                        trial_info['dropbox_url'].iloc[tt] = get_shared_url(dbxpath, dbx)

                    if np.mod(ex,4)==0:
                        trial_info['target_present'].iloc[tt] = True
                        trial_info['cue_level'].iloc[tt] = 'basic'
                    elif np.mod(ex,4)==1:
                        trial_info['target_present'].iloc[tt] = False
                        trial_info['cue_level'].iloc[tt] = 'basic'
                    elif np.mod(ex,4)==2:
                        trial_info['target_present'].iloc[tt] = True
                        trial_info['cue_level'].iloc[tt] = 'super'
                    elif np.mod(ex,4)==3:
                        trial_info['target_present'].iloc[tt] = False
                        trial_info['cue_level'].iloc[tt] = 'super'

    # decide what name to use to "cue" each trial, based on which level they are being cued at
    # (these are all "correct names"; some of them get changed to incorrect in the next step)
    trial_info['cue_name'] = trial_info['super_name']
    trial_info['cue_name'][trial_info['cue_level']=='basic'] = \
        trial_info['basic_name'][trial_info['cue_level']=='basic']

    # Assign incorrect names to all the basic-level target-absent trials
    # always swapping basic-level names across trials with same superordinate 
    # level name, and same in all other attributes
    lev = 'basic'

    for ca in range(n_categ_use):
        for typ in range(n_image_types):
            for ex in range(n_ex_use):

                group = (trial_info['target_present']==False) & \
                        (trial_info['categ_ind']==ca) & \
                        (trial_info['cue_level']==lev) & \
                        (trial_info['image_type_num']==typ) & \
                        (trial_info['ex_num']==ex)

                actual_basic_inds = np.array(trial_info['concept_ind'][group])
                actual_basic_names = np.array(trial_info['basic_name'][group])
                # switch the names around pseudo-randomly, so they are 
                # always cued with the wrong name on these trials.
                incorrect_basic_inds = swap_rand_pairs(actual_basic_inds).astype(int)

                trial_info['cue_name'].iloc[group] = actual_basic_names[incorrect_basic_inds]


    # Assign incorrect names to all the superord-level target-absent trials
    lev = 'super'

    for co in range(n_concepts_use):
        for typ in range(n_image_types):
            for ex in range(n_ex_use):

                group = (trial_info['target_present']==False) & \
                        (trial_info['concept_ind']==co) & \
                        (trial_info['cue_level']==lev) & \
                        (trial_info['image_type_num']==typ) & \
                        (trial_info['ex_num']==ex)

                actual_super_inds = np.array(trial_info['categ_ind'][group])
                actual_super_names = np.array(trial_info['super_name'][group])
                # switch the names around pseudo-randomly, so they are 
                # always cued with the wrong name on these trials.
                incorrect_super_inds = swap_rand_pairs(actual_super_inds).astype(int)

                trial_info['cue_name'].iloc[group] = actual_super_names[incorrect_super_inds]
                
    # shuffle the order of everything together.
    shuff_order = np.random.permutation(np.arange(n_trials_total))
    trial_info = trial_info.iloc[shuff_order]

    # finally, assign the run numbers for each trial (these are the only non-shuffled columns).
    trial_info['run_number'] = np.repeat(np.arange(n_runs), n_trials_per_run) + 1
    trial_info['trial_in_run'] = np.tile(np.arange(n_trials_per_run), [n_runs,]) + 1
   
    # double check everything
    check_trial_info(trial_info)
    
    # now save, in a format easy to load in matlab later
    expt_design_folder = os.path.join(project_root, 'expt_design')
    if not os.path.exists(expt_design_folder):
        os.makedirs(expt_design_folder)
    trialinfo_filename1 =  os.path.join(expt_design_folder, 'trial_info_%d.csv'%rndseed)
    print('saving to %s'%trialinfo_filename1)
    trial_info.to_csv(trialinfo_filename1, index=False)
    trialinfo_filename2 =  os.path.join(expt_design_folder, 'trial_info_%d.json'%rndseed)
    print('saving to %s'%trialinfo_filename2)
    trial_info.to_json(trialinfo_filename2)
    
    return

In [6]:
def check_trial_info(ti):

    # make sure that the target present/target absent trials line up correctly
    # (cue name should match actual name for target present, and should 
    # always mismatch for target absent)

    inds_check = (ti['cue_level']=='basic') & (ti['target_present']==True)
    assert(np.all(ti['cue_name'][inds_check]==ti['basic_name'][inds_check]))
    inds_check = (ti['cue_level']=='basic') & (ti['target_present']==False)
    assert(np.all(ti['cue_name'][inds_check]!=ti['basic_name'][inds_check]))

    inds_check = (ti['cue_level']=='super') & (ti['target_present']==True)
    assert(np.all(ti['cue_name'][inds_check]==ti['super_name'][inds_check]))
    inds_check = (ti['cue_level']=='super') & (ti['target_present']==False)
    assert(np.all(ti['cue_name'][inds_check]!=ti['super_name'][inds_check]))

    # check to make sure individual attributes are distributed evenly across trials 
    n_trials_total = ti.shape[0]

    attr_check_even = ['super_name', 'basic_name', 'ex_num', 'image_type_num', 'target_present', 'cue_level']
    for attr in attr_check_even:

        # should be an equal number of each thing 
        un, counts = np.unique(ti[attr], return_counts=True)
        assert(np.all(counts==n_trials_total/len(un)))

    # check the counterbalancing over multiple attributes

    # there should be an equal number of trials in each of the combinations of these 
    # different attribute "levels". for example each combination of category/image type. 

    attr_balanced = [ti['categ_ind'], ti['concept_ind'], ti['image_type_num'], ti['target_present'], ti['cue_level']]
    attr_balanced_inds = np.array([np.unique(attr, return_inverse=True)[1] for attr in attr_balanced]).T

    n_levels_each = [len(np.unique(attr)) for attr in attr_balanced]
    n_combs_expected = np.prod(n_levels_each)
    n_repeats_expected = n_trials_total/n_combs_expected

    un_rows, counts = np.unique(attr_balanced_inds,axis=0, return_counts=True)

    assert(un_rows.shape[0]==n_combs_expected)
    assert(np.all(counts==n_repeats_expected))


In [7]:
def swap_rand_pairs(sequence):
    
    n = len(sequence)
    assert(np.mod(n, 2)==0)
    
    randpairs = np.floor(np.random.permutation(np.arange(n))/2).astype(int)
    new_sequence = np.zeros(np.shape(sequence))
    
    for ii in range(int(n/2)):
        
        pair_inds = randpairs==ii
        new_sequence[pair_inds] = np.flip(sequence[pair_inds])
        
    return new_sequence

In [8]:
# make_trial_info(234355)
make_trial_info(565676)

['office_supply' 'body_part' 'fruit' 'bird' 'part_of_car' 'toy' 'dessert'
 'drink' 'clothing' 'vegetable']
saving to /user_data/mmhender/featsynth/expt_design/trial_info_565676.csv
saving to /user_data/mmhender/featsynth/expt_design/trial_info_565676.json


In [10]:
ti = pd.read_csv('/user_data/mmhender/featsynth/expt_design/trial_info_565676.csv')
ti

Unnamed: 0,trial_num_overall,categ_ind,concept_ind,super_name,basic_name,ex_num,image_type_num,image_name,dropbox_url,target_present,cue_level,cue_name,run_number,trial_in_run
0,709.0,5.0,5.0,toy,dollhouse,1.0,4.0,dollhouse_02s_grid5_1x1_upto_pool4.png,https://dl.dropboxusercontent.com/s/rlkdum7itu...,False,basic,hula_hoop,1,1
1,978.0,8.0,0.0,clothing,cape,3.0,3.0,cape_04s_grid5_1x1_upto_pool3.png,https://dl.dropboxusercontent.com/s/xlhs45lz02...,False,super,dessert,1,2
2,779.0,6.0,2.0,dessert,baklava,3.0,4.0,baklava_04s_grid5_1x1_upto_pool4.png,https://dl.dropboxusercontent.com/s/o9gv2i3brr...,False,super,body_part,1,3
3,1059.0,8.0,4.0,clothing,pantsuit,3.0,4.0,pantsuit_04s_grid5_1x1_upto_pool4.png,https://dl.dropboxusercontent.com/s/u2g0kh0bza...,False,super,fruit,1,4
4,1133.0,9.0,2.0,vegetable,arugula,2.0,3.0,arugula_03s_grid5_1x1_upto_pool3.png,https://dl.dropboxusercontent.com/s/up9oc50tz9...,True,super,vegetable,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,971.0,8.0,0.0,clothing,cape,2.0,1.0,cape_03s_grid5_1x1_upto_pool1.png,https://dl.dropboxusercontent.com/s/2qbfmek329...,True,super,clothing,12,96
1196,160.0,1.0,2.0,body_part,elbow,0.0,0.0,elbow_01b_orig.png,https://dl.dropboxusercontent.com/s/xrhf8qsq64...,True,basic,elbow,12,97
1197,949.0,7.0,5.0,drink,juice,1.0,4.0,juice_02s_grid5_1x1_upto_pool4.png,https://dl.dropboxusercontent.com/s/c58fl9d0by...,False,basic,smoothie,12,98
1198,472.0,3.0,5.0,bird,toucan,2.0,2.0,toucan_03s_grid5_1x1_upto_pool2.png,https://dl.dropboxusercontent.com/s/z4anqubjpe...,True,super,bird,12,99


In [13]:
for rr in np.arange(1,13):

    ti_thisrun = ti.iloc[np.array(ti['run_number'])==rr]
    expt_design_folder = os.path.join(project_root, 'expt_design')
    js_filename = os.path.join(expt_design_folder, 'trialseq_run%d.js'%rr)
    make_trial_js(ti_thisrun, js_filename, var_name='info')

In [56]:
for rr in np.arange(1,2):

    ti_thisrun = ti.iloc[np.array(ti['run_number'])==rr]
    ti_thisrun['image_name'].iloc[np.arange(0,100,2)] = 'dollhouse_02s_grid5_1x1_upto_pool4.png'
    ti_thisrun['image_name'].iloc[np.arange(1,100,2)] = 'cape_04s_grid5_1x1_upto_pool3.png'
    expt_design_folder = os.path.join(project_root, 'expt_design')
    js_filename = os.path.join(expt_design_folder, 'trialseq_TEST_run%d.js'%rr)
    make_trial_js(ti_thisrun, js_filename, var_name='info')

In [57]:
ti_thisrun

Unnamed: 0,trial_num_overall,categ_ind,concept_ind,super_name,basic_name,ex_num,image_type_num,image_name,target_present,cue_level,cue_name,run_number,trial_in_run
0,709.0,5.0,5.0,toy,dollhouse,1.0,4.0,dollhouse_02s_grid5_1x1_upto_pool4.png,False,basic,hula_hoop,1,1
1,978.0,8.0,0.0,clothing,cape,3.0,3.0,cape_04s_grid5_1x1_upto_pool3.png,False,super,dessert,1,2
2,779.0,6.0,2.0,dessert,baklava,3.0,4.0,dollhouse_02s_grid5_1x1_upto_pool4.png,False,super,body_part,1,3
3,1059.0,8.0,4.0,clothing,pantsuit,3.0,4.0,cape_04s_grid5_1x1_upto_pool3.png,False,super,fruit,1,4
4,1133.0,9.0,2.0,vegetable,arugula,2.0,3.0,dollhouse_02s_grid5_1x1_upto_pool4.png,True,super,vegetable,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1034.0,8.0,3.0,clothing,jumpsuit,2.0,4.0,cape_04s_grid5_1x1_upto_pool3.png,True,super,clothing,1,96
96,1039.0,8.0,3.0,clothing,jumpsuit,3.0,4.0,dollhouse_02s_grid5_1x1_upto_pool4.png,False,super,fruit,1,97
97,1162.0,9.0,4.0,vegetable,kale,0.0,2.0,cape_04s_grid5_1x1_upto_pool3.png,True,basic,kale,1,98
98,1146.0,9.0,3.0,vegetable,rhubarb,1.0,1.0,dollhouse_02s_grid5_1x1_upto_pool4.png,False,basic,beet,1,99


In [12]:
def make_trial_js(df, js_filename, var_name='info'):
    
    n_trials = df.shape[0]

    with open(js_filename,'w') as f:

        f.write('var %s = [\n'%var_name)

        for tt in range(n_trials):
            
            trial_info = dict(df.iloc[tt])

            f.write('    {\n')
            for key,val in zip(trial_info.keys(), trial_info.values()):

                if isinstance(val, str):
                    f.write('    \'%s\': \'%s\',\n'%(key, val))
                else:
                    f.write('    \'%s\': %d,\n'%(key, val))

            f.write('    },\n')

        f.write('];\n')

In [18]:
url = get_shared_url(path = '/images/aloe_02s/orig.png')
url

'https://dl.dropboxusercontent.com/s/2mdxubcv6biu9sk/orig.png?dl=0'