In [164]:
import os
import numpy as np
from pandas import read_csv
from collections import Counter
import pandas as pd

In [165]:
exp_param_directory = '../experimental_parameters/'

In [166]:
exp_param_file = exp_param_directory + 'level_matching_parameters.csv'

In [167]:
test_greebles_file = exp_param_directory + 'test_greeble_images.csv'
greebles = read_csv(test_greebles_file).values
greebles = [g[0] for g in greebles]
greebles

['m1~110-v1.tif',
 'm1~16-v1.tif',
 'm4~49-v1.tif',
 'f3~39-v1.tif',
 'f2~23-v1.tif',
 'f1~14-v1.tif',
 'm1~11-v1.tif',
 'f4~41-v1.tif',
 'm2~210-v1.tif',
 'f3~38-v1.tif',
 'f3~310-v1.tif',
 'm4~46-v1.tif',
 'm2~22-v1.tif',
 'f4~44-v1.tif',
 'f3~34-v1.tif',
 'f4~42-v1.tif',
 'm3~31-v1.tif',
 'f3~32-v1.tif',
 'm1~13-v1.tif',
 'f5~53-v1.tif',
 'f2~24-v1.tif',
 'm2~23-v1.tif',
 'm1~14-v1.tif',
 'm5~56-v1.tif',
 'm4~45-v1.tif',
 'f5~55-v1.tif',
 'f5~57-v1.tif',
 'm5~55-v1.tif',
 'f1~15-v1.tif',
 'm4~44-v1.tif',
 'm3~33-v1.tif',
 'm5~54-v1.tif',
 'f1~13-v1.tif',
 'f5~52-v1.tif',
 'f5~510-v1.tif',
 'm2~21-v1.tif',
 'm2~25-v1.tif',
 'f2~25-v1.tif',
 'm3~32-v1.tif',
 'm4~41-v1.tif']

In [168]:
n = len(greebles)
num_match = 4
reps_individual = 6
num_blocks = 24

In [169]:
greebles = [g[0]+g.split('~')[1].split('-')[0] for g in greebles]

In [170]:
male = [g for g in greebles if g[0] == 'm']
female = [g for g in greebles if g[0] == 'f']
f1 = [g for g in greebles if g[1] == '1']
f2 = [g for g in greebles if g[1] == '2']
f3 = [g for g in greebles if g[1] == '3']
f4 = [g for g in greebles if g[1] == '4']
f5 = [g for g in greebles if g[1] == '5']
families = [f1, f2, f3, f4, f5]

In [171]:
levels = ['family','sex']

In [172]:
def update(counter, selected):
    cp = counter.copy()
    for s in selected:
        cp[s] += 1
    return cp

In [173]:
def compute_leftover(limit):
    leftover = list()
    for k in gcounter.keys():
        if gcounter[k]<limit:
            leftover.append(k)
    for g in greebles:
        if g not in gcounter.keys():
            leftover.append(g)
    return leftover

In [174]:
def put_repeat_ahead(items):
    for i, e in enumerate(items):
        item_left = list(items.copy())
        ind_left = list(np.arange(len(items)))
        
        item_left.remove(e)
        ind_left.remove(i)
        
        if e in item_left:
            output = [e] + item_left
            output_idx = [i] + ind_left
            assert len(output) == len(items)
            return output, output_idx

# individual

In [175]:
def individual_sampling(counter, num, sample_list):
    g_ind = np.random.choice(len(sample_list), num_match)
    rep_ind = g_ind[np.random.choice(num_match,1)]
    trial_idx = np.append(rep_ind, g_ind)
    gbs = np.array(sample_list)[trial_idx]
    return gbs

In [176]:
def generate_individual_trials(counter, num_match, sample_list):
    gbs = individual_sampling(counter, num_match, sample_list)
#     print(gbs)
    counter_copy = update(gcounter, gbs)
    if counter_copy.most_common(1)[0][1]>reps_individual:
        return generate_individual_trials(counter, num_match, sample_list)
    else:
        return gbs, counter_copy

# family

In [177]:
def family_sampling(counter, num):
    norepeat = True
    while norepeat: # norepeat means not satisfying the criteria of having two families in one trials
        g_ind = np.random.choice(len(greebles), num_match+1)
        gs = np.array(greebles)[g_ind]
        fs = [g[1] for g in gs]
        if len(list(set(fs))) < len(fs):
            norepeat = False
            _, present_idx = put_repeat_ahead(fs)
            present_gs = gs[present_idx]
    return present_gs

In [178]:
def generate_family_trials(counter, num_match):
    gbs = family_sampling(counter, num_match)
    counter_copy = update(gcounter, gbs)
    if counter_copy.most_common(1)[0][1]>reps_individual:
        return generate_family_trials(counter, num_match)
    else:
        return gbs, counter_copy

# sex

In [179]:
def sex_sampling(counter, num, sample_list):
    bad_ratio = True
    while bad_ratio:
        g_ind = np.random.choice(len(sample_list), num_match+1)
        gs = np.array(sample_list)[g_ind]
        f = [1 for g in gs if g[0] == 'f']
        m = [1 for g in gs if g[0] == 'm']
        if abs(sum(f) - sum(m)) < 4:
            bad_ratio = False
    return np.array(sample_list)[g_ind]
def generate_sex_trials(counter, num_match, sample_list):
    gbs = sex_sampling(counter, num_match, sample_list)
    counter_copy = update(gcounter, gbs)
    if counter_copy.most_common(1)[0][1]>reps_individual:
        return generate_sex_trials(counter, num_match, sample_list)
    else:
        return gbs, counter_copy

In [180]:
gcounter = Counter()

In [181]:
family_samples = list()

In [182]:
for i in range(num_blocks):
    gbs, gcounter = generate_family_trials(gcounter, num_match)
    family_samples.append(gbs)

In [183]:
family_samples

[array(['m32', 'm23', 'm41', 'm33', 'f13'], dtype='<U4'),
 array(['f32', 'f52', 'm23', 'f14', 'f310'], dtype='<U4'),
 array(['m25', 'f25', 'm54', 'm25', 'm41'], dtype='<U4'),
 array(['m44', 'm54', 'm14', 'f32', 'f41'], dtype='<U4'),
 array(['f44', 'm16', 'f44', 'f42', 'm41'], dtype='<U4'),
 array(['m14', 'f57', 'f38', 'm46', 'f14'], dtype='<U4'),
 array(['f15', 'm21', 'f42', 'f13', 'm49'], dtype='<U4'),
 array(['m21', 'm23', 'm21', 'm41', 'f52'], dtype='<U4'),
 array(['m41', 'f14', 'f39', 'f15', 'm45'], dtype='<U4'),
 array(['m54', 'm14', 'f34', 'm25', 'f53'], dtype='<U4'),
 array(['m54', 'f24', 'f44', 'm23', 'f53'], dtype='<U4'),
 array(['f13', 'm14', 'm41', 'f53', 'f57'], dtype='<U4'),
 array(['m23', 'f23', 'm45', 'f24', 'f14'], dtype='<U4'),
 array(['f57', 'f24', 'f510', 'm44', 'f14'], dtype='<U4'),
 array(['m13', 'm16', 'f53', 'm32', 'm11'], dtype='<U4'),
 array(['f52', 'm31', 'f34', 'm55', 'f44'], dtype='<U4'),
 array(['f44', 'f24', 'f13', 'm44', 'f34'], dtype='<U4'),
 array(['m11

In [184]:
leftover = compute_leftover(reps_individual)

In [185]:
sex_samples = list()

In [186]:
for i in range(num_blocks):
    leftover = compute_leftover(reps_individual)
    gbs, gcounter = generate_sex_trials(gcounter, num_match, leftover)
    sex_samples.append(gbs)

In [187]:
sex_samples

[array(['m16', 'f15', 'm110', 'm210', 'f15'], dtype='<U4'),
 array(['f13', 'm22', 'm11', 'm45', 'm45'], dtype='<U4'),
 array(['f24', 'f57', 'm56', 'm56', 'f42'], dtype='<U4'),
 array(['f52', 'm49', 'm210', 'f32', 'f42'], dtype='<U4'),
 array(['m49', 'f52', 'm31', 'f32', 'f38'], dtype='<U4'),
 array(['m210', 'f510', 'f32', 'm55', 'm54'], dtype='<U4'),
 array(['m22', 'm45', 'f34', 'f23', 'f25'], dtype='<U4'),
 array(['m54', 'f14', 'm21', 'f55', 'f510'], dtype='<U4'),
 array(['m32', 'm56', 'f42', 'm11', 'f41'], dtype='<U4'),
 array(['m11', 'f23', 'm21', 'm13', 'm110'], dtype='<U4'),
 array(['m31', 'm45', 'f23', 'm23', 'f310'], dtype='<U4'),
 array(['m32', 'f510', 'm49', 'm22', 'm25'], dtype='<U4'),
 array(['m110', 'm46', 'm25', 'm22', 'f41'], dtype='<U4'),
 array(['m16', 'f15', 'm49', 'f38', 'm13'], dtype='<U4'),
 array(['f34', 'm13', 'm31', 'f310', 'm110'], dtype='<U4'),
 array(['m46', 'f32', 'f41', 'm32', 'm210'], dtype='<U4'),
 array(['m16', 'm31', 'm110', 'f41', 'm46'], dtype='<U4'),


# Save sampling procedure

In [188]:
num_trials = num_blocks*2
trial = np.arange(num_trials)
np.random.shuffle(trial)
trial

array([28, 33, 39, 14, 12, 10,  6, 40, 15, 35,  3, 30, 44, 25, 42, 38, 18,
       29, 41, 20, 17, 26, 13, 24, 23, 36, 45,  9,  5, 19,  4,  0, 47, 27,
       46,  2, 22,  1, 43, 21, 11, 16, 34, 32,  8,  7, 37, 31])

In [189]:
def gname(g):
    s = g[0] + g[1] + '~' + g[1:] + '-v1.tif'
    return s

In [194]:
df = pd.DataFrame()
t = 0
# for g in individual_samples:
#     vd = dict()
#     vd['level'] = 'individual'
#     vd['sample'] = gname(g[0])
#     vd['match1'] = gname(g[1])
#     vd['match2'] = gname(g[2])
#     vd['match3'] = gname(g[3])
#     vd['match4'] = gname(g[4])
#     vd['trial'] = int(trial[t])
#     df = df.append(vd, ignore_index=True)
#     t+=1
for g in family_samples:
    vd = dict()
    vd['level'] = 'Family'
    vd['sample'] = gname(g[0])
    vd['match1'] = gname(g[1])
    vd['match2'] = gname(g[2])
    vd['match3'] = gname(g[3])
    vd['match4'] = gname(g[4])
    vd['trial'] = int(trial[t])
    df = df.append(vd, ignore_index=True)
    t+=1
for g in sex_samples:
    vd = dict()
    vd['level'] = 'Sex'
    vd['sample'] = gname(g[0])
    vd['match1'] = gname(g[1])
    vd['match2'] = gname(g[2])
    vd['match3'] = gname(g[3])
    vd['match4'] = gname(g[4])
    vd['trial'] = int(trial[t])
    df = df.append(vd, ignore_index=True)
    t+=1

In [195]:
print(df)

     level         match1         match2         match3         match4  \
0   Family   m2~23-v1.tif   m4~41-v1.tif   m3~33-v1.tif   f1~13-v1.tif   
1   Family   f5~52-v1.tif   m2~23-v1.tif   f1~14-v1.tif  f3~310-v1.tif   
2   Family   f2~25-v1.tif   m5~54-v1.tif   m2~25-v1.tif   m4~41-v1.tif   
3   Family   m5~54-v1.tif   m1~14-v1.tif   f3~32-v1.tif   f4~41-v1.tif   
4   Family   m1~16-v1.tif   f4~44-v1.tif   f4~42-v1.tif   m4~41-v1.tif   
5   Family   f5~57-v1.tif   f3~38-v1.tif   m4~46-v1.tif   f1~14-v1.tif   
6   Family   m2~21-v1.tif   f4~42-v1.tif   f1~13-v1.tif   m4~49-v1.tif   
7   Family   m2~23-v1.tif   m2~21-v1.tif   m4~41-v1.tif   f5~52-v1.tif   
8   Family   f1~14-v1.tif   f3~39-v1.tif   f1~15-v1.tif   m4~45-v1.tif   
9   Family   m1~14-v1.tif   f3~34-v1.tif   m2~25-v1.tif   f5~53-v1.tif   
10  Family   f2~24-v1.tif   f4~44-v1.tif   m2~23-v1.tif   f5~53-v1.tif   
11  Family   m1~14-v1.tif   m4~41-v1.tif   f5~53-v1.tif   f5~57-v1.tif   
12  Family   f2~23-v1.tif   m4~45-v1.t

In [196]:
df = df.sort_values(by=['trial'])

In [214]:
num_blocks_per_run = 8
runs = np.repeat(np.arange(num_trials/num_blocks_per_run), num_blocks_per_run)

In [215]:
df['run'] = runs

In [216]:
df

Unnamed: 0,level,match1,match2,match3,match4,sample,trial,run
31,Sex,f1~14-v1.tif,m2~21-v1.tif,f5~55-v1.tif,f5~510-v1.tif,m5~54-v1.tif,0.0,0.0
37,Sex,f1~15-v1.tif,m4~49-v1.tif,f3~38-v1.tif,m1~13-v1.tif,m1~16-v1.tif,1.0,0.0
35,Sex,f5~510-v1.tif,m4~49-v1.tif,m2~22-v1.tif,m2~25-v1.tif,m3~32-v1.tif,2.0,0.0
10,Family,f2~24-v1.tif,f4~44-v1.tif,m2~23-v1.tif,f5~53-v1.tif,m5~54-v1.tif,3.0,0.0
30,Sex,m4~45-v1.tif,f3~34-v1.tif,f2~23-v1.tif,f2~25-v1.tif,m2~22-v1.tif,4.0,0.0
28,Sex,f5~52-v1.tif,m3~31-v1.tif,f3~32-v1.tif,f3~38-v1.tif,m4~49-v1.tif,5.0,0.0
6,Family,m2~21-v1.tif,f4~42-v1.tif,f1~13-v1.tif,m4~49-v1.tif,f1~15-v1.tif,6.0,0.0
45,Sex,f5~55-v1.tif,f5~55-v1.tif,m3~33-v1.tif,f3~39-v1.tif,f2~25-v1.tif,7.0,0.0
44,Sex,m3~33-v1.tif,m4~44-v1.tif,m5~55-v1.tif,f5~55-v1.tif,f3~310-v1.tif,8.0,1.0
27,Sex,m4~49-v1.tif,m2~210-v1.tif,f3~32-v1.tif,f4~42-v1.tif,f5~52-v1.tif,9.0,1.0


In [217]:
df.to_csv(exp_param_file)

In [218]:
exp_param = read_csv(exp_param_file, header=0)

In [219]:
kk = exp_param.values

In [220]:
kk

array([[31, 'Sex', 'f1~14-v1.tif', 'm2~21-v1.tif', 'f5~55-v1.tif',
        'f5~510-v1.tif', 'm5~54-v1.tif', 0.0, 0.0],
       [37, 'Sex', 'f1~15-v1.tif', 'm4~49-v1.tif', 'f3~38-v1.tif',
        'm1~13-v1.tif', 'm1~16-v1.tif', 1.0, 0.0],
       [35, 'Sex', 'f5~510-v1.tif', 'm4~49-v1.tif', 'm2~22-v1.tif',
        'm2~25-v1.tif', 'm3~32-v1.tif', 2.0, 0.0],
       [10, 'Family', 'f2~24-v1.tif', 'f4~44-v1.tif', 'm2~23-v1.tif',
        'f5~53-v1.tif', 'm5~54-v1.tif', 3.0, 0.0],
       [30, 'Sex', 'm4~45-v1.tif', 'f3~34-v1.tif', 'f2~23-v1.tif',
        'f2~25-v1.tif', 'm2~22-v1.tif', 4.0, 0.0],
       [28, 'Sex', 'f5~52-v1.tif', 'm3~31-v1.tif', 'f3~32-v1.tif',
        'f3~38-v1.tif', 'm4~49-v1.tif', 5.0, 0.0],
       [6, 'Family', 'm2~21-v1.tif', 'f4~42-v1.tif', 'f1~13-v1.tif',
        'm4~49-v1.tif', 'f1~15-v1.tif', 6.0, 0.0],
       [45, 'Sex', 'f5~55-v1.tif', 'f5~55-v1.tif', 'm3~33-v1.tif',
        'f3~39-v1.tif', 'f2~25-v1.tif', 7.0, 0.0],
       [44, 'Sex', 'm3~33-v1.tif', 'm4~44-v1.tif'