In [278]:
import pandas as pd
import numpy as np
import random, os, sys 
from groo.groo import get_root

os.path.join(get_root(".tasks_root"))

'/data/drive/postdoc/grants/resources/tasks-for-grants'

In [279]:


def gen_bin_arr(size, no_relevant):

    if no_relevant > size:
        raise ValueError("Number of relevant values (1s) cannot exceed the size of the array.")
    
    # Step 1: Generate the binary array
    binary_array = np.zeros(size, dtype=int)
    binary_array[:no_relevant] = 1
    np.random.shuffle(binary_array)
    
    # Step 2: Assign sequential numbers to positions where the binary array has 1s
    sequential_numbers = np.arange(1, no_relevant + 1)  # Generate sequential numbers starting from 1
    np.random.shuffle(sequential_numbers)  # Shuffle the sequential numbers
    result_array = np.zeros_like(binary_array)  # Create an array of the same shape as the binary array
    result_array[binary_array == 1] = sequential_numbers  # Assign shuffled numbers to 1 positions
    
    return binary_array, result_array
##this.trialData[this.tridx].t_trial_start = performance.now();


In [280]:

conds = [c+str(i) for c in ["low", "mid", "high"] for i in range(1,5)] 
conds

['low1',
 'low2',
 'low3',
 'low4',
 'mid1',
 'mid2',
 'mid3',
 'mid4',
 'high1',
 'high2',
 'high3',
 'high4']

In [281]:
def sample_conditions(conds, lvls, no_relevant):
    while True:
        sampled_keys = random.sample(conds, no_relevant)
        sampled_values = [lvls[key][0] for key in sampled_keys]  # Extract first values
        
        if (any(val > 0 for val in sampled_values) and any(val < 0 for val in sampled_values)) or (no_relevant==1):
            return sampled_keys  # Ensure at least one positive and one negative


In [None]:
no_relevant = 4
no_contexts = 6
no_shown_t = 2

reps = 1 # 8 for training # 30 for main1
n_targets = 2

noise = 2 # standard deviation 


n_stim_total = 10
type ="main2" # "train" "main1" "main2"
flag = "_test"

conds = [c+str(i) for c in ["low", "mid", "high"] for i in range(1,5)] 
lvls = dict({
    "irr": [0, 0],
    "low1": [5, -15],
    "low2": [-5, 15],
    "low3": [15, -5],
    "low4": [-15, 5],
    "mid1": [25, -15], 
    "mid2": [-25, 15],
    "mid3": [15, -25],
    "mid4": [-15, 25],
    "high1": [40, -20],
    "high2": [-40, 20],
    "high3": [20, -40],
    "high4": [-20, 40]
})

for idx in range(0,5):
    print(i)

    basic_sched = pd.DataFrame()
    ii = 1
    for no_relevant in [no_relevant]:
        random_keys = sample_conditions(conds, lvls, no_relevant)
        ordered_keys = sorted(random_keys, key=lambda k: lvls[k][0], reverse=True)  # Sort by the upper bound descending

        while len(ordered_keys) < no_contexts:
            ordered_keys.append("irr")
        relevance = ["rel" if key != "irr" else "irr" for key in ordered_keys]

        stimuli = list(range(1, n_stim_total + 1))  # Stimuli numbered from 1 to 10
        stim_chosen = no_relevant*10 + np.array(random.sample(stimuli, no_contexts))

        positions = list(range(1, 7))  # Stimuli numbered from 1 to 10
        stim_positions = random.sample(positions, no_contexts)

        correct_stim = stim_positions[0]
        ## each rep will contain 2 trials (each for one target)
        for r in range(reps):
            #print(r)
            for t in range(n_targets):
                #for s in range()
                shown, order = gen_bin_arr(no_contexts,no_shown_t) 
                tdf = pd.DataFrame({"tr_id": ii,
                                    "no_rel_ctxts": no_relevant,
                                    "target": t+1, 
                                    "rel_cond": "rel"+str(no_relevant),
                                    "relevance": relevance, 
                                    "condition": ordered_keys,  
                                    "condition_id": np.arange(1,no_contexts+1),
                                    "outcome_rel": [lvls[x][t] for x in ordered_keys],
                                    "stim": stim_chosen,  
                                    "stim_positions": stim_positions,
                                    "shown": shown, 
                                    "order": order,
                                    "correct_stim": correct_stim-1, # minus 1 because it will refer to the position 
                                    "no_shown": no_shown_t, 
                                    "outcome_t1": [lvls[x][0] for x in ordered_keys],
                                    "outcome_t2": [lvls[x][1] for x in ordered_keys],
                                    })
                a=1
                
                basic_sched = pd.concat([basic_sched, tdf])
                ii = ii + 1
            

    df2 = basic_sched.copy()
    #basic_sched.to_csv(os.path.join(get_root(".tasks_root"), "contextual-inference", "schedules", "sch1_filtered.csv"))


    # filter and re-order schedule
    basic_sched = basic_sched.loc[basic_sched["shown"]==1,]


    basic_sched = basic_sched.set_index(["tr_id", "order"]).unstack("order")

    # Flatten column multi-index

    basic_sched.columns = [
        f"{col[0]}_stim{col[1]}" if col[1] else col[0] for col in basic_sched.columns
    ]
    constant_columns = [ "no_rel_ctxts", "target", "rel_cond",  "correct_stim", "no_shown"] #

    df_constants = df2[["tr_id"] + constant_columns].drop_duplicates().set_index("tr_id")

    # list of columns which need to be separated 
    per_sim = ["relevance", "condition", "condition_id", "outcome_rel", "outcome_t1", "outcome_t2", "stim", "stim_positions"]
    basic_sched = df_constants.merge(basic_sched.loc[:,[v + "_stim"+ str(st) for v in per_sim for st in np.arange(1,no_shown_t+1)] ], left_index=True, right_on="tr_id")

    # calculate total outcome
    if type == "train":
        basic_sched["outcome"] = basic_sched.filter(regex="outcome_rel").sum(axis=1) 
    elif (type == "main1") or (type == "main2"): 
        basic_sched["outcome"] = basic_sched.filter(regex="outcome_rel").sum(axis=1) + np.random.normal(0, noise,basic_sched.shape[0]).round()

    # calculate hypothetical outcome for both targets
    basic_sched["t1_ev"] = basic_sched.filter(regex="outcome_t1_").sum(axis=1)
    basic_sched["t2_ev"] = basic_sched.filter(regex="outcome_t2_").sum(axis=1)

    from sklearn.utils import shuffle
    basic_sched = shuffle(basic_sched)



    ## # add whether stim is shown or not
    if type == "train":
        rating_sections = [0.65] # breaks
        rating_freqs = [0,1] # proportion of 1s in each section 
    elif (type == "main1") or (type == "main2"): 
        rating_sections = [0.3, 0.6] # breaks
        rating_freqs = [0,0.2, 0.7] # proportion of 1s in each section 

    basic_sched = basic_sched.reset_index()
    # Determine section boundaries based on index positions
    split_indices = [int(basic_sched.shape[0] * prop) for prop in rating_sections] + [basic_sched.shape[0]]
    # Assign binomial values based on position
    basic_sched['require_rating'] = np.nan  # Initialize column
    start_idx = 0
    for i, end_idx in enumerate(split_indices):
        basic_sched.iloc[start_idx:end_idx, basic_sched.columns.get_loc('require_rating')] = np.random.binomial(1, rating_freqs[i], size=(end_idx - start_idx))
        start_idx = end_idx
    # Convert binomial column to integer
    basic_sched['require_rating'] = basic_sched['require_rating'].astype(int)


    ### A choice trial - at least one feature has to be relevant
    basic_sched["suit_for_dec"] = basic_sched.filter(regex="relevance_").apply(lambda row: 1 if "rel" in row.values else 0, axis=1)
    if type == "train":
        reward_tr_sections = [0.65] # breaks
        reward_tr_rating_freqs = [0,0.4] # proportion of 1s in each section 
    elif (type == "main1") or (type == "main2"):  
        reward_tr_sections = [0.2,0.5] # breaks
        reward_tr_freqs = [0,0.25, 0.6] # proportion of 1s in each section 

    basic_sched = basic_sched.reset_index()
    # Determine section boundaries based on index positions
    split_indices = [int(basic_sched.shape[0] * prop) for prop in reward_tr_sections] + [basic_sched.shape[0]]
    # Assign binomial values based on position
    basic_sched['decision'] = np.nan  # Initialize column
    start_idx = 0
    for i, end_idx in enumerate(split_indices):
        basic_sched.iloc[start_idx:end_idx, basic_sched.columns.get_loc('decision')] = np.random.binomial(1, reward_tr_freqs[i], size=(end_idx - start_idx))
        start_idx = end_idx
    # Convert binomial column to integer
    basic_sched['decision'] = basic_sched['decision'].astype(int)
    basic_sched['decision'] = basic_sched['decision']*basic_sched["suit_for_dec"]

    ## corect stim
    basic_sched["correct_stimuli"] = [list(basic_sched.loc[basic_sched["relevance_stim1"]=="rel", "condition_id_stim1" ].unique()-1)] * basic_sched.shape[0]
    

    basic_sched.to_csv(os.path.join(get_root(".tasks_root"), "contextual-inference", "schedules", "sch_r"+str(no_relevant)+"_"+type+"_"+str(idx)+flag+".csv"))
#basic_sched.to_csv(os.path.join(get_root(".tasks_root"), "contextual-inference", "schedules", "sch_r"+str(no_relevant)+"_train2_test.csv"))
#df2.to_csv(os.path.join(get_root(".tasks_root"), "contextual-inference", "schedules", "sch1_full.csv"))




2
2
2
2
2
