# Sequence generation for the pseudoRWM set of experiments

## Import statements and utilities

In [1]:
import numpy as np
import pandas as pd

In [2]:
def shuffle_along_axis(arr, axis):
    """Return a copy of ``arr`` with its entries shuffled along ``axis``.

    Every 1-D slice taken along ``axis`` is permuted independently: a random
    key is drawn for each element and the elements are reordered by the
    sort order of those keys.

    Args:
        arr: numpy array to shuffle (left unmodified).
        axis: axis along which the entries are reordered.

    Returns:
        A new array with the same shape as ``arr``.
    """
    # one uniform random sort key per element of arr
    random_keys = np.random.rand(*arr.shape)
    # sorting the keys along `axis` yields an independent permutation per slice
    order = np.argsort(random_keys, axis=axis)
    return np.take_along_axis(arr, order, axis=axis)


def shuffled(arr):
    """Return a shuffled copy of ``arr``; the input itself is left untouched.

    The copy is shuffled in place with ``np.random.shuffle``, which reorders
    along the first axis only.
    """
    result = arr.copy()
    np.random.shuffle(result)
    return result


def choose_n_and_delete(arr, N):
    """Draw ``N`` elements from ``arr`` without replacement and remove them.

    The draw is made over *positions*, and exactly the drawn positions are
    removed. Removing by value instead would drop every copy of a chosen
    value, so an input containing duplicates would lose more than ``N``
    elements.

    Args:
        arr: 1-D array-like to draw from (left unmodified).
        N: number of elements to draw.

    Returns:
        ``(chosen, remaining)``: the ``N`` drawn elements and a new array
        holding the ``len(arr) - N`` elements that were not drawn.

    Raises:
        ValueError: if ``arr`` is not 1-dimensional, or if ``N`` exceeds the
            number of available elements.
    """
    arr = np.asarray(arr)
    if arr.ndim != 1:
        raise ValueError("a must be 1-dimensional")
    # sample positions rather than values so duplicates are handled correctly
    chosen_idx = np.random.choice(arr.shape[0], size=N, replace=False)
    return arr[chosen_idx], np.delete(arr, chosen_idx)

## Settings

In [3]:
# Experiment variant to generate sequences for.
exp_type = "reverse_points"

# Number of different file sequences to generate (change if needed).
num_conditions = 1

# Per experiment type: number of stimulus repetitions after the first
# presentation (change if needed).
reps_dict = dict(reverse_points=5)
num_reps = reps_dict[exp_type]

# Should the number of repetitions be exact, or is it ok to exceed it by 1?
exact_reps = True

## Main sequence creation function

In [9]:
# class Conditions:
#     def __init__(self, ns, trial_type):


class pseudoRWMSequenceMaker:
    """Build and save trial sequences for the "reverse_points" experiment.

    A generated sequence is a 2-D float array with one row per variable
    (stim, correct_key, set_size, block, img_folder, stim_img, trial_type)
    and one column per trial. The blocks listed in ``block_structure`` are
    concatenated along the trial axis in order.
    """

    # directory used when no ``to_dir`` is passed to the constructor
    DEFAULT_TO_DIR = "/Users/ccnlab/Development/sequences/reverse-points/v1/"

    def __init__(
        self,
        exp_type,
        num_reps,
        num_conditions=10,
        exact_reps=False,
        max_consec_goal_reps=24,
        to_dir=None,
    ):
        """Store the generation settings.

        Args:
            exp_type: experiment variant; only "reverse_points" is accepted.
            num_reps: number of repetitions of each stimulus after its first
                presentation (each stimulus appears ``num_reps + 1`` times
                per block).
            num_conditions: number of sequence files to generate.
            exact_reps: stored on the instance; not consulted by
                ``make_sequences``.
            max_consec_goal_reps: stored on the instance; not consulted by
                ``make_sequences``.
            to_dir: directory the CSV files are written to. Defaults to
                ``DEFAULT_TO_DIR``.

        Raises:
            AssertionError: if ``exp_type`` is not a supported variant.
        """
        # ================ SETTINGS ==========================================================================
        assert exp_type in [
            "reverse_points",
        ], f"{exp_type} is not a valid exp_type"
        self.exp_type = exp_type
        self.num_reps = num_reps
        self.num_conditions = num_conditions
        self.exact_reps = exact_reps
        self.to_dir = self.DEFAULT_TO_DIR if to_dir is None else to_dir
        self.num_keys = 3
        self.max_stims = 6
        # list of (set_size, trial_type), one entry per block, in presentation order
        # trial_type: 1 = Points/Standard, 0 = Goals/Reverse
        self.block_structure = [
            (2, 1),
            (2, 0),
        ]
        self.num_blocks = len(self.block_structure)

        self.max_consec_goal_reps = max_consec_goal_reps

        # set_size_to_key_list: columns represent keyboard keys
        # each column says how many stimuli will be associated with that key
        # e.g., 1, 2, 3 means 1 stimulus will be associated with key 0, 2 with key 1, 3 with key 2,
        # for a total of 6 items
        self.set_size_to_key_list = {
            2: np.vstack(
                (
                    np.array([1, 0, 1]),
                    np.array([1, 1, 0]),
                    np.array([0, 1, 1]),
                    np.array([1, 0, 1]),
                )
            ),
            6: np.vstack(
                (
                    np.array([1, 2, 3]),
                    np.array([1, 3, 2]),
                    np.array([2, 2, 2]),
                    np.array([3, 2, 1]),
                )
            ),
        }

    def _key_counts_to_rule(self, key_counts):
        """Expand per-key stimulus counts into a ``{stimulus: key}`` dict.

        ``key_counts[k]`` is the number of stimuli assigned to key ``k``;
        stimuli are numbered from 0 and filled key by key, e.g.
        ``[1, 2, 3] -> {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2}``.
        """
        keys = [
            key_i
            for key_i in range(self.num_keys)
            for _ in range(key_counts[key_i])
        ]
        return dict(enumerate(keys))

    def _make_block_rules(self, blocks):
        """Draw one stimulus -> correct-key rule per block.

        The counts within each row of ``set_size_to_key_list`` are shuffled
        across keys, so it is not always the same key that gets 1, 2 or 3
        stimuli. Each block then consumes one row for its set size. There are
        only as many rows as listed per set size, so more blocks of one set
        size than rows raises ``IndexError``.
        """
        candidate_rows = {
            ns: shuffle_along_axis(keys, axis=1).tolist()
            for ns, keys in self.set_size_to_key_list.items()
        }
        return [
            self._key_counts_to_rule(candidate_rows[ns].pop())
            for ns, _ in blocks
        ]

    def _output_path(self, condition_i):
        """Return the CSV path for the 0-based condition index.

        Files are numbered from 1 (``seq1_practice.csv``, ``seq2_...``), so
        every condition gets its own file.
        """
        import os

        return os.path.join(self.to_dir, f"seq{condition_i + 1}_practice.csv")

    def make_sequences(self):
        """Generate ``num_conditions`` sequences and save each one as a CSV.

        For every condition a fresh set of key rules, image folders, stimulus
        images and trial orders is drawn from numpy's global random state.
        The resulting array is written to ``seq<k>_practice.csv`` inside
        ``to_dir`` (``k`` counts conditions from 1). The directory is created
        if it does not exist. The drawn rules are printed for each condition.

        Returns:
            The array of the last generated condition (rows are variables,
            columns are trials), or ``None`` when ``num_conditions`` is 0.
        """
        import os

        from tqdm import trange

        # rows of the output: stim, correct key, set size, block, img folder, img number, trial type
        colnames = [
            "stim",
            "correct_key",
            "set_size",
            "block",
            "img_folder",
            "stim_img",
            "trial_type",
        ]

        os.makedirs(self.to_dir, exist_ok=True)

        train_seq = None
        for s_i in trange(self.num_conditions):
            # blocks: one (set size, trial type) pair per block
            blocks = self.block_structure.copy()

            # ================ RULES =============================================================================
            # block_rules: one dict per block mapping stimulus index -> correct key
            block_rules = self._make_block_rules(blocks)
            print(block_rules)

            # ================ STIMULI ===========================================================================
            # stim_sets: image folder (1-based) used for each block
            stim_sets = np.random.permutation(self.num_blocks) + 1

            # block_stimuli: one dict per block mapping stimulus index -> image number (1-based)
            block_stimuli = [
                dict(
                    enumerate((np.random.permutation(self.max_stims) + 1)[:ns])
                )
                for ns, _ in blocks
            ]

            # ================ TRIALS ============================================================================
            block_arrays = []
            for block_i, (ns, block_cond) in enumerate(blocks):
                # prototype: num_reps + 1 independently shuffled passes over
                # the stimulus numbers 1..ns
                seqprototype = np.concatenate(
                    [
                        shuffled(np.arange(1, ns + 1))
                        for _ in range(self.num_reps + 1)
                    ]
                )
                # 0-based stimulus indices as plain ints for the dict lookups
                stim_indices = [s - 1 for s in seqprototype.tolist()]

                this_block = np.full(
                    (len(colnames), seqprototype.size), np.nan
                )
                this_block[0] = seqprototype  # stimulus number
                this_block[1] = [
                    block_rules[block_i][s] for s in stim_indices
                ]  # correct key for the stimulus number
                this_block[2] = ns  # set size
                this_block[3] = block_i + 1  # block number
                this_block[4] = stim_sets[block_i]  # image folder
                this_block[5] = [
                    block_stimuli[block_i][s] for s in stim_indices
                ]  # stimulus image number
                this_block[6] = block_cond  # trial type
                block_arrays.append(this_block)

            # blocks follow each other along the trial (column) axis
            train_seq = np.hstack(block_arrays)

            # ================ CSV FILE ==========================================================================
            np.savetxt(self._output_path(s_i), train_seq, delimiter=",")

        return train_seq

## Create sequences

In [11]:
# Build the sequence maker from the values chosen in the Settings section.
# `num_conditions` is passed through (rather than a literal 1) so that editing
# it there actually changes how many sequences are generated.
seqmkr = pseudoRWMSequenceMaker(
    exp_type=exp_type,
    num_reps=num_reps,
    num_conditions=num_conditions,
    exact_reps=exact_reps,
    max_consec_goal_reps=16,
)
# can do max_consec_goal_reps=4 for Reps3 and 16 for Conf3 (a bit slow)
seqmkr.make_sequences()

100%|██████████| 1/1 [00:00<00:00, 250.83it/s]

[{0: 0, 1: 1}, {0: 0, 1: 1}]





array([[1., 2., 1., 2., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 2., 1.,
        2., 1., 2., 1., 2., 1., 1., 2.],
       [0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 0., 1.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2.],
       [5., 3., 5., 3., 3., 5., 5., 3., 3., 5., 3., 5., 1., 6., 1., 6.,
        1., 6., 1., 6., 1., 6., 6., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.]])

## Checks

In [17]:
# Sanity check of the saved sequence file(s): for each file, print how many
# distinct blocks exist per (set_size, trial_type) pair, the file index, and
# the image folders that appear in it.

# Labels applied to the columns of the transposed table.
colnames = [
    "stim",
    "correct_key",
    "set_size",
    "block",
    "img_folder",
    "stim_img",
    "trial_type",
]

for i in np.arange(1, 2):
    seq_path = f"../reverse-points/v1/seq{i}_practice.csv"
    # the file has no header row; transpose so that each row is one trial
    df = pd.read_csv(seq_path, header=None).T
    df.columns = colnames

    blocks_per_cell = df.groupby(["set_size", "trial_type"]).block.nunique()
    print(blocks_per_cell, i, df.img_folder.unique())

set_size  trial_type
2.0       0.0           1
          1.0           1
Name: block, dtype: int64 1 [1. 2.]


In [22]:
# Display the table left in `df` by the check loop (the last file it read).
df

Unnamed: 0,stim,correct_key,set_size,block,img_folder,stim_img,trial_type
0,1.0,0.0,2.0,1.0,1.0,5.0,1.0
1,2.0,1.0,2.0,1.0,1.0,3.0,1.0
2,1.0,0.0,2.0,1.0,1.0,5.0,1.0
3,2.0,1.0,2.0,1.0,1.0,3.0,1.0
4,2.0,1.0,2.0,1.0,1.0,3.0,1.0
5,1.0,0.0,2.0,1.0,1.0,5.0,1.0
6,1.0,0.0,2.0,1.0,1.0,5.0,1.0
7,2.0,1.0,2.0,1.0,1.0,3.0,1.0
8,2.0,1.0,2.0,1.0,1.0,3.0,1.0
9,1.0,0.0,2.0,1.0,1.0,5.0,1.0
