In [3]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

# Constants

In [4]:
# Destination directory for the generated sequence CSVs.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
seq_folder = "/Users/ccnlab/Development/sequences/shaping/v5/"

# Position names, in the index order used by the helpers below.
ALL_DIRECTIONS = ["top", "bottom", "left", "right"]

# Column order of every exported CSV: trial information first, then one image
# column and one key column per direction, and finally the set size.
OUTPUT_COL_ORDER = (
    ["stim", "correct_key", "block", "img_folder"]
    + [f"{direction}_img" for direction in ALL_DIRECTIONS]
    + [f"{direction}_key" for direction in ALL_DIRECTIONS]
    + ["set_size"]
)

# Helpers

In [5]:
from seq_utils import generate_sequence_optimized, shuffle_with_mask, swap_by_indices


def generate_food_map(num_food, num_trials):
    """Return one random ordering of the food indices per trial.

    Args:
        num_food: number of food indices; each ordering is a permutation of
            ``0 .. num_food - 1``.
        num_trials: number of orderings to produce.

    Returns:
        A list of ``num_trials`` lists. Each ordering is obtained by shuffling
        a copy of the previous one (the first starts from the sorted order),
        using the global ``np.random`` state.
    """
    food_map = []
    previous = list(range(num_food))
    for _ in range(num_trials):
        shuffled = list(previous)
        np.random.shuffle(shuffled)
        food_map.append(shuffled)
        # The next trial starts from this trial's ordering.
        previous = shuffled
    return food_map


def generate_shaping_block(num_directions, num_iter_per_stim, correct_key_mapping):
    """Build the trial columns of one shaping block.

    Args:
        num_directions: forwarded to ``generate_sequence_optimized`` and used
            as the number of foods laid out on each trial.
        num_iter_per_stim: forwarded to ``generate_sequence_optimized``.
        correct_key_mapping: dict mapping each direction name to its key.

    Returns:
        Dict of per-trial lists: ``stim`` (the food placed at the target
        position), ``correct_key`` (the key mapped to the target direction),
        one ``<direction>_img`` list holding the food shown at that position,
        and one constant ``<direction>_key`` list per mapping entry.
    """
    sequence = generate_sequence_optimized(num_directions, num_iter_per_stim)
    num_trials = len(sequence)
    food_map = generate_food_map(num_directions, num_trials)

    stims, correct_keys = [], []
    images = {f"{direction}_img": [] for direction in ALL_DIRECTIONS}
    for layout, target_pos in zip(food_map, sequence):
        # The stimulus is whatever food sits at the target position.
        stims.append(layout[target_pos])
        correct_keys.append(correct_key_mapping[ALL_DIRECTIONS[target_pos]])
        for pos, food in enumerate(layout):
            images[f"{ALL_DIRECTIONS[pos]}_img"].append(food)

    seq_data = {"stim": stims, "correct_key": correct_keys, **images}
    # The key assigned to each direction is constant within a block.
    for direction, key in correct_key_mapping.items():
        seq_data[f"{direction}_key"] = [key] * num_trials

    return seq_data


def shuffle_with_consecutive_check(stim_seq, key_dir, idx_check=1):
    """Shuffle (stim, key_dir) pairs while trying to avoid equal neighbours.

    The two sequences are zipped into pairs and shuffled with the global
    ``np.random`` state. Each shuffled arrangement is then repaired in a
    single left-to-right pass: when positions ``i`` and ``i + 1`` hold the
    same value in the checked field, position ``i`` is swapped with the first
    later pair whose value differs from both the current value and the left
    neighbour. If the pass still leaves a repeat, the whole arrangement is
    reshuffled, for at most 9000 attempts.

    Args:
        stim_seq: first component of every pair.
        key_dir: second component of every pair.
        idx_check: which component must not repeat consecutively
            (0 = stim_seq values, 1 = key_dir values).

    Returns:
        The list of pairs from the last attempt. When every attempt fails, a
        warning is printed and the last (still repeating) arrangement is
        returned.
    """
    max_attempts = 9000
    for _ in range(max_attempts):
        paired_data = list(zip(stim_seq, key_dir))

        np.random.shuffle(paired_data)

        consecutive_same = False
        for i in range(len(paired_data) - 1):
            if paired_data[i][idx_check] == paired_data[i + 1][idx_check]:
                # Look for a later pair that can take position i.
                swap_idx = None
                for j in range(i + 1, len(paired_data)):
                    candidate = paired_data[j][idx_check]
                    if candidate == paired_data[i][idx_check]:
                        continue
                    # Position 0 has no left neighbour. Indexing i - 1 there
                    # would wrap around to the LAST pair and reject valid
                    # candidates, so the check only applies for i > 0.
                    if i > 0 and candidate == paired_data[i - 1][idx_check]:
                        continue
                    swap_idx = j
                    break

                if swap_idx is not None:
                    paired_data[i], paired_data[swap_idx] = (
                        paired_data[swap_idx],
                        paired_data[i],
                    )

            # Still repeating after the repair attempt: give up on this shuffle.
            if paired_data[i][idx_check] == paired_data[i + 1][idx_check]:
                consecutive_same = True
                break

        if not consecutive_same:
            break
    else:
        print(
            f"Warning: Could not find arrangement without consecutive key_dir after {max_attempts} attempts"
        )
    return paired_data


def generate_seq_pair(num_stims, num_iter_per_stim, num_directions=4):
    """Create a shuffled stimulus sequence paired with a direction sequence.

    Every stimulus index appears ``num_iter_per_stim`` times and the direction
    indices ``0 .. num_directions - 1`` are tiled to the same total length, so
    that pairing them by position gives each stimulus an equal share of every
    direction when ``num_iter_per_stim`` is a multiple of ``num_directions``.

    Args:
        num_stims: number of distinct stimuli.
        num_iter_per_stim: repetitions of each stimulus.
        num_directions: number of distinct directions (default 4).

    Returns:
        Tuple ``(stim_seq, key_dir)`` of equal-length numpy arrays.

    Note:
        If the total number of trials is not a multiple of ``num_directions``
        the direction sequence is shorter and the pairing silently drops the
        surplus stimulus entries.
    """
    stim_seq = np.repeat(np.arange(num_stims), num_iter_per_stim)
    # Tile by the actual number of directions (previously hard-coded as 4,
    # which produced a wrong-length sequence for any other direction count).
    key_dir = np.tile(np.arange(num_directions), len(stim_seq) // num_directions)
    for idx_check in (0, 1):
        # best effort to avoid consecutive in stim seq first and then key_dir
        # NOTE(review): each call reshuffles the pairs from scratch, so only
        # the final pass (key_dir) is actually enforced on the returned order.
        paired_data = shuffle_with_consecutive_check(stim_seq, key_dir, idx_check)
        stim_seq, key_dir = zip(*paired_data)

    return np.array(stim_seq), np.array(key_dir)


def generate_non_shaping_block(
    num_directions, num_iter_per_stim, correct_key_mapping, stim_food_mapping
):
    """Build the trial columns of one non-shaping block.

    Args:
        num_directions: number of positions; forwarded to
            ``generate_seq_pair`` and used as the length of the position mask.
        num_iter_per_stim: repetitions of each stimulus.
        correct_key_mapping: dict mapping each direction name to its key.
        stim_food_mapping: dict mapping each stimulus index to a food index;
            its size is reported as the set size.

    Returns:
        Dict with ``stim`` (numpy array from ``generate_seq_pair``),
        ``correct_key``, one ``<direction>_img`` list per direction, one
        constant ``<direction>_key`` list per mapping entry, and ``set_size``.
    """
    set_size = len(stim_food_mapping)
    stim_seq, correct_dir_seq = generate_seq_pair(
        set_size, num_iter_per_stim, num_directions
    )
    num_trials = len(correct_dir_seq)

    seq_data = {
        "stim": stim_seq,
        "correct_key": [],
        "top_img": [],
        "bottom_img": [],
        "left_img": [],
        "right_img": [],
    }

    food_indexes = np.array([0, 1, 2, 3])
    for stim, target_pos in zip(stim_seq, correct_dir_seq):
        correct_key = correct_key_mapping[ALL_DIRECTIONS[target_pos]]
        target_food = stim_food_mapping[stim]
        # Move the stimulus' food to the target position...
        layout_seed = swap_by_indices(food_indexes, target_food, target_pos)
        # ...then shuffle with a mask that is True only at that position
        # (presumably the masked entry stays put; confirm in seq_utils).
        target_mask = np.array([pos == target_pos for pos in range(num_directions)])
        layout = shuffle_with_mask(layout_seed, target_mask)
        for pos, food in enumerate(layout):
            seq_data[f"{ALL_DIRECTIONS[pos]}_img"].append(food)
        seq_data["correct_key"].append(correct_key)

    # The key assigned to each direction is constant within a block.
    for direction, key in correct_key_mapping.items():
        seq_data[f"{direction}_key"] = [key] * num_trials

    seq_data["set_size"] = [set_size] * num_trials
    return seq_data


def get_key_mapping_by_level(level):
    """Return the fixed key sequence for a difficulty level.

    Args:
        level: one of ``"easy"``, ``"medium"`` or ``"hard"``.

    Returns:
        A new list of four key indices (the original inline note gives the
        order as "top", "bottom", "left", "right").

    Raises:
        ValueError: if ``level`` is not one of the known levels.
    """
    # Alternative sequences noted in the original code:
    # medium [2, 1, 0, 3], hard [1, 3, 2, 0].
    known_levels = (
        ("easy", [0, 1, 2, 3]),
        ("medium", [3, 0, 1, 2]),
        ("hard", [0, 2, 3, 1]),
    )
    for name, seq in known_levels:
        if level == name:
            return seq

    raise ValueError(f"Invalid level: {level}")


def generate_key_mapping(num_directions, bs, level=None):
    """Map each direction name to a key index.

    Args:
        num_directions: number of keys to permute when ``level`` is None.
        bs: index of the block set (0 or 1); selects which of the two
            predefined sequences of a level is used.
        level: ``None`` for a random permutation, or ``"easy"``,
            ``"medium"`` or ``"hard"`` for a predefined sequence.

    Returns:
        Dict ``{direction: key}`` built by pairing ``ALL_DIRECTIONS`` with the
        chosen sequence.

    Raises:
        ValueError: if ``level`` is not None and not a known level.
            Previously any unknown level silently used the medium mapping.
        IndexError: if ``bs`` is outside the predefined sequences of a level.
    """
    identity = list(range(len(ALL_DIRECTIONS)))
    sequences_by_level = {
        # "easy" mirrors get_key_mapping_by_level: keys in direction order.
        "easy": [identity, identity],
        "medium": [[3, 0, 1, 2], [2, 1, 0, 3]],
        "hard": [[0, 2, 3, 1], [1, 3, 2, 0]],
    }
    if level is None:
        seq = np.random.permutation(num_directions)
    elif level in sequences_by_level:
        seq = sequences_by_level[level][bs]
    else:
        raise ValueError(f"Invalid level: {level}")

    correct_key_mapping = dict(zip(ALL_DIRECTIONS, seq))
    return correct_key_mapping


def generate_food_mapping(num_food, num_chars=4):
    """Assign a food index to each of ``num_chars`` stimuli.

    The first ``num_food`` stimuli receive a random permutation of the food
    indices. Each remaining stimulus receives a food drawn without
    replacement from ``0 .. num_food - 1``, so those foods are shared with one
    of the first ``num_food`` stimuli.

    Args:
        num_food: number of distinct food indices.
        num_chars: total number of stimuli (default 4).

    Returns:
        Dict ``{stimulus_index: food_index}`` with ``num_chars`` entries.
    """
    primary_foods = np.random.permutation(num_food)
    extra_count = num_chars - num_food
    # Drawn even when extra_count is 0, keeping the random stream unchanged.
    extra_foods = np.random.choice(range(num_food), size=extra_count, replace=False)

    stim_food_mapping = dict(enumerate(primary_foods))
    stim_food_mapping.update(
        (num_food + offset, food) for offset, food in enumerate(extra_foods)
    )
    return stim_food_mapping

In [6]:
from itertools import combinations

# Every sequence file pairs `num_villages` distinct image folders chosen from
# folders 0 .. NUM_IMG_FOLDERS - 1.
NUM_IMG_FOLDERS = 4
items = np.arange(NUM_IMG_FOLDERS)
num_villages = 2
combos = list(combinations(items, num_villages))
# For 4 folders: [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
combos_as_lists = [list(combo) for combo in combos]
print(combos_as_lists)

# One [first, second] set-size pair per folder combination, alternating between
# the two orders. The length is derived from the combinations (it used to be a
# hard-coded `* 3`) so that indexing by seq_idx stays valid if NUM_IMG_FOLDERS
# or num_villages changes.
set_size_patterns = [[4, 6], [6, 4]]
set_size_list = [
    list(set_size_patterns[i % len(set_size_patterns)])
    for i in range(len(combos_as_lists))
]
print(set_size_list)

[[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
[[4, 6], [6, 4], [4, 6], [6, 4], [4, 6], [6, 4]]


# Shaping sequence

In [7]:
def generate_shaping_round(
    num_directions,
    iterations,
    correct_key_mapping,
    stim_food_mapping,
    img_set,
    bs,
    last_nonshaping_block,
):
    """Assemble one shaping round: a shaping block followed by a non-shaping block.

    Args:
        num_directions: forwarded to ``generate_shaping_block``.
        iterations: sequence whose first entry is the number of iterations per
            stimulus for the shaping block.
        correct_key_mapping: dict mapping each direction name to its key.
        stim_food_mapping: only its size is used, as the shaping block's
            ``set_size``.
        img_set: zero-based image folder index (stored as ``img_set + 1``).
        bs: zero-based index of the block set; blocks are numbered
            ``bs * 2`` and ``bs * 2 + 1``.
        last_nonshaping_block: DataFrame appended after the shaping block; it
            is copied, not modified.

    Returns:
        DataFrame with both blocks concatenated (original row indices kept).
    """
    folder = img_set + 1
    first_block = bs * 2

    shaping_block = pd.DataFrame(
        generate_shaping_block(num_directions, iterations[0], correct_key_mapping)
    ).assign(
        block=first_block,
        img_folder=folder,
        set_size=len(stim_food_mapping),
    )
    nonshaping_block = last_nonshaping_block.assign(
        block=first_block + 1,
        img_folder=folder,
    )
    return pd.concat([shaping_block, nonshaping_block])


def generate_nonshaping_round(
    num_directions,
    iterations,
    correct_key_mapping,
    stim_food_mapping,
    img_set,
    bs,
    last_nonshaping_block,
):
    """Assemble one non-shaping round.

    One non-shaping block is generated per entry of ``iterations`` and the
    supplied ``last_nonshaping_block`` is appended as the final block.

    Args:
        num_directions: forwarded to ``generate_non_shaping_block``.
        iterations: iterations per stimulus, one entry per generated block.
        correct_key_mapping: dict mapping each direction name to its key.
        stim_food_mapping: dict mapping each stimulus index to a food index.
        img_set: zero-based image folder index (stored as ``img_set + 1``).
        bs: zero-based index of the block set; generated blocks are numbered
            ``bs * (len(iterations) + 1) + position``.
        last_nonshaping_block: DataFrame used as the final block; it is
            copied, not modified.

    Returns:
        DataFrame with all blocks concatenated (original row indices kept).
    """
    blocks_per_set = len(iterations) + 1
    folder = img_set + 1

    frames = []
    block_num = 0  # stays 0 when `iterations` is empty
    for position, num_iter in enumerate(iterations):
        block_num = bs * blocks_per_set + position
        generated = pd.DataFrame(
            generate_non_shaping_block(
                num_directions, num_iter, correct_key_mapping, stim_food_mapping
            )
        )
        frames.append(generated.assign(block=block_num, img_folder=folder))

    # The shared final block comes right after the last generated block.
    frames.append(
        last_nonshaping_block.assign(block=block_num + 1, img_folder=folder)
    )
    return pd.concat(frames)


def generate_learning_round(
    set_comb,
    set_size_comb,
    level,
    iter_by_setsz,
    last_num_stim_iter=18,
    num_dir=4,
    num_food=4,
):
    """Generate the shaping and non-shaping rounds for every image set.

    For each image set a key mapping, a stimulus-to-food mapping and a final
    non-shaping block are generated. That final block is shared by the
    shaping round and the non-shaping round of the image set.

    Args:
        set_comb: image folder indices, one per block set.
        set_size_comb: set size of each block set, indexed like ``set_comb``.
        level: difficulty level forwarded to ``generate_key_mapping``.
        iter_by_setsz: dict mapping a set size to iterations per stimulus; the
            shaping block looks up ``num_dir``, the non-shaping block looks up
            the block set's own set size.
        last_num_stim_iter: iterations per stimulus of the shared final block.
        num_dir: number of directions.
        num_food: number of foods forwarded to ``generate_food_mapping``.

    Returns:
        Tuple ``(shaping_blocks, nonshaping_blocks, test_correct_key_mapping,
        test_stim_food_mapping)``. The two lists hold one DataFrame per image
        set; the two mappings are those of the first image set (empty dicts
        when ``set_comb`` is empty).
    """
    shaping_blocks = []
    nonshaping_blocks = []
    test_correct_key_mapping = {}
    test_stim_food_mapping = {}

    for bs, img_set in enumerate(set_comb):
        correct_key_mapping = generate_key_mapping(num_dir, bs, level)
        set_size = set_size_comb[bs]
        stim_food_mapping = generate_food_mapping(num_food, set_size)

        # The first image set's mappings are handed back for the testing round.
        if bs == 0:
            test_correct_key_mapping = correct_key_mapping
            test_stim_food_mapping = stim_food_mapping

        last_nonshaping_block = pd.DataFrame(
            generate_non_shaping_block(
                num_dir, last_num_stim_iter, correct_key_mapping, stim_food_mapping
            )
        )

        # Arguments shared by both round builders.
        round_args = (
            correct_key_mapping,
            stim_food_mapping,
            img_set,
            bs,
            last_nonshaping_block,
        )
        shaping_blocks.append(
            generate_shaping_round(num_dir, [iter_by_setsz[num_dir]], *round_args)
        )
        nonshaping_blocks.append(
            generate_nonshaping_round(num_dir, [iter_by_setsz[set_size]], *round_args)
        )

    return (
        shaping_blocks,
        nonshaping_blocks,
        test_correct_key_mapping,
        test_stim_food_mapping,
    )

## Learning

In [8]:
NUM_DIRECTIONS = 4
NUM_FOOD = 4
NUM_TEST_ITER = NUM_DIRECTIONS * 2
version = "v5"
NUM_ITER_PER_STIM = 18 if version == "v5" else 13

# Iterations per stimulus, keyed by set size, for every sequence version.
# An explicit lookup replaces the if/elif chain without `else`: an unknown
# version now fails here instead of leaving `iter_by_setsz` undefined (or
# stale from an earlier run in the same kernel).
ITER_BY_SETSZ_PER_VERSION = {
    "v2": {4: 13},
    "v3": {4: 6},
    "v4": {4: 20},
    "v5": {4: 12, 6: 8},
}
if version not in ITER_BY_SETSZ_PER_VERSION:
    raise ValueError(f"Unknown version: {version}")
iter_by_setsz = ITER_BY_SETSZ_PER_VERSION[version]

# NOTE(review): no random seed is set, so every run writes different sequences.
for level in ['medium', 'hard']:
    # One pair of learning files and one testing file per folder combination.
    for seq_idx, set_comb in enumerate(combos_as_lists):
        print('generating for seq_idx', level, seq_idx)
        shaping_blocks, nonshaping_blocks, test_correct_key_mapping, test_stim_food_mapping = generate_learning_round(
            set_comb, set_size_list[seq_idx], level, iter_by_setsz, NUM_ITER_PER_STIM, NUM_DIRECTIONS, NUM_FOOD
        )

        for name, data in zip(
            ["shaping", "nonshaping"], [shaping_blocks, nonshaping_blocks]
        ):
            concated_data = pd.concat(data).reset_index(drop=True)
            # Block numbers are generated zero-based and exported one-based.
            concated_data["block"] = concated_data["block"] + 1
            concated_data = concated_data[OUTPUT_COL_ORDER]
            concated_data.to_csv(f"{seq_folder}/{name}_{level}{seq_idx}_learning.csv", index=False)

        # Testing round: a single non-shaping block that reuses the key and
        # food mappings of the first image set of this combination.
        testing_data = pd.DataFrame(generate_non_shaping_block(
            NUM_DIRECTIONS, NUM_TEST_ITER, test_correct_key_mapping, test_stim_food_mapping
        ))
        testing_data["block"] = 1
        testing_data["img_folder"] = set_comb[0] + 1
        testing_data["set_size"] = len(test_stim_food_mapping)
        testing_data = testing_data[OUTPUT_COL_ORDER]
        testing_data.to_csv(f"{seq_folder}/{level}{seq_idx}_testing.csv", index=False)

# Preview of the last testing file written.
testing_data.head(4)

generating for seq_idx medium 0
generating for seq_idx medium 1
generating for seq_idx medium 2
generating for seq_idx medium 3
generating for seq_idx medium 4
generating for seq_idx medium 5
generating for seq_idx hard 0
generating for seq_idx hard 1
generating for seq_idx hard 2
generating for seq_idx hard 3
generating for seq_idx hard 4
generating for seq_idx hard 5


Unnamed: 0,stim,correct_key,block,img_folder,top_img,bottom_img,left_img,right_img,top_key,bottom_key,left_key,right_key,set_size
0,5,1,1,3,2,3,1,0,0,2,3,1,6
1,5,2,1,3,2,0,3,1,0,2,3,1,6
2,0,3,1,3,0,1,2,3,0,2,3,1,6
3,1,0,1,3,1,0,3,2,0,2,3,1,6


In [83]:
# Sanity check: how often each stimulus is paired with each correct key in block 1.
block_data = testing_data.loc[testing_data["block"] == 1]
stim_correct_key_counts = (
    block_data.groupby(["stim", "correct_key"]).size().unstack(fill_value=0)
)
print("Stimulus to correct key mapping counts:")
print(stim_correct_key_counts, end="\n\n")

# The same counts, listed one stimulus at a time.
print("Distribution of correct keys for each stimulus:")
for stim in sorted(block_data["stim"].unique()):
    stim_data = block_data.loc[block_data["stim"] == stim]
    key_counts = stim_data["correct_key"].value_counts().sort_index()
    print(f"Stimulus {stim}: {dict(key_counts)}")


Stimulus to correct key mapping counts:
correct_key  0  1  2  3
stim                   
0            2  2  2  2
1            2  2  2  2
2            2  2  2  2
3            2  2  2  2
4            2  2  2  2
5            2  2  2  2

Distribution of correct keys for each stimulus:
Stimulus 0: {0: 2, 1: 2, 2: 2, 3: 2}
Stimulus 1: {0: 2, 1: 2, 2: 2, 3: 2}
Stimulus 2: {0: 2, 1: 2, 2: 2, 3: 2}
Stimulus 3: {0: 2, 1: 2, 2: 2, 3: 2}
Stimulus 4: {0: 2, 1: 2, 2: 2, 3: 2}
Stimulus 5: {0: 2, 1: 2, 2: 2, 3: 2}


# Top shaping block helpers

In [None]:
def generate_top_shaping_block(num_iter_per_stim, stim_food_mapping):
    """Build the trial columns of a "top shaping" block.

    Each direction is mapped to the key with the same index (top=0, bottom=1,
    left=2, right=3), so the correct key of a trial equals the position at
    which the stimulus' food is placed.

    Args:
        num_iter_per_stim: forwarded to ``generate_sequence_optimized``.
        stim_food_mapping: dict mapping each stimulus index to a food index.

    Returns:
        Dict with ``stim``, ``correct_key``, one ``<direction>_img`` list per
        direction and one constant ``<direction>_key`` list per direction.
    """
    seq_data = {
        "stim": [],
        "correct_key": [],
        "top_img": [],
        "bottom_img": [],
        "left_img": [],
        "right_img": [],
    }
    num_stim = len(stim_food_mapping)
    # NOTE(review): target positions are drawn over num_stim values but only
    # four positions exist; this looks valid only when num_stim == 4 - confirm.
    correct_key_seq = generate_sequence_optimized(num_stim, num_iter_per_stim)
    stim_seq = generate_sequence_optimized(num_stim, num_iter_per_stim)
    seq_data["stim"] = stim_seq

    for trial, stim in enumerate(stim_seq):
        stim_food = stim_food_mapping[stim]
        stim_food_pos = correct_key_seq[trial]
        # put the food in the correct position
        food_indexes = np.array([0, 1, 2, 3])
        base = swap_by_indices(food_indexes, stim_food, stim_food_pos)
        # shuffle the food map except the correct position; the mask must have
        # one entry per position (it used to have one per stimulus, which
        # mismatched the 4-entry layout whenever num_stim != 4)
        target_mask = np.array(
            [pos == stim_food_pos for pos in range(len(food_indexes))]
        )
        food_map = shuffle_with_mask(base, target_mask)
        for j, food in enumerate(food_map):
            seq_data[f"{ALL_DIRECTIONS[j]}_img"].append(food)
        seq_data["correct_key"].append(stim_food_pos)

    for k, v in zip(ALL_DIRECTIONS, [0, 1, 2, 3]):
        seq_data[f"{k}_key"] = [v] * len(correct_key_seq)

    return seq_data

def generate_shaping_round_v2(
    num_directions, iterations, correct_key_mapping, stim_food_mapping, img_set, bs, last_nonshaping_block
):
    """Assemble a three-block shaping round.

    The round consists of a top shaping block, a shaping block and the
    supplied non-shaping block, numbered ``bs * 3``, ``bs * 3 + 1`` and
    ``bs * 3 + 2``.

    Args:
        num_directions: forwarded to ``generate_shaping_block``.
        iterations: ``iterations[0]`` is used for the top shaping block and
            ``iterations[1]`` for the shaping block.
        correct_key_mapping: dict mapping each direction name to its key.
        stim_food_mapping: dict mapping each stimulus index to a food index.
        img_set: zero-based image folder index (stored as ``img_set + 1``).
        bs: zero-based index of the block set.
        last_nonshaping_block: DataFrame used as the third block; it is
            copied, not modified.

    Returns:
        DataFrame with the three blocks concatenated (original row indices
        kept).
    """
    folder = img_set + 1
    first_block = bs * 3

    # Generation order is kept: top shaping block first, then shaping block.
    top_shaping_block = pd.DataFrame(
        generate_top_shaping_block(iterations[0], stim_food_mapping)
    )
    shaping_block = pd.DataFrame(
        generate_shaping_block(num_directions, iterations[1], correct_key_mapping)
    )

    labelled_blocks = [
        frame.assign(block=first_block + offset, img_folder=folder)
        for offset, frame in enumerate(
            [top_shaping_block, shaping_block, last_nonshaping_block]
        )
    ]
    return pd.concat(labelled_blocks)
