# Load, combine, and decode shapes

In [5]:
import pandas as pd

def decode_shape_binaries_str(encoded_str, bits=10):
    """
    Decodes a single string of space-separated decimal codes into
    a 2D list (shape) of 0/1 bits. Each code becomes one row in the shape.

    :param encoded_str: A single string with space-separated decimal values
                        (e.g., "1016 64 64 64").
    :param bits: The fixed width of the binary representation (default=10).
    :return: A list of lists, where each sub-list is a row of bits (0's and 1's).
    """
    # Split the string by spaces to get each code as a separate token
    codes = encoded_str.split()

    shape = []
    for code in codes:
        # Convert the code (string) to an integer
        number = int(code)

        # Convert to binary, left-padded with zeros to the desired bit length
        binary_str = format(number, 'b').rjust(bits, '0')

        # Convert the binary string into a list of integer bits (0 or 1)
        row_of_bits = [int(bit) for bit in binary_str]
        shape.append(row_of_bits)

    return shape



In [7]:
def encode_shape_binaries(shape, bits=10):
    """
    Encodes a 2D list (shape) of 0/1 bits into a single string of
    space-separated decimal codes. Each row in the shape becomes one code.

    :param shape: A list of lists, where each sub-list is a row of bits (0's and 1's).
    :param bits: The fixed width of the binary representation (default=10).
    :return: A single string with space-separated decimal values
             (e.g., "1016 64 64 64").
    """
    codes = []
    for row in shape:
        if 1 not in row:
            continue
        # Convert the list of bits into a binary string
        binary_str = ''.join([str(bit) for bit in row])

        # Convert the binary string into an integer
        number = int(binary_str, 2)

        codes.append(str(number))

    return ' '.join(codes)

shape = [[1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
         [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
         [0,0,0,0,0,0,0,0,0,0]]

shape = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
print(encode_shape_binaries(shape))

1023


In [11]:
# print(decode_shape_binaries_str("640 1020"))
print(decode_shape_binaries_str("6 1018"))
print(decode_shape_binaries_str("14 1009"))
print(decode_shape_binaries_str("62 961"))

[[0, 0, 0, 0, 0, 0, 0, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 0, 1, 0]]
[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 0, 0, 0, 1]]
[[0, 0, 0, 0, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 0, 0, 0, 0, 0, 1]]


In [9]:
print(decode_shape_binaries_str("256 511"))
print(decode_shape_binaries_str("128 511"))
print(decode_shape_binaries_str("128 255 128"))
print(decode_shape_binaries_str("128 255 64"))
print(decode_shape_binaries_str("64 255 64"))
print(decode_shape_binaries_str("7 124 192"))
print(decode_shape_binaries_str("2 14 126"))

[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]]
[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]]
[[0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]]
[[0, 0, 0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 1, 1, 1, 1, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]]
[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1, 1, 1, 0], [0, 0, 0, 1, 1, 1, 1, 1, 1, 0]]


Example with a single participant's data

In [2]:

# fp = "/Users/sarawu/Desktop/CAB/benchmarking-creativity/creative-foraging-human-data/Games/20120613_122200.txt"
fp = "creative-foraging-human-data/Games/20120613_122200.txt"

# Load and name columns
df = pd.read_csv(fp, sep="\t", header=None)
df.columns = ["shape", "timestamp", "timestamp_gallery"] # add column names
print(df.shape)
display(df.head())

# Get next move
df['next_shape'] = df['shape'].shift(-1)

# Decode the shape binaries
df['shape_matrix'] = df['shape'].apply(decode_shape_binaries_str) # Create a column that contains the shape matrix, applied to each row
df['shape_matrix_str'] = df['shape'].apply(lambda x: "\n".join(["".join(map(str, row)) for row in decode_shape_binaries_str(x)]))

df.head()

(280, 3)


Unnamed: 0,shape,timestamp,timestamp_gallery
0,1023,15.712,
1,1023,16.927,31.45
2,512 1022,72.191,
3,512 1020 4,74.19,
4,256 1020 4,83.084,


Unnamed: 0,shape,timestamp,timestamp_gallery,next_shape,shape_matrix,shape_matrix_str
0,1023,15.712,,1023,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]",1111111111
1,1023,16.927,31.45,512 1022,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]",1111111111
2,512 1022,72.191,,512 1020 4,"[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, ...",1000000000\n1111111110
3,512 1020 4,74.19,,256 1020 4,"[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, ...",1000000000\n1111111100\n0000000100
4,256 1020 4,83.084,,256 1020 8,"[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, ...",0100000000\n1111111100\n0000000100


## Load all data

In [13]:

# Iterate through all the participants in the dataset
import os

df_list = []
for participant_file in os.listdir("creative-foraging-human-data/Games"):
    # print(participant_file)
    with open("creative-foraging-human-data/Games/"+participant_file, 'r') as f:
        df = pd.read_csv(f, sep="\t", header=None)
        df.columns = ["shape", "timestamp", "timestamp_gallery"]
        df['next_shape'] = df['shape'].shift(-1)
        df['game_file'] = participant_file
        df['shape_matrix'] = df['shape'].apply(decode_shape_binaries_str)
        df['shape_matrix_str'] = df['shape'].apply(lambda x: "\n".join(["".join(map(str, row)) for row in decode_shape_binaries_str(x)]))
        df_list.append(df)

df = pd.concat(df_list)
print(df.shape)
display(df.head())

# Save the processed data
df.to_csv("data/all-games.tsv", index=False, sep="\t", quoting=2)


(32319, 7)


Unnamed: 0,shape,timestamp,timestamp_gallery,next_shape,game_file,shape_matrix,shape_matrix_str
0,1023,16.038,,1023,20120513_091629.txt,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]",1111111111
1,1023,17.168,29.65,2 1022,20120513_091629.txt,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]",1111111111
2,2 1022,76.165,,4 1020 512,20120513_091629.txt,"[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [1, 1, 1, 1, ...",0000000010\n1111111110
3,4 1020 512,79.6,,4 1020 256,20120513_091629.txt,"[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [1, 1, 1, 1, ...",0000000100\n1111111100\n1000000000
4,4 1020 256,81.315,,520 1016 512,20120513_091629.txt,"[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [1, 1, 1, 1, ...",0000000100\n1111111100\n0100000000


In [32]:
# Modify human data: add an ID column that identifies each shape, remove gallery column
# Give OpenAI model a single participant's game data (without the gallery information) and ask it to pick its favorite shapes
for game in df:
    df['shape_ID'] = df['shape'].astype('category').cat.codes + 1

unique_filenames = df['game_file'].unique()
filename_to_id = {filename: i+1 for i, filename in enumerate(unique_filenames)}

# Create the new column using the mapping
df['participant_ID'] = df['game_file'].map(filename_to_id)

# Display the result
print(df[['game_file', 'participant_ID']])
print(df.head)


               game_file  participant_ID
0    20120513_091629.txt               1
1    20120513_091629.txt               1
2    20120513_091629.txt               1
3    20120513_091629.txt               1
4    20120513_091629.txt               1
..                   ...             ...
365  20120611_131956.txt             100
366  20120611_131956.txt             100
367  20120611_131956.txt             100
368  20120611_131956.txt             100
369  20120611_131956.txt             100

[32319 rows x 2 columns]
<bound method NDFrame.head of                    shape  timestamp   timestamp_gallery           next_shape  \
0                   1023     16.038                                     1023   
1                   1023     17.168  29.650000000000002               2 1022   
2                 2 1022     76.165                               4 1020 512   
3             4 1020 512     79.600                               4 1020 256   
4             4 1020 256     81.315                 

In [33]:

# print(df.head)

LLM_evaluation_df = df.copy()

LLM_evaluation_df = LLM_evaluation_df[['participant_ID', 'shape_ID', 'shape', 'game_file', 'shape_matrix']]

unique_shapes_df = LLM_evaluation_df.drop_duplicates(subset=['game_file', 'shape'])
# print(unique_shapes_df.head)

randomized_order_df = unique_shapes_df.groupby('game_file').apply(
    lambda x: x.sample(frac=1, random_state=42)
).reset_index(drop=True)
print(randomized_order_df)

# Group the data by the ID column
grouped = randomized_order_df.groupby('participant_ID')  # Replace 'id' with your actual ID column name

# Save each group to a separate CSV file
output_dir = "human_participants_blinded"
os.makedirs(output_dir, exist_ok=True)

for participant_ID, group in grouped:
    # Creating a filename based on the ID
    filename = os.path.join(output_dir, f'data_human_{participant_ID}.csv')
    
    # Save the group to a CSV file
    group.to_csv(filename, index=False)
    
    print(f'Saved file: {filename}')

       participant_ID  shape_ID                shape            game_file  \
0                  14      7418    64 64 960 576 576  20110530_180623.txt   
1                  14      5188      448 768 256 480  20110530_180623.txt   
2                  14      7964             768 1020  20110530_180623.txt   
3                  14      4491      384 224 896 640  20110530_180623.txt   
4                  14      6176  512 768 384 960 256  20110530_180623.txt   
...               ...       ...                  ...                  ...   
26875              16      3840          288 992 896  20120613_122200.txt   
26876              16      4182         320 1008 160  20120613_122200.txt   
26877              16      5521          480 960 320  20120613_122200.txt   
26878              16     10026          960 192 240  20120613_122200.txt   
26879              16      4296      320 960 896 256  20120613_122200.txt   

                                            shape_matrix  
0      [[0, 0, 0

  randomized_order_df = unique_shapes_df.groupby('game_file').apply(
