In [39]:
import pandas as pd
from pyprojroot import here

In [40]:
# Name of the harmonized experiment whose messages and trials are loaded below.
EXPERIMENT_NAME = "hawkins2020_characterizing_cued"

# Resolve both CSV locations through pyprojroot's `here` first, then read them.
messages_csv = here(f"harmonized_data/{EXPERIMENT_NAME}/messages.csv")
trials_csv = here(f"harmonized_data/{EXPERIMENT_NAME}/trials.csv")
df_messages = pd.read_csv(messages_csv)
df_trials = pd.read_csv(trials_csv)

In [41]:
# Display the raw messages table read from messages.csv (one row per chat message).
df_messages

Unnamed: 0,text,player_id,role,message_number,message_irrelevant,time_stamp,trial_id
0,"hello, i am going to describe them as people a...",124,describer,1,True,,1
1,is that okay?,124,describer,2,True,,1
2,yes,1,matcher,3,True,,1
3,this one kinda looks like they are looking a l...,124,describer,4,,,1
4,wearing a dress maybe.,124,describer,5,,,1
...,...,...,...,...,...,...,...
11473,pack on back,254,describer,1,,,7337
11474,"mourning, looking down",254,describer,1,,,7338
11475,"kneeling,no limbs extended",254,describer,1,,,7339
11476,"one foot on ground, other leg extended backward",254,describer,1,,,7340


In [42]:
# Columns copied into each per-message record, in the order the keys should appear.
MESSAGE_FIELDS = ["text", "player_id", "role", "message_number"]


def _message_record(row):
    """Return the fields of one message row as a plain dict."""
    return {field: row[field] for field in MESSAGE_FIELDS}


# Sort first so that each trial's list ends up ordered by message number.
df_messages = df_messages.sort_values(["trial_id", "message_number"])
df_messages["info"] = df_messages.apply(_message_record, axis=1)

# One row per trial, holding the list of that trial's message records.
messages_by_trial = df_messages.groupby("trial_id")["info"].apply(list)
df_message_lists = messages_by_trial.reset_index().rename(
    columns={"info": "messages"}
)

In [43]:
# Order trials by game, repetition and trial number so that the per-block
# lists built from this frame follow trial order.
df_trials = df_trials.sort_values(["game_id", "rep_num", "trial_num"])
# Drop any `messages` column left over from a previous run of this cell.
# Without this, re-running the cell merges onto a frame that already has
# `messages`, pandas renames the pair to `messages_x` / `messages_y`, and the
# lookup on the last line raises KeyError.
df_trials = df_trials.drop(columns="messages", errors="ignore").merge(
    df_message_lists, on="trial_id", how="left"
)
# Left merge: trials with no matching row in `df_message_lists` get NaN here.
df_trials["messages"]

0       [{'text': 'hi', 'player_id': 145, 'role': 'des...
1       [{'text': 'awesome. this one looks like a seal...
2       [{'text': 'this one looks like a small dog bal...
3       [{'text': 'this looks like one of the spy vs s...
4       [{'text': 'nice. this is a diamond on top of w...
                              ...                        
7370    [{'text': 'man on his knees', 'player_id': 226...
7371    [{'text': 's shaped guy', 'player_id': 226, 'r...
7372    [{'text': 'almost complete rectangle body', 'p...
7373    [{'text': 'man on one leg arm left and one leg...
7374    [{'text': 'hello', 'player_id': 157, 'role': '...
Name: messages, Length: 7375, dtype: object

In [44]:
# A "block" is one repetition of one game.
BLOCK_KEYS = ["game_id", "rep_num"]


def _collect_by_block(column, history_column):
    """Gather `column` of `df_trials` into one list per (game_id, rep_num) group."""
    per_block = df_trials.groupby(BLOCK_KEYS)[column].apply(list)
    return per_block.reset_index().rename(columns={column: history_column})


# Per block: the list of per-trial message lists, and the list of targets.
df_message_history = _collect_by_block("messages", "message_history")
df_target_history = _collect_by_block("target", "target_history")

# Both histories side by side, one row per block.
df_history = df_message_history.merge(df_target_history, on=BLOCK_KEYS, how="left")

In [45]:
def filter_lists(data, filter, block_size=None):
    """
    Keep only the history entries that precede the current trial in its block.

    Parameters
    ----------
    data : mapping or pandas.Series
        One trial row. Must provide ``"trial_num"`` (1-based, counted across
        all repetitions), ``"rep_num"`` (1-based) and the list stored under
        ``filter``.
    filter : str
        Name of the list-valued field to truncate. The parameter name shadows
        the builtin but is kept because callers pass it by keyword.
    block_size : int, optional
        Nominal number of trials per repetition. Defaults to the length of
        the list itself, which is only right when every earlier repetition
        and the current one are complete. Pass the design value explicitly
        when blocks may be incomplete, otherwise the computed offset can
        overshoot and return the current and later trials as well.

    Returns
    -------
    list
        The leading slice of ``data[filter]`` with the current trial and all
        later trials of the block removed.
    """
    full_list = data[filter]
    trials_per_block = len(full_list) if block_size is None else int(block_size)
    # Coerce to int so float-typed columns (e.g. after NaN upcasting) can be
    # used as a slice bound.
    trial_num = int(data["trial_num"])
    rep_num = int(data["rep_num"])
    # Zero-based position of the current trial inside its own repetition.
    index = trial_num - (rep_num - 1) * trials_per_block - 1
    # A negative offset would slice from the end of the list and silently
    # return unrelated trials; treat it as "no history" instead.
    index = max(index, 0)
    return full_list[:index]

In [46]:
# Attach the full per-block histories to every trial row, one merge at a time.
block_keys = ["game_id", "rep_num"]
trials_with_messages = df_trials.merge(df_message_history, on=block_keys, how="left")
df_trials_history = trials_with_messages.merge(
    df_target_history, on=block_keys, how="left"
)

# Cut each history down to the trials that came before the row's own trial.
for history_column in ("message_history", "target_history"):
    df_trials_history[history_column] = df_trials_history.apply(
        filter_lists, filter=history_column, axis=1
    )

df_trials_history

Unnamed: 0,game_id,option_set,target,stage_num,trial_num,rep_num,exclude,exclusion_reason,condition_id,describer,matchers,trial_id,game_id_numeric,messages,message_history,target_history
0,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,B,1,1,1,,,1,145.0,23.0,1585,23,"[{'text': 'hi', 'player_id': 145, 'role': 'des...",[],[]
1,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,G,1,2,1,,,1,145.0,23.0,1586,23,[{'text': 'awesome. this one looks like a seal...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...",[B]
2,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,K,1,3,1,,,1,145.0,23.0,1587,23,[{'text': 'this one looks like a small dog bal...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G]"
3,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,A,1,4,1,,,1,145.0,23.0,1588,23,[{'text': 'this looks like one of the spy vs s...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K]"
4,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,J,1,5,1,,,1,145.0,23.0,1589,23,[{'text': 'nice. this is a diamond on top of w...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7370,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,B,1,69,6,,,1,226.0,95.0,5813,95,"[{'text': 'man on his knees', 'player_id': 226...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H]"
7371,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,L,1,70,6,,,1,226.0,95.0,5814,95,"[{'text': 's shaped guy', 'player_id': 226, 'r...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B]"
7372,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,J,1,71,6,,,1,226.0,95.0,5815,95,"[{'text': 'almost complete rectangle body', 'p...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L]"
7373,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,I,1,72,6,,,1,226.0,95.0,5816,95,[{'text': 'man on one leg arm left and one leg...,"[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J]"


In [47]:
# Inspect the message history of the first (game_id, rep_num) group:
# a list holding one list of message dicts per trial.
df_message_history["message_history"][0]

[[{'text': 'hi', 'player_id': 145, 'role': 'describer', 'message_number': 1},
  {'text': 'hello', 'player_id': 23, 'role': 'matcher', 'message_number': 2},
  {'text': 'looking for a diamond at the top, triangle pointing towards the left',
   'player_id': 145,
   'role': 'describer',
   'message_number': 3},
  {'text': 'close to a person praying on their knees',
   'player_id': 145,
   'role': 'describer',
   'message_number': 4},
  {'text': 'is there a triangle pointing right as well?',
   'player_id': 23,
   'role': 'matcher',
   'message_number': 5},
  {'text': "there's a triangle on the right also. just below the diamond and to the right",
   'player_id': 145,
   'role': 'describer',
   'message_number': 6},
  {'text': 'could also be seen as a mouse in silhouette',
   'player_id': 145,
   'role': 'describer',
   'message_number': 7},
  {'text': 'anything happening at the bottom?',
   'player_id': 23,
   'role': 'matcher',
   'message_number': 8},
  {'text': "like a mountain - we've 

In [48]:
def get_user_message(messages):
    """
    Render a trial's messages as a single transcript string.

    Every message becomes one ``"<role>: <text>"`` line terminated by a
    newline. Anything that is not a list (for example the NaN left by a merge
    for a trial without messages) yields an empty string.
    """
    if isinstance(messages, list):
        return "".join(f"{entry['role']}: {entry['text']}\n" for entry in messages)
    return ""

In [49]:
# One formatted transcript string per trial (empty when a trial has no message list).
user_messages = df_trials["messages"].map(get_user_message)
# The target recorded for each of those trials.
labels = df_trials.loc[:, "target"]

In [50]:
# Print the formatted transcript stored under index label 0.
print(user_messages[0])

describer: hi
matcher: hello
describer: looking for a diamond at the top, triangle pointing towards the left
describer: close to a person praying on their knees
matcher: is there a triangle pointing right as well?
describer: there's a triangle on the right also. just below the diamond and to the right
describer: could also be seen as a mouse in silhouette
matcher: anything happening at the bottom?
describer: like a mountain - we've got 72 of these to get through
matcher: 72? oops.



In [51]:
# Print the target stored under index label 0.
print(labels[0])

B


In [52]:
# Inspect the second trial's message list within the first group's history.
df_message_history["message_history"][0][1]

[{'text': 'awesome. this one looks like a seal',
  'player_id': 145,
  'role': 'describer',
  'message_number': 1}]

In [53]:
def format_history(row):
    """
    Turn one row's histories into a flat list of alternating chat turns.

    For each (messages, target) pair taken from ``row["message_history"]`` and
    ``row["target_history"]``, a ``"user"`` turn containing the transcript
    built by ``get_user_message`` is followed by an ``"assistant"`` turn
    containing the target.
    """
    turns = []
    paired = zip(row["message_history"], row["target_history"])
    for trial_messages, trial_target in paired:
        turns.extend(
            (
                {"role": "user", "content": get_user_message(trial_messages)},
                {"role": "assistant", "content": trial_target},
            )
        )
    return turns

In [54]:
# Build one chat (list of user/assistant turns) per row of df_history.
all_msgs = df_history.apply(format_history, axis=1)

In [55]:
# Display the resulting Series of chats.
all_msgs

0      [{'role': 'user', 'content': 'describer: hi
ma...
1      [{'role': 'user', 'content': 'describer: first...
2      [{'role': 'user', 'content': 'describer: mouse...
3      [{'role': 'user', 'content': 'describer: dog w...
4      [{'role': 'user', 'content': 'describer: skate...
                             ...                        
632    [{'role': 'user', 'content': 'describer: man s...
633    [{'role': 'user', 'content': 'describer: rabbi...
634    [{'role': 'user', 'content': 'describer: rabbi...
635    [{'role': 'user', 'content': 'describer: lungi...
636    [{'role': 'user', 'content': 'matcher: hello
'...
Length: 637, dtype: object

In [56]:
# Pick one chat at random and print each turn as "ROLE: content".
sampled_chat = all_msgs.sample(n=1).iloc[0]
for turn in sampled_chat:
    print(": ".join((turn["role"].upper(), turn["content"])))

USER: describer: split triangle

ASSISTANT: B
USER: describer: speech

ASSISTANT: D
USER: describer: banana

ASSISTANT: H
USER: describer: zig zag tail

ASSISTANT: K
USER: describer: karate

ASSISTANT: I
USER: describer: legs out

ASSISTANT: F
USER: describer: bunny

ASSISTANT: E
USER: describer: zig zag

ASSISTANT: L
USER: describer: high heels

ASSISTANT: G
USER: describer: one leg up

ASSISTANT: A
USER: describer: first one we did

ASSISTANT: J
USER: describer: flying good job you were quick have a good one!

ASSISTANT: C


In [57]:
# Display the per-(game_id, rep_num) table of message and target histories.
df_history

Unnamed: 0,game_id,rep_num,message_history,target_history
0,0057-414228f8-c268-40d6-9349-b35df4f080d9,1,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]"
1,0057-414228f8-c268-40d6-9349-b35df4f080d9,2,[[{'text': 'first diamond on top of state shap...,"[J, F, A, D, E, G, B, H, C, K, L, I]"
2,0057-414228f8-c268-40d6-9349-b35df4f080d9,3,"[[{'text': 'mouse', 'player_id': 145, 'role': ...","[B, J, K, L, D, A, F, G, C, E, H, I]"
3,0057-414228f8-c268-40d6-9349-b35df4f080d9,4,"[[{'text': 'dog with ball', 'player_id': 145, ...","[K, L, I, E, J, D, F, C, A, G, B, H]"
4,0057-414228f8-c268-40d6-9349-b35df4f080d9,5,"[[{'text': 'skate', 'player_id': 145, 'role': ...","[I, A, L, G, H, J, F, C, B, D, K, E]"
...,...,...,...,...
632,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,3,[[{'text': 'man standing with arms to the left...,"[D, E, K, A, H, L, J, C, G, B, I, F]"
633,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,4,"[[{'text': 'rabbit', 'player_id': 226, 'role':...","[E, K, C, G, A, B, F, J, H, D, L, I]"
634,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,5,"[[{'text': 'rabbit', 'player_id': 226, 'role':...","[E, C, A, G, L, D, H, F, J, I, K, B]"
635,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,6,"[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J, I]"


In [58]:
df_with_history = df_trials.merge(df_history, on=["game_id", "rep_num"], how="left")
# `trial_num` keeps counting across repetitions (it reaches 72 in rep 6),
# while each `message_history` list only covers the row's own repetition.
# Slicing with `trial_num - 1` therefore returned the whole block (current
# and future trials included) for every repetition after the first.
# Use the row's zero-based position inside its (game_id, rep_num) group
# instead. The histories were collected from `df_trials` in this same row
# order, so position k corresponds to entry k of the list.
position_in_block = df_with_history.groupby(["game_id", "rep_num"]).cumcount()
df_with_history["message_history_trunc"] = [
    history[:position]
    for history, position in zip(
        df_with_history["message_history"], position_in_block
    )
]

In [59]:
# Display the trials joined with their block-level histories.
df_with_history

Unnamed: 0,game_id,option_set,target,stage_num,trial_num,rep_num,exclude,exclusion_reason,condition_id,describer,matchers,trial_id,game_id_numeric,messages,message_history,target_history,message_history_trunc
0,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,B,1,1,1,,,1,145.0,23.0,1585,23,"[{'text': 'hi', 'player_id': 145, 'role': 'des...","[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]",[]
1,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,G,1,2,1,,,1,145.0,23.0,1586,23,[{'text': 'awesome. this one looks like a seal...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]","[[{'text': 'hi', 'player_id': 145, 'role': 'de..."
2,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,K,1,3,1,,,1,145.0,23.0,1587,23,[{'text': 'this one looks like a small dog bal...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]","[[{'text': 'hi', 'player_id': 145, 'role': 'de..."
3,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,A,1,4,1,,,1,145.0,23.0,1588,23,[{'text': 'this looks like one of the spy vs s...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]","[[{'text': 'hi', 'player_id': 145, 'role': 'de..."
4,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,J,1,5,1,,,1,145.0,23.0,1589,23,[{'text': 'nice. this is a diamond on top of w...,"[[{'text': 'hi', 'player_id': 145, 'role': 'de...","[B, G, K, A, J, E, H, I, C, D, L, F]","[[{'text': 'hi', 'player_id': 145, 'role': 'de..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7370,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,B,1,69,6,,,1,226.0,95.0,5813,95,"[{'text': 'man on his knees', 'player_id': 226...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J, I]","[[{'text': 'lunging', 'player_id': 226, 'role'..."
7371,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,L,1,70,6,,,1,226.0,95.0,5814,95,"[{'text': 's shaped guy', 'player_id': 226, 'r...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J, I]","[[{'text': 'lunging', 'player_id': 226, 'role'..."
7372,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,J,1,71,6,,,1,226.0,95.0,5815,95,"[{'text': 'almost complete rectangle body', 'p...","[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J, I]","[[{'text': 'lunging', 'player_id': 226, 'role'..."
7373,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,I,1,72,6,,,1,226.0,95.0,5816,95,[{'text': 'man on one leg arm left and one leg...,"[[{'text': 'lunging', 'player_id': 226, 'role'...","[G, C, K, A, E, D, F, H, B, L, J, I]","[[{'text': 'lunging', 'player_id': 226, 'role'..."


In [None]:
# Write the joined table without the index column.
# NOTE(review): the path is relative to the current working directory, whereas
# the inputs are located through `here(...)` — confirm this is the intended
# output location.
# NOTE(review): the list-valued columns (`messages`, `message_history`, ...)
# are presumably stored as their string representation in the CSV; verify that
# downstream readers parse them back.
df_with_history.to_csv("trials_with_history.csv", index=False)

In [25]:
# Scratch: grab the block-level message history attached to the first row.
m_hist = df_with_history["message_history"].iloc[0]

In [35]:
# Scratch check: an empty slice [0:0] returns an empty list, which is what a
# "no earlier trials" truncation produces.
lst = ["a", "b", "c"]
lst[0:0]

[]

In [24]:
# Scratch: show the first row's own list of message dicts.
df_with_history["messages"].iloc[0]

[{'text': 'hi', 'player_id': 145, 'role': 'describer', 'message_number': 1},
 {'text': 'hello', 'player_id': 23, 'role': 'matcher', 'message_number': 2},
 {'text': 'looking for a diamond at the top, triangle pointing towards the left',
  'player_id': 145,
  'role': 'describer',
  'message_number': 3},
 {'text': 'close to a person praying on their knees',
  'player_id': 145,
  'role': 'describer',
  'message_number': 4},
 {'text': 'is there a triangle pointing right as well?',
  'player_id': 23,
  'role': 'matcher',
  'message_number': 5},
 {'text': "there's a triangle on the right also. just below the diamond and to the right",
  'player_id': 145,
  'role': 'describer',
  'message_number': 6},
 {'text': 'could also be seen as a mouse in silhouette',
  'player_id': 145,
  'role': 'describer',
  'message_number': 7},
 {'text': 'anything happening at the bottom?',
  'player_id': 23,
  'role': 'matcher',
  'message_number': 8},
 {'text': "like a mountain - we've got 72 of these to get thro

In [28]:
# Scratch: number of per-trial message lists held in `m_hist`.
len(m_hist)

12

In [31]:
# Scratch: number of messages in the first entry of `m_hist`.
len(m_hist[0])

10

In [32]:
# Scratch: message list of the first row of df_trials (positional lookup).
messages = df_trials["messages"].iloc[0]

In [34]:
# Scratch: display that message list.
messages

[{'text': 'hi', 'player_id': 145, 'role': 'describer', 'message_number': 1},
 {'text': 'hello', 'player_id': 23, 'role': 'matcher', 'message_number': 2},
 {'text': 'looking for a diamond at the top, triangle pointing towards the left',
  'player_id': 145,
  'role': 'describer',
  'message_number': 3},
 {'text': 'close to a person praying on their knees',
  'player_id': 145,
  'role': 'describer',
  'message_number': 4},
 {'text': 'is there a triangle pointing right as well?',
  'player_id': 23,
  'role': 'matcher',
  'message_number': 5},
 {'text': "there's a triangle on the right also. just below the diamond and to the right",
  'player_id': 145,
  'role': 'describer',
  'message_number': 6},
 {'text': 'could also be seen as a mouse in silhouette',
  'player_id': 145,
  'role': 'describer',
  'message_number': 7},
 {'text': 'anything happening at the bottom?',
  'player_id': 23,
  'role': 'matcher',
  'message_number': 8},
 {'text': "like a mountain - we've got 72 of these to get thro

In [84]:
# Scratch: first turn of the first chat in `all_msgs`.
all_msgs[0][0]

{'role': 'user',
 'content': "describer: hi\nmatcher: hello\ndescriber: looking for a diamond at the top, triangle pointing towards the left\ndescriber: close to a person praying on their knees\nmatcher: is there a triangle pointing right as well?\ndescriber: there's a triangle on the right also. just below the diamond and to the right\ndescriber: could also be seen as a mouse in silhouette\nmatcher: anything happening at the bottom?\ndescriber: like a mountain - we've got 72 of these to get through\nmatcher: 72? oops.\n"}