In [72]:
import pandas as pd
from pyprojroot import here

In [73]:
# Name of the harmonized dataset directory to load.
EXPERIMENT_NAME = "hawkins2020_characterizing_cued"

# Load the per-trial and per-message tables; paths are resolved through
# pyprojroot's `here` rather than built from the current working directory.
# NOTE(review): both CSVs seem to carry a saved index (it surfaces as an
# "Unnamed: 0" column in the displayed frames) - consider `index_col=0` if
# that column is unwanted.
df_trials = pd.read_csv(here(f"harmonized_data/{EXPERIMENT_NAME}/trials.csv"))
df_messages = pd.read_csv(here(f"harmonized_data/{EXPERIMENT_NAME}/messages.csv"))

In [74]:
# Display the messages table to inspect its columns and a sample of rows.
df_messages

Unnamed: 0,text,player_id,role,message_number,message_irrelevant,time_stamp,trial_id
0,"hello, i am going to describe them as people a...",124,describer,1,True,,1
1,is that okay?,124,describer,2,True,,1
2,yes,1,matcher,3,True,,1
3,this one kinda looks like they are looking a l...,124,describer,4,,,1
4,wearing a dress maybe.,124,describer,5,,,1
...,...,...,...,...,...,...,...
11473,pack on back,254,describer,1,,,7337
11474,"mourning, looking down",254,describer,1,,,7338
11475,"kneeling,no limbs extended",254,describer,1,,,7339
11476,"one foot on ground, other leg extended backward",254,describer,1,,,7340


In [75]:
# Order messages by trial, then by their position within the trial, so the
# per-trial lists built below come out in message_number order.
df_messages = df_messages.sort_values(["trial_id", "message_number"])

# Pack the fields needed later into one dict per message row.
df_messages["info"] = df_messages.apply(
    lambda row: {
        field: row[field]
        for field in ("text", "player_id", "role", "message_number")
    },
    axis=1,
)

# Collapse to one row per trial_id, with that trial's message dicts gathered
# into a list stored under the "messages" column.
df_message_lists = (
    df_messages.groupby("trial_id")["info"]
    .apply(list)
    .rename("messages")
    .reset_index()
)

In [76]:
# Order trials by game_id, then rep_num, then trial_num.
df_trials = df_trials.sort_values(["game_id", "rep_num", "trial_num"])
# Attach each trial's message list. A "messages" column left over from a
# previous run of this cell is dropped first; otherwise re-running the cell
# would make merge emit "messages_x"/"messages_y" and later lookups of
# `df_trials["messages"]` would fail. The left join keeps trials that have no
# messages (their "messages" value is NaN).
df_trials = df_trials.drop(columns="messages", errors="ignore").merge(
    df_message_lists,
    on="trial_id",
    how="left",
    validate="many_to_one",  # each trial_id may match at most one message list
)

In [77]:
# Display the trials table, including the merged "messages" column.
df_trials

Unnamed: 0,game_id,option_set,target,stage_num,trial_num,rep_num,exclude,exclusion_reason,condition_id,describer,matchers,trial_id,game_id_numeric,messages
0,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,B,1,1,1,,,1,145.0,23.0,1585,23,"[{'text': 'hi', 'player_id': 145, 'role': 'des..."
1,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,G,1,2,1,,,1,145.0,23.0,1586,23,[{'text': 'awesome. this one looks like a seal...
2,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,K,1,3,1,,,1,145.0,23.0,1587,23,[{'text': 'this one looks like a small dog bal...
3,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,A,1,4,1,,,1,145.0,23.0,1588,23,[{'text': 'this looks like one of the spy vs s...
4,0057-414228f8-c268-40d6-9349-b35df4f080d9,A;B;C;D;E;F;G;H;I;J;K;L,J,1,5,1,,,1,145.0,23.0,1589,23,[{'text': 'nice. this is a diamond on top of w...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7370,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,B,1,69,6,,,1,226.0,95.0,5813,95,"[{'text': 'man on his knees', 'player_id': 226..."
7371,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,L,1,70,6,,,1,226.0,95.0,5814,95,"[{'text': 's shaped guy', 'player_id': 226, 'r..."
7372,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,J,1,71,6,,,1,226.0,95.0,5815,95,"[{'text': 'almost complete rectangle body', 'p..."
7373,9684-5db90167-5d0b-40a4-9ec2-1c80b928608b,A;B;C;D;E;F;G;H;I;J;K;L,I,1,72,6,,,1,226.0,95.0,5816,95,[{'text': 'man on one leg arm left and one leg...


In [85]:
def get_user_message(messages):
    """
    Render a list of message dicts as a plain-text transcript.

    Each message contributes one line of the form "<role>: <text>", read from
    its "role" and "text" keys and terminated by a newline. Any input that is
    not a list (for example a missing value) yields an empty string.
    """
    if isinstance(messages, list):
        return "".join(
            f"{message['role']}: {message['text']}\n" for message in messages
        )
    return ""

In [90]:
# The "target" column of every trial, kept alongside the transcripts below.
labels = df_trials["target"]
# One transcript string per trial, built from that trial's message list.
user_messages = df_trials["messages"].map(get_user_message)

In [91]:
# Show the transcript stored under index label 0; print() renders its newlines.
print(user_messages[0])

describer: hi
matcher: hello
describer: looking for a diamond at the top, triangle pointing towards the left
describer: close to a person praying on their knees
matcher: is there a triangle pointing right as well?
describer: there's a triangle on the right also. just below the diamond and to the right
describer: could also be seen as a mouse in silhouette
matcher: anything happening at the bottom?
describer: like a mountain - we've got 72 of these to get through
matcher: 72? oops.



In [None]:
# Show the target value stored under index label 0.
print(labels[0])