In [17]:
#363 vs 361
# with and without transition statement
# with and without response times
# with and without I don't know only.
# with random names and with sequential user1, user2, etc...
# TODO: include the username in the demographics

In [18]:
import pandas as pd

# Load the SQB experiment export; each row is later turned into one Participant.
df = pd.read_csv(filepath_or_buffer="experiment_data/sqb/sqb_data.csv")

In [19]:
# Substrings that identify a choice-problem message in a chat log.
# A message whose content contains any of these ends the interview transcript
# and is extracted as the choice problem.
sqb_markers = [
    "National Highway Safety",
    "You are a serious reader",
    "When evaluating teaching job offers,",
]

In [20]:
class Participant:
    """One study participant: identifier, condition, demographics and chat log.

    Processing a chat log (a list of message dicts with ``role``, ``content``
    and ``timestamp`` keys) derives three strings:

    * ``transcript`` -- one ``"role: content"`` line (newline-terminated) for
      every message that precedes the first choice-problem message.
    * ``transcript_with_response_times`` -- the same messages joined by single
      newlines, with ``(response time: N ms)`` appended to user messages from
      the third message onwards.
    * ``choice_problem`` -- the text of every message that contains a
      choice-problem marker, joined by newlines.

    A message is a choice-problem message when its content contains any of the
    marker substrings. Markers come from the ``markers`` argument when given,
    otherwise from the module-level ``sqb_markers`` list.
    """

    # Class-level defaults so the getters return "" before any processing.
    transcript = ""
    transcript_with_response_times = ""
    choice_problem = ""
    # Per-instance marker override; None means "use module-level sqb_markers".
    markers = None

    @staticmethod
    def _response_time(previous, current):
        """Return the timestamp difference between two consecutive messages.

        Client timestamps are used when both are non-zero; otherwise the server
        timestamps are used. Returns -1 when a server timestamp is missing.
        """
        prev_ts = previous['timestamp']
        cur_ts = current['timestamp']
        if cur_ts['client_timestamp'] != 0 and prev_ts['client_timestamp'] != 0:
            return cur_ts['client_timestamp'] - prev_ts['client_timestamp']
        try:
            return cur_ts['server_timestamp'] - prev_ts['server_timestamp']
        except KeyError:
            return -1

    def process_transcript(self, chat_transcript, markers=None):
        """Derive the transcript strings and the choice problem from a chat log.

        Args:
            chat_transcript: list of message dicts (``role``, ``content``,
                ``timestamp``).
            markers: optional list of marker substrings. Defaults to the
                instance's ``markers`` and then to module-level ``sqb_markers``.

        Returns:
            None. Results are stored on ``self.transcript``,
            ``self.transcript_with_response_times`` and ``self.choice_problem``.
        """
        if markers is None:
            markers = self.markers

        def is_choice_problem(msg):
            # The global is looked up lazily so an explicit (even empty) marker
            # list never touches module state.
            active = markers if markers is not None else sqb_markers
            return any(marker in msg['content'] for marker in active)

        plain_lines = []
        timed_lines = []
        for i, msg in enumerate(chat_transcript):
            # The interview ends at the first choice-problem message.
            if is_choice_problem(msg):
                break

            line = f"{msg['role']}: {msg['content']}"
            plain_lines.append(line)

            # Response times are reported for user messages only, and not for
            # the first two messages of the conversation.
            if msg['role'] == 'user' and i > 1:
                time_diff = self._response_time(chat_transcript[i - 1], msg)
                line += f" (response time: {time_diff} ms)"
            timed_lines.append(line)

        # Assistant messages are emitted without their role label. Building the
        # text directly (rather than str.replace on the joined string) leaves any
        # "assistant: " inside the message content untouched.
        choice_lines = [
            msg['content'] if msg['role'] == 'assistant' else f"{msg['role']}: {msg['content']}"
            for msg in chat_transcript
            if is_choice_problem(msg)
        ]

        self.transcript = "".join(f"{line}\n" for line in plain_lines)
        self.transcript_with_response_times = "\n".join(timed_lines)
        self.choice_problem = "\n".join(choice_lines)

    def __init__(self, participant_id = None, condition = None, prolific_data = None, chat_transcript = None, change_username = None, change_assistantname = None, markers = None):
        """Create a participant and immediately process its chat log.

        Args:
            participant_id: unique identifier for the participant.
            condition: experiment condition.
            prolific_data: dict of demographic values, or None.
            chat_transcript: list of message dicts; None is treated as empty.
            change_username: if given, replaces the ``user:`` label in both
                transcripts.
            change_assistantname: if given, replaces the ``assistant:`` label
                in both transcripts.
            markers: optional marker substrings overriding ``sqb_markers``.
        """
        # Unique identifier for the participant
        self.Participant_ID = participant_id

        # Experiment condition
        self.condition = condition

        # Demographic info from Prolific
        self.prolific_data = prolific_data

        # Choice-problem markers (None -> module-level sqb_markers)
        self.markers = markers

        # Chat data
        self.chat_transcript = chat_transcript if chat_transcript is not None else []
        self.process_transcript(self.chat_transcript)

        if change_username is not None:
            self.change_user_name(change_username)

        if change_assistantname is not None:
            self.change_assistant_name(change_assistantname)

    def update_participant_id(self, participant_id):
        """Replace the participant identifier."""
        self.Participant_ID = participant_id

    def update_condition(self, condition):
        """Replace the experiment condition."""
        self.condition = condition

    def update_prolific_data(self, key, value):
        """Set one demographic field; does nothing when no demographics dict exists."""
        if self.prolific_data is not None:
            self.prolific_data[key] = value

    def update_chat_transcript(self, chat_transcript):
        """Replace the raw chat log and rebuild the derived strings.

        The raw list is kept on ``self.chat_transcript`` (None is treated as an
        empty log). The rebuilt transcripts use the raw ``user:`` /
        ``assistant:`` labels again, so call ``change_user_name`` /
        ``change_assistant_name`` afterwards if renamed labels are wanted.
        """
        self.chat_transcript = chat_transcript if chat_transcript is not None else []
        self.process_transcript(self.chat_transcript)

    def get_participant_id(self):
        """Return the participant identifier."""
        return self.Participant_ID

    def get_demographics(self):
        """Return the demographics dict (may be None)."""
        return self.prolific_data

    def get_transcript(self) -> str:
        """Return the transcript without response times."""
        return self.transcript

    def get_transcript_with_response_times(self) -> str:
        """Return the transcript annotated with user response times."""
        return self.transcript_with_response_times

    def get_choice_problem(self) -> str:
        """Return the extracted choice-problem text."""
        return self.choice_problem

    def change_user_name(self, new_name):
        """Replace the ``user:`` label with ``new_name:`` in both transcripts.

        NOTE: this is a plain substring replacement, so ``user:`` occurring
        inside a message's text is replaced as well.
        """
        self.transcript = self.transcript.replace("user:", f"{new_name}:")
        self.transcript_with_response_times = self.transcript_with_response_times.replace("user:", f"{new_name}:")

    def change_assistant_name(self, new_name):
        """Replace the ``assistant:`` label with ``new_name:`` in both transcripts.

        NOTE: this is a plain substring replacement, so ``assistant:`` occurring
        inside a message's text is replaced as well.
        """
        self.transcript = self.transcript.replace("assistant:", f"{new_name}:")
        self.transcript_with_response_times = self.transcript_with_response_times.replace("assistant:", f"{new_name}:")

# Demographic CSV columns copied into each participant's prolific_data dict.
columns = [
    "Age", "Sex", "Ethnicity simplified",
    "Country of birth", "Country of residence", "Nationality",
    "Language", "Student status", "Employment status",
]

# Filled with one Participant per CSV row by the loop below.
participants = list()

import ast

# Build one Participant per CSV row. The chat_data cell is parsed with
# ast.literal_eval, and role labels are replaced by neutral
# "[User]" / "[Interviewer]" tags.
for row_id in range(len(df)):
    participant = Participant(
        participant_id=df["Participant id"][row_id],
        condition=df["expcode"][row_id],
        prolific_data=dict((col, df.loc[row_id, col]) for col in columns),
        chat_transcript=ast.literal_eval(df["chat_data"][row_id]),
        change_username="[User]",
        change_assistantname="[Interviewer]",
    )
    participants.append(participant)


In [21]:
# print(participants[0].get_demographics())
# print(participants[0].get_transcript())
# print(participants[0].get_transcript_with_response_times())
# print(participants[0].get_choice_problem())

In [24]:
from genagents.genagents import GenerativeAgent
import json
from tqdm import tqdm

# One {participant_id: response} dict is collected per simulated participant.
sim_output = []

for participant in tqdm(participants[112:130]):
    # Each participant gets a fresh agent.
    agent = GenerativeAgent()

    # Seed the agent's scratchpad with the demographics. Values exposing
    # .item() (e.g. numpy scalars) are unwrapped first.
    scratch = {
        key: (value.item() if hasattr(value, "item") else value)
        for key, value in participant.get_demographics().items()
    }
    agent.update_scratch(scratch)

    # The choice problem is posed as a single interviewer turn, with the
    # interview transcript passed as context.
    dialogue = [("Interviewer", participant.get_choice_problem())]
    response = agent.utterance(dialogue, context=participant.get_transcript())

    sim_output.append({participant.get_participant_id(): response})

    agent.save(f"experiment_agents/{participant.get_participant_id()}")

    # break


100%|██████████| 18/18 [00:16<00:00,  1.08it/s]


In [25]:
# Display the collected list of {participant_id: response} dicts.
sim_output

[{'396d87109b7be13c46a1465cfc85c0a7da98cd7d89dad7b4': 'I would allocate 60% to auto safety and 40% to highway safety.'},
 {'3a6ad7459b7eb06919a61557a483c2a789cc9629dddc84e5': 'I would choose to allocate 60% to auto safety and 40% to highway safety.'},
 {'3a6bd514c17bef6d43f51507a4d193f58ecccb208adfd5e4': 'I would allocate 50% to auto safety and 50% to highway safety.'},
 {'3a6ed01f9a7ee46614f7135df0d7c6abdc9fca7d8d8d81b5': 'I would allocate 60% to auto safety and 40% to highway safety.'},
 {'3961d316cd7bb73817a44251a682c2a3db9f987a80dd82ef': 'I would allocate 60% to auto safety and 40% to highway safety.'},
 {'393ddc17cb7de76f10a44106a6d0c1a1dbcccb21d9d385b5': 'I would choose to allocate 60% to auto safety and 40% to highway safety.'},
 {'3a6ed1439f7ae46a43f24300f2d7cba0d39ccd7dd9dbd4b3': 'I would choose to allocate 50% to auto safety and 50% to highway safety.'},
 {'3a6ed212c028e16f15a14250f1d3c0a7dc9d9b2989dcd2e2': 'I would allocate 50% to auto safety and 50% to highway safety.'},
 {

In [None]:
# Flatten the list of single-entry dicts into one {participant_id: response}
# mapping (a later entry for the same id overwrites an earlier one).
merged_output = {
    pid: reply
    for entry in sim_output
    for pid, reply in entry.items()
}
merged_output

# Two-column table: one row per participant.
response_rows = list(merged_output.items())
merged_df = pd.DataFrame(response_rows, columns=["Participant_ID", "Response"])
merged_df.head()

In [None]:
# Persist the simulated responses without the DataFrame index column.
merged_df.to_csv(path_or_buf="experiment_data/sqb/sqb_simulated_responses_v3.csv", index=False)

#### TEST

In [None]:
# # pid = 112
# # pid = 363
# pid = 1

# from genagents.genagents import GenerativeAgent
# import json


# agent = GenerativeAgent()

# agent.update_scratch({k: (v.item() if hasattr(v, "item") else v) for k, v in participants[pid].get_demographics().items()})

# dialogue = [
#     ("Interviewer", participants[pid].get_choice_problem()),
# ]

# response = agent.utterance(dialogue, context=participants[pid].get_transcript())

# print(response)

# agent.save("experiment_agents/{}".format(participants[pid].get_participant_id()))