In [1]:
import pandas as pd
import random
import os

In [2]:
# --- Configuration and data loading -------------------------------------------
# Participant whose survey is being generated, and the two model identifiers
# that appear in the "model" column of responses.csv.
USER_ID = "4e964d9bcf"
CLAUDE_MODEL = "claude-sonnet-4-20250514"
GPT_MODEL = "gpt-4.1-mini-2025-04-14"

# Seed the RNG from the participant id so that Restart & Run All regenerates the
# same shuffles and orderings for this participant (str seeds are supported by
# random.seed and are stable across interpreter runs).
random.seed(USER_ID)

participants = pd.read_csv("../data/participants.csv")
participant_rows = participants.loc[participants["user_id"]==USER_ID]
if participant_rows.empty:
    # Fail with a readable message instead of an IndexError from `.index[0]`.
    raise ValueError(f"USER_ID {USER_ID!r} not found in ../data/participants.csv")

# Index label of the participant's row in participants.csv; used below to pick
# which zero-shot iteration is paired with this participant.
ZERO_SHOT_ITERATION = int(participant_rows.index[0])
model_assignments = pd.read_csv("../data/survey_model_assignment.csv")

prompts = pd.read_csv("../data/prompts.csv")
politics_prompt_ids = prompts.loc[prompts["prompt_id"].str.startswith("politics"), "prompt_id"].to_list()
aita_prompt_ids = prompts.loc[prompts["prompt_id"].str.startswith("aita"), "prompt_id"].to_list()

responses = pd.read_csv("../data/responses.csv")
# Responses generated without participant context, for this participant's iteration.
responses_zero = responses.loc[(responses["context"]=="zero-shot")&(responses["iteration"]==ZERO_SHOT_ITERATION)]
# Responses whose "context" column equals this participant's id (iteration 0 only).
responses_context = responses.loc[(responses["context"]==USER_ID)&(responses["iteration"]==0)]

group_id = participant_rows["group_id"].to_list()[0]
model_user = participant_rows["user_num"].to_list()[0]

# Map prompt_id -> model for this participant. The assignment file has one
# "model_user<N>" column per user number within a group; build the dict straight
# from the filtered rows rather than adding a column to a filtered slice (which
# triggers pandas' SettingWithCopyWarning).
group_assignments = model_assignments.loc[model_assignments["group_id"]==group_id]
randomizer = dict(zip(group_assignments["prompt_id"], group_assignments["model_user"+str(model_user)]))

In [3]:
# Opening survey questions. Each dict is one survey row; "Options" holds the
# answer choices as a single comma-separated string.
_task_options = ",".join([
    "Factual information (like using a search engine)",
    "Professional or work-related writing (such as emails or resumes)",
    "Coding assistance or technical tasks",
    "Personal or social advice",
    "Recommendations for hobbies or leisure activities",
    "Understanding news or political topics",
])
_trust_options = ",".join([
    "Very trustworthy",
    "Somewhat trustworthy",
    "Somewhat untrustworthy",
    "Very untrustworthy",
])
_quality_options = ",".join(
    f"{comparison} other LLMs"
    for comparison in (
        "Much better than",
        "Slightly better than",
        "About the same as",
        "Slightly worse than",
        "Much worse than",
    )
)

preamble = [
    {
        "Question": "What is your full name?",
        "Type": "TEXT",
    },
    {
        "Question": "Which Google account did you use to login to the study platform?",
        "Type": "TEXT",
    },
    {
        "Question": "Which tasks did you use the study chatbot for? Select all that apply.",
        "Type": "CHECKBOX",
        "Options": _task_options,
    },
    {
        "Question": "How trustworthy did you find the study chatbot during the interaction?",
        "Type": "MULTIPLE",
        "Options": _trust_options,
    },
    {
        "Question": "How would you rate the quality of responses from the study chatbot compared to other LLMs you normally use?",
        "Type": "MULTIPLE",
        "Options": _quality_options,
    },
]

In [4]:
# Attention-check section for the politics part of the survey: two fixed
# "responses" whose text tells the participant which rating to pick.
_politics_attention_intro = """Please read the following model responses about policies the US government could implement for **gun regulation**. Rate how closely each response matches your political views."""
_politics_similarity_options = ",".join(
    f"{level} to my political views"
    for level in ("Very similar", "Somewhat similar", "Somewhat dissimilar", "Very dissimilar")
)

# Only the first row carries the section description.
q1 = {
    "Question": "How closely does **Response 1** reflect your political views?",
    "Type": "MULTIPLE",
    "Options": _politics_similarity_options,
    "Section Description": _politics_attention_intro,
    "Note Title": "Response 1",
    "Note Description": """This is an attention check. To show you're paying attention, please rate this response as "very similar to my political views." Ignore the rest of this response. The US government could implement universal background checks for all gun purchases, including private sales and gun shows. This policy would help prevent firearms from reaching individuals with criminal records or mental health issues, enhancing public safety without banning guns outright.""",
    "prompt_id": "politics-attention",
}
q2 = {
    "Question": "How closely does **Response 2** reflect your political views?",
    "Type": "MULTIPLE",
    "Options": _politics_similarity_options,
    "Note Title": "Response 2",
    "Note Description": """This is an attention check. To show you're paying attention, please rate this response as "very dissimilar to my political views." Ignore the rest of this response. The US could implement enhanced prosecution of existing gun laws, directing federal resources to aggressively pursue criminals who illegally possess firearms or use guns in crimes. This approach focuses on enforcing current regulations rather than creating new restrictions, targeting actual lawbreakers while respecting law-abiding citizens' constitutional rights.""",
    "prompt_id": "politics-attention",
}

politics_attention_check = [q1, q2]

In [5]:
# Attention-check section for the personal-advice part of the survey: a fixed
# scenario row followed by two "responses" whose text tells the participant
# which rating to pick.
_aita_attention_intro = """Please read the scenario below and consider your perspective on the conflict. Then, read each model response and rate how closely it matches your perspective."""
_aita_similarity_options = ",".join(
    f"{level} to my perspective"
    for level in ("Very similar", "Somewhat similar", "Somewhat dissimilar", "Very dissimilar")
)

# The scenario row opens the section, so it alone carries the section description.
q0 = {
    "Question": "Do you think the narrator of the scenario was in the wrong?",
    "Type": "MULTIPLE",
    "Options": "Yes - they were wrong,No - they were not wrong",
    "Section Description": _aita_attention_intro,
    "Note Title": "Scenario",
    "Note Description": """I borrowed my friend’s favorite jacket for a night and spilled red wine on it. I cleaned it as best I could and apologized, but they’ve been cold ever since, saying I crossed a line. I’ve been careful with their things before, and it was an accident. Am I really in the wrong here, or are they overreacting?""",
    "prompt_id": "aita-attention",
}
q1 = {
    "Question": "How closely does **Response 1** reflect your perspective?",
    "Type": "MULTIPLE",
    "Options": _aita_similarity_options,
    "Note Title": "Response 1",
    "Note Description": """This is an attention check. To show you're paying attention, please rate this response as "very similar to my perspective." Ignore the rest of this response. Your friend’s reaction likely stems from the jacket’s sentimental value, not just the stain. While you acted responsibly and apologized, their coldness reflects hurt feelings rather than overreaction. Give them time and space; understanding their perspective can help rebuild trust and repair the friendship.""",
    "prompt_id": "aita-attention",
}
q2 = {
    "Question": "How closely does **Response 2** reflect your perspective?",
    "Type": "MULTIPLE",
    "Options": _aita_similarity_options,
    "Note Title": "Response 2",
    "Note Description": """This is an attention check. To show you're paying attention, please rate this response as "very dissimilar to my perspective." Ignore the rest of this response. While you should apologize for the accident, your friend is overreacting. Accidents happen despite our best intentions, and you've been trustworthy before. A true friend would accept your genuine apology and work through this together rather than giving you the cold shoulder over an honest mistake.""",
    "prompt_id": "aita-attention",
}

aita_attention_check = [q0, q1, q2]

In [6]:
# Build the politics rows of the survey:
#   * one section per politics prompt (two rows: Response 1 / Response 2), in
#     shuffled order, pairing the zero-shot and participant-context responses
#     of the model assigned to that prompt in random left/right order;
#   * the attention-check section inserted before the prompt at a randomly
#     chosen position;
#   * a closing section rating each model's inference of the participant's views.
politics = []
random.shuffle(politics_prompt_ids)
attention_check_num = random.choice([4,5,6,7])

politics_similarity_options = "Very similar to my political views,Somewhat similar to my political views,Somewhat dissimilar to my political views,Very dissimilar to my political views"

# Number of sections shown: one per prompt, the closing inference section, and
# the attention check when its position falls inside the prompt list. Computed
# rather than hard-coded so the "Topic X of N" titles follow the data.
politics_attention_included = attention_check_num < len(politics_prompt_ids)
politics_total = len(politics_prompt_ids) + 1 + int(politics_attention_included)
politics_topic_num = 0

for i, prompt_id in enumerate(politics_prompt_ids):
    if i==attention_check_num:
        politics_topic_num += 1
        # Work on copies so the attention-check template dicts are not mutated.
        attention_rows = [dict(row) for row in politics_attention_check]
        attention_rows[0]["Section Title"] = f"Politics: Topic {politics_topic_num} of {politics_total}"
        politics += attention_rows

    politics_topic_num += 1
    q1 = {"Question": "How closely does **Response 1** reflect your political views?", "Type": "MULTIPLE", "Options": politics_similarity_options}
    q2 = {"Question": "How closely does **Response 2** reflect your political views?", "Type": "MULTIPLE", "Options": politics_similarity_options}

    # Section-level fields live on the first row of the section only.
    q1["Section Title"] = f"Politics: Topic {politics_topic_num} of {politics_total}"

    model = randomizer[prompt_id]
    for row in (q1, q2):
        row["prompt_id"] = prompt_id
        row["model"] = model

    # Reduce the stored prompt to its topic by stripping the fixed instruction text.
    topic = prompts.loc[prompts["prompt_id"]==prompt_id, "prompt"].to_list()[0]
    topic = topic.replace("Explain one policy the US government could implement for ", "")
    topic = topic.replace(".\nLimit your response to a short paragraph (100 words max).", "")
    description = "Please read the following model responses about policies the US government could implement for **"+ topic
    description += "**. Rate how closely each response matches your political views."
    q1["Section Description"] = description

    # Fetch both responses, failing with a message that names the missing
    # combination instead of a bare "list index out of range".
    found = {}
    for label, frame in (("zero-shot", responses_zero), ("context", responses_context)):
        matches = frame.loc[((frame["prompt_id"]==prompt_id)&(frame["model"]==model)), "response"].to_list()
        if not matches:
            raise LookupError(f"No {label} response for prompt_id={prompt_id!r}, model={model!r}")
        found[label] = matches[0]
    zero_shot_response = found["zero-shot"]
    context_response = found["context"]

    q1["Note Title"] = "Response 1"
    q2["Note Title"] = "Response 2"
    # Randomise which response is shown first; is_zero_shot records the outcome.
    zero_shot_first = random.choice([0,1])==0
    if zero_shot_first:
        q1["Note Description"], q2["Note Description"] = zero_shot_response, context_response
    else:
        q1["Note Description"], q2["Note Description"] = context_response, zero_shot_response
    q1["is_zero_shot"] = zero_shot_first
    q2["is_zero_shot"] = not zero_shot_first
    politics.append(q1)
    politics.append(q2)

# Closing section: rate each model's inference of the participant's political views.
q1 = {"Question": "How accurately does **Response 1** describe your political views? If the response does not infer any of your views, rate it as \"very inaccurate\".", "Type": "MULTIPLE", "Options": "Very accurate,Somewhat accurate,Somewhat inaccurate,Very inaccurate"}
q2 = {"Question": "How accurately does **Response 2** describe your political views? If the response does not infer any of your views, rate it as \"very inaccurate\".", "Type": "MULTIPLE", "Options": "Very accurate,Somewhat accurate,Somewhat inaccurate,Very inaccurate"}

q1["Section Title"] = f"Politics: Topic {politics_total} of {politics_total}"
q1["Section Description"] = "Based on your interaction with the study chatbot, we asked it to infer your political views. Please read and rate each response based on how accurately it describes your political views."

inferred = {}
for model_name in (CLAUDE_MODEL, GPT_MODEL):
    matches = responses_context.loc[((responses_context["prompt_id"]=="infer-politics")&(responses_context["model"]==model_name)), "response"].to_list()
    if not matches:
        raise LookupError(f"No 'infer-politics' response for model={model_name!r}")
    inferred[model_name] = matches[0]
claude_response = inferred[CLAUDE_MODEL]
gpt_response = inferred[GPT_MODEL]

q1["prompt_id"] = "infer-politics"
q2["prompt_id"] = "infer-politics"
q1["Note Title"] = "Response 1"
q2["Note Title"] = "Response 2"
# Randomise which model's inference is shown first.
if random.choice([0,1])==0:
    first_model, second_model = CLAUDE_MODEL, GPT_MODEL
else:
    first_model, second_model = GPT_MODEL, CLAUDE_MODEL
q1["Note Description"] = inferred[first_model]
q2["Note Description"] = inferred[second_model]
q1["model"] = first_model
q2["model"] = second_model
politics.append(q1)
politics.append(q2)

In [7]:
# Build the personal-advice rows of the survey:
#   * one section per aita prompt (three rows: Scenario / Response 1 /
#     Response 2), in shuffled order, pairing the zero-shot and
#     participant-context responses of the assigned model in random order;
#   * the attention-check section inserted before the prompt at a randomly
#     chosen position;
#   * a closing section rating each model's inference of the participant's personality.
aita = []
random.shuffle(aita_prompt_ids)
attention_check_num = random.choice([4,5,6,7])

aita_similarity_options = "Very similar to my perspective,Somewhat similar to my perspective,Somewhat dissimilar to my perspective,Very dissimilar to my perspective"

# Number of sections shown: one per prompt, the closing inference section, and
# the attention check when its position falls inside the prompt list. Computed
# rather than hard-coded so the "Scenario X of N" titles follow the data.
aita_attention_included = attention_check_num < len(aita_prompt_ids)
aita_total = len(aita_prompt_ids) + 1 + int(aita_attention_included)
aita_scenario_num = 0

for i, prompt_id in enumerate(aita_prompt_ids):
    if i==attention_check_num:
        aita_scenario_num += 1
        # Work on copies so the attention-check template dicts are not mutated.
        attention_rows = [dict(row) for row in aita_attention_check]
        attention_rows[0]["Section Title"] = f"Personal Advice: Scenario {aita_scenario_num} of {aita_total}"
        aita += attention_rows

    aita_scenario_num += 1
    q0 = {"Question": "Do you think the narrator of the scenario was in the wrong?", "Type": "MULTIPLE", "Options": "Yes - they were wrong,No - they were not wrong"}
    q1 = {"Question": "How closely does **Response 1** reflect your perspective?", "Type": "MULTIPLE", "Options": aita_similarity_options}
    q2 = {"Question": "How closely does **Response 2** reflect your perspective?", "Type": "MULTIPLE", "Options": aita_similarity_options}

    # Section-level fields live on the first row of the section only.
    q0["Section Title"] = f"Personal Advice: Scenario {aita_scenario_num} of {aita_total}"

    model = randomizer[prompt_id]
    for row in (q0, q1, q2):
        row["prompt_id"] = prompt_id
    # Only the response rows record the model; the scenario row has none.
    q1["model"] = model
    q2["model"] = model

    q0["Note Title"] = "Scenario"
    q1["Note Title"] = "Response 1"
    q2["Note Title"] = "Response 2"

    # Show the stored prompt as the scenario, minus the fixed length instruction.
    scenario = prompts.loc[prompts["prompt_id"]==prompt_id, "prompt"].to_list()[0]
    scenario = scenario.replace("\nLimit your response to a short paragraph (75 words max).", "")

    description = "Please read the scenario below and consider your perspective on the conflict."
    description += " Then, read each model response and rate how closely it matches your perspective."
    q0["Section Description"] = description
    q0["Note Description"] = scenario

    # Fetch both responses, failing with a message that names the missing
    # combination instead of a bare "list index out of range".
    found = {}
    for label, frame in (("zero-shot", responses_zero), ("context", responses_context)):
        matches = frame.loc[((frame["prompt_id"]==prompt_id)&(frame["model"]==model)), "response"].to_list()
        if not matches:
            raise LookupError(f"No {label} response for prompt_id={prompt_id!r}, model={model!r}")
        found[label] = matches[0]
    zero_shot_response = found["zero-shot"]
    context_response = found["context"]

    # Randomise which response is shown first; is_zero_shot records the outcome.
    zero_shot_first = random.choice([0,1])==0
    if zero_shot_first:
        q1["Note Description"], q2["Note Description"] = zero_shot_response, context_response
    else:
        q1["Note Description"], q2["Note Description"] = context_response, zero_shot_response
    q1["is_zero_shot"] = zero_shot_first
    q2["is_zero_shot"] = not zero_shot_first
    aita.append(q0)
    aita.append(q1)
    aita.append(q2)

# Closing section: rate each model's inference of the participant's personality.
q1 = {"Question": "How accurately does **Response 1** describe your personality?", "Type": "MULTIPLE", "Options": "Very accurate,Somewhat accurate,Somewhat inaccurate,Very inaccurate"}
q2 = {"Question": "How accurately does **Response 2** describe your personality?", "Type": "MULTIPLE", "Options": "Very accurate,Somewhat accurate,Somewhat inaccurate,Very inaccurate"}

q1["Section Title"] = f"Personal Advice: Scenario {aita_total} of {aita_total}"
q1["Section Description"] = "Based on your interaction with the study chatbot, we asked it to infer your personality. Please read and rate each response based on how accurately it describes your personality."

inferred = {}
for model_name in (CLAUDE_MODEL, GPT_MODEL):
    matches = responses_context.loc[((responses_context["prompt_id"]=="infer-aita")&(responses_context["model"]==model_name)), "response"].to_list()
    if not matches:
        raise LookupError(f"No 'infer-aita' response for model={model_name!r}")
    inferred[model_name] = matches[0]
claude_response = inferred[CLAUDE_MODEL]
gpt_response = inferred[GPT_MODEL]

q1["prompt_id"] = "infer-aita"
q2["prompt_id"] = "infer-aita"
q1["Note Title"] = "Response 1"
q2["Note Title"] = "Response 2"
# Randomise which model's inference is shown first.
if random.choice([0,1])==0:
    first_model, second_model = CLAUDE_MODEL, GPT_MODEL
else:
    first_model, second_model = GPT_MODEL, CLAUDE_MODEL
q1["Note Description"] = inferred[first_model]
q2["Note Description"] = inferred[second_model]
q1["model"] = first_model
q2["model"] = second_model
aita.append(q1)
aita.append(q2)

In [8]:
# Column layout of the exported survey CSV.
COLUMN_ORDER = [
    "Question",
    "Type",
    "Options",
    "Section Title",
    "Section Description",
    "Note Title",
    "Note Description",
    "prompt_id",
    "model",
    "is_zero_shot",
]

# Preamble first, then the two parts in random order. Concatenate into a NEW
# list: aliasing `preamble` and using `+=` would extend `preamble` in place, so
# re-running this cell would append the sections a second time.
if random.choice([0,1])==0:
    survey_rows = preamble + aita + politics
else:
    survey_rows = preamble + politics + aita
df = pd.DataFrame(survey_rows)

df = df[COLUMN_ORDER]
file_path = "../data/surveys/"+USER_ID+".csv"
# Never overwrite an existing survey file for this participant.
if not os.path.exists(file_path):
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    df.to_csv(file_path, index=False)
else:
    print(f"{file_path} already exists; survey not overwritten.")