## Initial setup

In [None]:
import os
import json
from openai import OpenAI

# Configuration
# The client below is created without an explicit api_key argument, so the key
# is supplied through the OPENAI_API_KEY environment variable instead.
# Replace the three placeholder strings with real credentials before running.
os.environ["OPENAI_API_KEY"] = "YOUR-API-KEY"

# Client scoped to one organization and one project.
client = OpenAI(organization="YOUR-ORG-KEY", project="YOUR-PROJECT-KEY")

## Read the domain questions from the human-results CSV to build personas

In [None]:
import pandas as pd

# Load the human survey results. keep_default_na=False together with an empty
# na_values list disables pandas' NaN parsing, so every answer is kept as the
# literal text found in the file.
df = pd.read_csv("./human_results.csv", keep_default_na=False, na_values=[])

# Keep the first three columns (positions 0, 1 and 2) and blank out any NaN
# that might still be present.
# NOTE(review): if the CSV has a leading timestamp/ID column, the persona
# answers would sit at positions 1:4 instead — confirm against the file.
subset = df.iloc[:, 0:3].fillna("")

# Map "P1", "P2", ... to each row's three answers as a tuple; at most the
# first 23 rows are used.
persona_data = {}
for row_idx in range(min(23, len(subset))):
    persona_data[f"P{row_idx + 1}"] = tuple(subset.iloc[row_idx])

# Show one persona per line.
for label, answers in persona_data.items():
    print(f"{label}: {answers}")

In [None]:
# Build one persona instruction per participant from their three survey
# answers: (works in ASR?, works in STEM?, domain expertise).

# Accepted answers to the expertise question and the phrase each one adds to
# the persona instruction.
expertise_phrases = {
    'Legal': 'has legal expertise',
    'Medical': 'has medical expertise',
    'None': 'does not have legal or medical expertise',
}

persona_dict = {}
for k, v in persona_data.items():

    # first question: only an exact 'No' negates; any other answer counts as 'Yes'
    first = 'does not' if v[0] == 'No' else 'does'

    # second question: same rule as the first
    second = 'does not' if v[1] == 'No' else 'does'

    # third question: an unrecognised answer must stop the run. Otherwise the
    # phrase left over from the previous participant would be reused silently
    # (or the name would be undefined on the very first row).
    try:
        third = expertise_phrases[v[2]]
    except KeyError:
        raise ValueError(
            f"{k}: unexpected expertise answer {v[2]!r}; "
            f"expected one of {list(expertise_phrases)}"
        ) from None

    persona_dict[k] = f"Respond as a person who {first} work in automatic speech recognition technology, {second} work in STEM (science, technology engineering, mathematics), and {third}. Respond only with the letter for the answer choice."

# Print nicely with newlines
for k, v in persona_dict.items():
    print(f"{k}: {v}")

## Prepare SpeechSpectrum data for the experiment

In [None]:
# Legal stimulus: one spoken passage rendered four ways, labelled (A) VERBATIM,
# (B) NON-VERBATIM, (C) ENHANCED and (D) BULLET POINTS. The text is appended
# verbatim to the survey prompts, so it must not be reformatted. The literal
# starts and ends with a newline.
page_legal = """
(A) VERBATIM: Well, I, I remember that day pretty clearly because, um, it was raining really hard and, and I was driving to work around, oh, maybe seven-thirty in the morning? And, uh, I was going down Main Street when I saw, I saw this car, a blue sedan I think it was, come speeding through the intersection without, without stopping at the red light. And then, then there was this loud crash sound and I, I pulled over to see what happened. The, the driver of the blue car, he got out and he was, he was yelling something but I couldn't really hear what he was saying because of the rain and, and all the commo –  commotion. Actually, wait, I think it was a Honda, no, maybe a Toyota, I'm not sure about the make.

(B) NON-VERBATIM: I remember that day pretty clearly because it was raining really hard and I was driving to work around seven-thirty in the morning. I was going down Main Street when I saw this car, a blue sedan, come speeding through the intersection without stopping at the red light. Then there was this loud crash sound and I pulled over to see what happened. The driver of the blue car got out and he was yelling something but I couldn't really hear what he was saying because of the rain and all the commotion. I think it was a Honda, maybe a Toyota, I'm not sure about the make.

(C) ENHANCED: I clearly remember that day because it was raining heavily and I was driving to work around 7:30 AM. I was going down Main Street when I observed a blue sedan speed through the intersection without stopping at the red light. Following the crash, I pulled over to assess the situation. The driver of the blue car exited and was shouting, but I could not hear clearly due to the rain and commotion. I believe it was a Honda or Toyota, though I am uncertain of the exact make.

(D) BULLET POINTS:
* Incident occurred during heavy rain, ~7:30 AM
* Witness driving on Main Street to work
* Blue sedan ran red light at high speed
* Collision occurred, witness pulled over
* Blue car driver exited vehicle, yelling inaudibly
"""

# Medical stimulus: one spoken dictation rendered four ways, labelled
# (A) VERBATIM, (B) NON-VERBATIM, (C) ENHANCED and (D) BULLET POINTS. The text
# is appended verbatim to the survey prompts, so it must not be reformatted.
# The literal starts and ends with a newline.
page_medical = """
(A) VERBATIM: Okay, so, um, the patient came in today and, uh, she was complaining of, you know, chest pain that's been going on for about, let's see, um, three days now. And, uh, she said it gets worse when she, when she breathes deeply or, or coughs. So I, I examined her and found some, uh, some tenderness in the, the inter – intercostal muscles on the right side. Her vital signs were, um, let me think, blood pressure was one-twenty over eighty, heart rate was, was seventy-two, and temperature was normal at, uh, ninety-eight point six. I'm thinking this is probably, you know, costochon – costochondritis rather than anything, anything more serious like a cardiac event.

(B) NON-VERBATIM: The patient came in today and she was complaining of chest pain that's been going on for about three days now. She said it gets worse when she breathes deeply or coughs. I examined her and found some tenderness in the intercostal muscles on the right side. Her vital signs were blood pressure was one-twenty over eighty, heart rate was seventy-two, and temperature was normal at ninety-eight point six. I'm thinking this is probably costochondritis rather than anything more serious like a cardiac event.

(C) ENHANCED: The patient came in today complaining of chest pain that has been ongoing for three days. She reports it worsens with deep breathing and coughing. On examination, I found tenderness in the intercostal muscles on the right side. Vital signs: blood pressure 120/80, heart rate 72, temperature 98.6°F. Assessment is likely costochondritis rather than a cardiac event.

(D) BULLET POINTS:
* Patient: 3-day chest pain history
* Triggers: deep breathing, coughing
* Physical findings: right intercostal tenderness
* Vitals: BP 120/80, HR 72, temp 98.6°F
* Diagnosis: likely costochondritis vs cardiac
"""

# Business stimulus: one spoken meeting excerpt rendered four ways, labelled
# (A) VERBATIM, (B) NON-VERBATIM, (C) ENHANCED and (D) BULLET POINTS. The text
# is appended verbatim to the survey prompts, so it must not be reformatted.
# The literal starts and ends with a newline.
page_business = """
(A) VERBATIM: So, um, we need to talk about the quarterly numbers and, and where we stand with the project timeline. I know, I know we've been behind schedule but, but Sarah's team has been working really hard on the, the user interface redesign. And, uh, Mike, didn't you say something about the database issues being, being mostly resolved now? Or was that the ser – server issues? No, database, right. I think, I think we're in a much better position than we were last month but we still need to, to figure out the budget allocation for the next quarter and, and maybe hire one or two more developers if, if we want to meet the December deadline. Actually, was it December or January? I think December.

(B) NON-VERBATIM: We need to talk about the quarterly numbers and where we stand with the project timeline. I know we've been behind schedule but Sarah's team has been working really hard on the user interface redesign. Mike, didn't you say something about the database issues being mostly resolved now? I think we're in a much better position than we were last month but we still need to figure out the budget allocation for the next quarter and maybe hire one or two more developers if we want to meet the December deadline.

(C) ENHANCED: We need to discuss the quarterly numbers and our current position on the project timeline. We have been behind schedule, but Sarah's team has been working hard on the user interface redesign. Mike mentioned that the database issues are mostly resolved. We are in a better position than last month, but we still need to determine budget allocation for next quarter and consider hiring additional developers to meet the December deadline.

(D) BULLET POINTS:
* Review quarterly numbers & project timeline
* Behind schedule but UI redesign progressing well
* Database issues mostly resolved
* Improved position vs. last month
* Action items: budget allocation, potential hiring for December deadline
"""

# Every prompt has the same shape:
#   "<role framing> <shared stem> <question> <transcript page>"
# joined by single spaces. Only the role framing, the question and the page
# vary, so they are listed once and the prompts are assembled in a loop.
question_stem = "Which version of the transcript (i.e. point) is the most helpful for you to answer the following question:"

legal_role = "Imagine you are a case judge reading through a deposition transcript."
medical_role = "Imagine you are a doctor looking over a triage dictation provided by a nurse."
business_role = "Imagine you are a team leader reading a meeting transcript."

# (role framing, question, transcript page) in Q1..Q6 order.
question_specs = [
    (legal_role, "Did the defendant seem confident about the details of the crash?", page_legal),
    (legal_role, "What were the events leading up to the crash?", page_legal),
    (medical_role, "What are the main symptoms the patient is exhibiting?", page_medical),
    (medical_role, "Has the chest pain been going on for exactly three days, or could it have been longer/shorter?", page_medical),
    (business_role, "Does the team seem like they will meet the December deadline?", page_business),
    (business_role, "What are the action items from the meeting?", page_business),
]

survey_questions = {}
for number, (role_intro, question, page) in enumerate(question_specs, start=1):
    prompt_head = " ".join([role_intro, question_stem, question])
    survey_questions[f"Q{number}"] = prompt_head + " " + page

## Run experiment

In [None]:
# Ask every persona every survey question several times and collect the raw
# answers. Each entry of `results` is a dict with the keys persona_name,
# question_name, round and response.

# Number of repeated samples collected per (persona, question) pair.
NUM_ROUNDS = 3

results = []

for persona_name, persona_text in persona_dict.items():
    for question_name, question_text in survey_questions.items():

        # set up input: the persona goes in the developer message and the
        # survey question in the user message. The same messages are sent on
        # every round, so they are built once per (persona, question) pair.
        input_list = [
            {
                "role": "developer",
                "content": persona_text
            },
            {
                "role": "user",
                "content": question_text
            }
        ]

        # `round_num` rather than `round`, so the builtin round() is not
        # shadowed for the rest of the notebook session.
        for round_num in range(1, NUM_ROUNDS + 1):
            print(input_list)

            # get response
            response = client.responses.create(
                model="gpt-5-mini-2025-08-07",
                input=input_list
            )
            answer = response.output_text

            print(answer)

            # save response
            results.append({"persona_name": persona_name,
                            "question_name": question_name,
                            "round": round_num,
                            "response": answer
                            })

            print()

## Save experiment results

In [None]:
# save results to json file
# Explicit UTF-8 so the output does not depend on the platform's default encoding.
with open("results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

# print results
print(results)

# # to load back later
# with open("results.json", "r", encoding="utf-8") as f:
#     loaded = json.load(f)
# print(loaded)

import csv

# Column order follows the first record. If the experiment produced no records,
# fall back to the expected columns so a header-only file is written instead
# of failing on results[0].
default_fieldnames = ["persona_name", "question_name", "round", "response"]
fieldnames = list(results[0].keys()) if results else default_fieldnames

# save results to csv file
# newline="" lets the csv module control line endings; UTF-8 keeps any
# non-ASCII characters in the responses writable on every platform.
with open("results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)

# # to load back later
# with open("results.csv", "r", newline="", encoding="utf-8") as f:
#     reader = csv.DictReader(f)
#     loaded = list(reader)