In [1]:
from semlib import Session
import os
import csv
from dotenv import load_dotenv
from collections import Counter

In [2]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
MAX_CONCURRENCY=5

session = Session(model="openai/gpt-4.1-mini", max_concurrency=MAX_CONCURRENCY)

In [3]:
with open("MTS-Dialog-TrainingSet.csv", encoding="latin-1") as f_in:
    csv_file = csv.reader(f_in)
    header = next(csv_file)
    convos = [dict(zip(header, row, strict=False)) for row in csv_file]

print(f"Loaded {len(convos)} convos\n")
print(f"Example convo: {convos[1]['dialogue']}")

Loaded 1201 convos

Example convo: Doctor: How're you feeling today?  
Patient: Terrible. I'm having the worst headache of my life. 
Doctor: I'm so sorry. Well you are only twenty five, so let's hope this is the last of the worst. Let's see how we can best help you. When did it start? 
Patient: Around eleven in the morning. 
Doctor: Today? 
Patient: Um no yesterday. July thirty first. 
Doctor: July thirty first O eight. Got it. Did it come on suddenly? 
Patient: Yeah. 
Doctor: Are you having any symptoms with it, such as blurry vision, light sensitivity, dizziness, lightheadedness, or nausea? 
Patient: I'm having blurry vision and lightheadedness.  I also can't seem to write well. It looks so messy. I am naturally right handed but my writing looks like I am trying with my left. 
Doctor: How would you describe the lightheadedness? 
Patient: Like there are blind spots. 
Doctor: Okay. How about any vomiting? 
Patient: Um no. I feel like my face is pretty swollen though. I don't know if it

In [4]:
extracted_reasons = await session.map(
    convos,
    template=lambda r: f"""
Extract the patient's chief complaint or reason for coming to the doctor. 1-2 word response only.
{r['dialogue']}
""".strip(),
)

In [5]:
extracted_reasons[0]

'refill'

In [6]:
extracted_family_illnesses = await session.map(
    convos,
    template=lambda r: f"""
Extract the any illness(es) that the patient and doctor are concerned about. Respond with only the illness(es) separated by commas. If none respond with none.
{r['section_text']}
""".strip(),
)

In [7]:
extracted_family_illnesses[0]

'hypertension, osteoarthritis, osteoporosis, hypothyroidism, allergic rhinitis, kidney stones'

In [8]:
extracted_medications = await session.map(
    convos,
    template=lambda r: f"""
Extract the patient's medications. Respond with only the medication(s) separated by commas. If none respond with none.
{r['section_text']}
""".strip(),
)

In [9]:
extracted_medications[0]

'none'

In [10]:
# Split each element by comma, strip whitespace, and flatten into one list
all_meds = [med.strip() for patient in extracted_medications for med in patient.split(',')]

# Count occurrences
med_counts = Counter(all_meds)

# Get 5 most common
top_5_meds = med_counts.most_common(5)

top_5_meds

[('none', 1038),
 ('Allegra', 8),
 ('Tylenol', 7),
 ('Benadryl', 5),
 ('Coumadin', 5)]

In [11]:
extracted_symptoms = await session.map(
    convos,
    template=lambda r: f"""
Extract the patient's symptom. Respond with only the symptom(s) separated by commas. 1-2 words for each symptom. If patient has no symptons, respond with none.
{r['section_text']}
""".strip(),
)

In [14]:
extracted_symptoms[0]

'none'

In [13]:
# Split each element by comma, strip whitespace, and flatten into one list
all_symptoms = [symptom.strip() for patient in extracted_symptoms for symptom in patient.split(',')]

# Count occurrences
symptoms_counts = Counter(all_symptoms)

# Get 5 most common
top_5_symptoms = symptoms_counts.most_common(5)

top_5_symptoms

[('none', 734),
 ('pain', 26),
 ('nausea', 23),
 ('vomiting', 17),
 ('back pain', 15)]