In [10]:
import pandas as pd
import random
import json


In [5]:
# Load the core dataset (one row per ECG record, including the ECG summaries).
# NOTE(review): the preview further down shows an "Unnamed: 0" column, which
# suggests this CSV was written with its index included - confirm whether
# that column is wanted downstream.
df = pd.read_csv('core_df_with_summaries.csv')

In [6]:
# Candidate symptom sets for each triage label. fill_synthetic_symptoms draws
# one inner list at random per row and joins it with ", " to produce the
# synthetic "symptoms" text.
SYMPTOMS_BY_TRIAGE = {
    "GREEN": [
        ["no chest pain", "no shortness of breath", "feels well"],
        ["mild chest discomfort", "anxiety"],
        ["palpitations", "fatigue"],
        ["lightheadedness", "stress"],
        ["asymptomatic", "routine checkup"]
    ],
    "YELLOW": [
        ["chest tightness", "shortness of breath on exertion"],
        ["palpitations", "dizziness"],
        ["atypical chest pain", "fatigue"],
        ["near-syncope", "weakness"],
        ["episodic chest discomfort", "nausea"]
    ],
    "RED": [
        ["severe chest pain", "shortness of breath", "diaphoresis"],
        ["crushing chest pain", "nausea", "vomiting"],
        ["syncope", "palpitations"],
        ["chest pain radiating to left arm", "shortness of breath"],
        ["confusion", "hypotension", "chest pain"]
    ]
}


In [7]:
def fill_synthetic_symptoms(core_df, seed=42, symptoms_by_triage=None):
    """Return a copy of ``core_df`` with a synthetic ``symptoms`` column.

    For every row, one candidate symptom list is drawn at random from the
    lookup-table entry matching the row's ``triage_label`` and joined with
    ", " into a single string. Labels that are not in the table receive
    the text "no symptoms".

    Parameters
    ----------
    core_df : pandas.DataFrame
        Input frame with a ``triage_label`` column. It is not modified.
    seed : int, default 42
        Seed for the random draws; the same seed and input give the same
        output.
    symptoms_by_triage : dict, optional
        Mapping ``label -> list of candidate symptom lists``. Defaults to
        the module-level ``SYMPTOMS_BY_TRIAGE``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``core_df`` with the ``symptoms`` column added (or
        overwritten if it already existed).

    Raises
    ------
    KeyError
        If ``core_df`` has no ``triage_label`` column.
    """
    if symptoms_by_triage is None:
        symptoms_by_triage = SYMPTOMS_BY_TRIAGE

    # A private generator yields the same draw sequence as
    # random.seed(seed) followed by random.choice(...), but does not reseed
    # the process-wide RNG that other cells may rely on.
    rng = random.Random(seed)
    fallback = [["no symptoms"]]

    df = core_df.copy()
    # Iterate the label column directly; iterrows() would build a full Series
    # per row only to read a single value. One draw is made per row (also
    # for unknown labels) so the sequence stays aligned with the row order.
    df["symptoms"] = [
        ", ".join(rng.choice(symptoms_by_triage.get(triage, fallback)))
        for triage in df["triage_label"]
    ]
    return df

In [8]:
df = fill_synthetic_symptoms(df)  # add the synthetic "symptoms" column (default seed=42)

In [9]:
df.head()  # preview the first rows to confirm the "symptoms" column was added

Unnamed: 0,ecg_id,patient_id,age,sex,height,weight,nurse,site,device,recording_date,...,extra_beats,pacemaker,strat_fold,filename_lr,filename_hr,triage_label,image_path,ecg_summary,symptoms,triage_target_json
0,146,13447.0,45.0,0,182.0,90.0,,3.0,CS-12 E,1986-02-23 11:41:43,...,,,7,records100/00000/00146_lr,records500/00000/00146_hr,RED,ecg_images/00146.png,Here's a concise analysis of the 12-lead ECG:\...,"severe chest pain, shortness of breath, diapho...",
1,177,21551.0,73.0,0,,,,3.0,AT-6 C,1986-03-15 08:11:15,...,,,4,records100/00000/00177_lr,records500/00000/00177_hr,RED,ecg_images/00177.png,Here's a concise analysis of the 12-lead ECG:\...,"severe chest pain, shortness of breath, diapho...",
2,223,16039.0,82.0,0,,,1.0,2.0,CS-12,1986-06-08 17:24:13,...,,,9,records100/00000/00223_lr,records500/00000/00223_hr,RED,ecg_images/00223.png,Here's a concise analysis of the provided 12-l...,"syncope, palpitations",
3,544,13772.0,67.0,0,,,,3.0,AT-6 C,1987-03-13 16:32:49,...,,,9,records100/00000/00544_lr,records500/00000/00544_hr,RED,ecg_images/00544.png,Here's a concise analysis of the 12-lead ECG:\...,"crushing chest pain, nausea, vomiting",
4,631,5043.0,83.0,0,183.0,70.0,11.0,1.0,AT-6 6,1987-04-17 10:10:03,...,1ES,,4,records100/00000/00631_lr,records500/00000/00631_hr,GREEN,ecg_images/00631.png,Here's a concise analysis of the provided 12-l...,"mild chest discomfort, anxiety",


In [11]:
# Lookup tables used to build the per-row triage target.

# Findings listed under "red_flags" in the target JSON for each triage label.
# build_triage_target_json falls back to an empty list for unknown labels.
RED_FLAGS_BY_TRIAGE = {
    "GREEN": [],
    "YELLOW": [
        "ST-segment depression",
        "T-wave inversion",
        "exertional chest pain",
        "palpitations",
        "dizziness"
    ],
    "RED": [
        "ST-segment elevation",
        "ventricular ectopy",
        "tachycardia",
        "syncope",
        "severe chest pain"
    ]
}

# Actions listed under "suggested_actions" in the target JSON for each triage
# label. These strings are written verbatim into every row's target, so any
# spelling error here propagates to the whole dataset.
ACTIONS_BY_TRIAGE = {
    "GREEN": [
        "Reassure patient",
        "Monitor symptoms",
        "Repeat ECG if symptoms persist",
        "Outpatient follow-up"
    ],
    "YELLOW": [
        "Repeat ECG",
        "Cardiac biomarkers",
        "Clinical observation",
        "Consider cardiology consultation"
    ],
    "RED": [
        "Urgent transfer to emergency department",
        "Continuous cardiac monitoring",
        "Repeat ECG and cardiac biomarkers",
        "Immediate cardiology consultation"
    ]
}



In [12]:
def build_triage_target_json(core_df, red_flags_by_triage=None, actions_by_triage=None):
    """Return a copy of ``core_df`` with a ``triage_target_json`` column.

    Each row receives a JSON string (``indent=2``) with the keys
    ``triage_level``, ``red_flags`` and ``suggested_actions``, looked up
    from the row's ``triage_label``. Labels that are not in a lookup table
    get an empty list for that key.

    Parameters
    ----------
    core_df : pandas.DataFrame
        Input frame with a ``triage_label`` column. It is not modified.
    red_flags_by_triage : dict, optional
        Mapping ``label -> list of red flags``. Defaults to the
        module-level ``RED_FLAGS_BY_TRIAGE``.
    actions_by_triage : dict, optional
        Mapping ``label -> list of suggested actions``. Defaults to the
        module-level ``ACTIONS_BY_TRIAGE``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``core_df`` with the ``triage_target_json`` column added (or
        overwritten if it already existed).

    Raises
    ------
    KeyError
        If ``core_df`` has no ``triage_label`` column.
    """
    if red_flags_by_triage is None:
        red_flags_by_triage = RED_FLAGS_BY_TRIAGE
    if actions_by_triage is None:
        actions_by_triage = ACTIONS_BY_TRIAGE

    def _serialise(triage):
        # Build and serialise the target for a single triage label.
        # A missing label (NaN/NA) would otherwise be dumped as the bare
        # token NaN, which strict JSON parsers reject; emit null instead.
        level = None if pd.isna(triage) else triage
        target = {
            "triage_level": level,
            "red_flags": red_flags_by_triage.get(triage, []),
            "suggested_actions": actions_by_triage.get(triage, [])
        }
        return json.dumps(target, indent=2)

    df = core_df.copy()
    # Iterate the label column directly; iterrows() would build a full Series
    # per row only to read a single value.
    df["triage_target_json"] = [_serialise(label) for label in df["triage_label"]]
    return df

In [13]:
# Attach the JSON training target ("triage_target_json") derived from each row's triage_label.
df = build_triage_target_json(df)

In [14]:
# Preview the first rows to confirm "triage_target_json" is populated.
df.head()

Unnamed: 0,ecg_id,patient_id,age,sex,height,weight,nurse,site,device,recording_date,...,extra_beats,pacemaker,strat_fold,filename_lr,filename_hr,triage_label,image_path,ecg_summary,symptoms,triage_target_json
0,146,13447.0,45.0,0,182.0,90.0,,3.0,CS-12 E,1986-02-23 11:41:43,...,,,7,records100/00000/00146_lr,records500/00000/00146_hr,RED,ecg_images/00146.png,Here's a concise analysis of the 12-lead ECG:\...,"severe chest pain, shortness of breath, diapho...","{\n ""triage_level"": ""RED"",\n ""red_flags"": [\..."
1,177,21551.0,73.0,0,,,,3.0,AT-6 C,1986-03-15 08:11:15,...,,,4,records100/00000/00177_lr,records500/00000/00177_hr,RED,ecg_images/00177.png,Here's a concise analysis of the 12-lead ECG:\...,"severe chest pain, shortness of breath, diapho...","{\n ""triage_level"": ""RED"",\n ""red_flags"": [\..."
2,223,16039.0,82.0,0,,,1.0,2.0,CS-12,1986-06-08 17:24:13,...,,,9,records100/00000/00223_lr,records500/00000/00223_hr,RED,ecg_images/00223.png,Here's a concise analysis of the provided 12-l...,"syncope, palpitations","{\n ""triage_level"": ""RED"",\n ""red_flags"": [\..."
3,544,13772.0,67.0,0,,,,3.0,AT-6 C,1987-03-13 16:32:49,...,,,9,records100/00000/00544_lr,records500/00000/00544_hr,RED,ecg_images/00544.png,Here's a concise analysis of the 12-lead ECG:\...,"crushing chest pain, nausea, vomiting","{\n ""triage_level"": ""RED"",\n ""red_flags"": [\..."
4,631,5043.0,83.0,0,183.0,70.0,11.0,1.0,AT-6 6,1987-04-17 10:10:03,...,1ES,,4,records100/00000/00631_lr,records500/00000/00631_hr,GREEN,ecg_images/00631.png,Here's a concise analysis of the provided 12-l...,"mild chest discomfort, anxiety","{\n ""triage_level"": ""GREEN"",\n ""red_flags"": ..."


In [16]:
# Persist the completed dataset (symptoms + JSON target) without the pandas index.
# NOTE(review): the output name uses "core_db" while the input file uses
# "core_df" - confirm the naming is intentional. The "Unnamed: 0" column
# seen in the previews is still written out as a regular column.
df.to_csv("core_db_complete.csv", index=False)