# Blood Pressure Plotting

A short notebook demonstrating how to plot BP readings for a cohort of synthetic patients. Hypertension diagnoses, if present in the patient's history, are also displayed.

We also include plots showing the prevalence of hypertension (HT) among the synthetic population and how it differs across demographic groups.

To generate a cohort of 100 synthetic patients, run the following command inside your local clone of the Synthea repo:

`./run_synthea -p 100 --exporter.csv.export true`

In [None]:
import pandas as pd

# Directory containing the CSV export: the "csv" subfolder of the output
# directory created by Synthea. Adjust this to match your local setup.
output_path = "../output/csv"


def _read_table(table_name):
    """Load ``<output_path>/<table_name>.csv`` into a DataFrame."""
    return pd.read_csv(f"{output_path}/{table_name}.csv")


# One DataFrame per exported table used in this notebook
conds = _read_table("conditions")
obs = _read_table("observations")
meds = _read_table("medications")
pats = _read_table("patients")
encounters = _read_table("encounters")

In [None]:
import matplotlib.pyplot as plt

def date_format(date_str):
    """Parse the first ten characters of ``date_str`` as a pandas timestamp.

    Anything after the tenth character is discarded before parsing, so for a
    string that starts with ``YYYY-MM-DD`` only the calendar date is kept.
    """
    date_part = date_str[0:10]
    parsed = pd.to_datetime(date_part)
    return parsed

def date_first_ht_med(meds_df, patient_id):
    """Return the earliest ``START`` of a patient's hypertension medications.

    Args:
        meds_df: Medications table with ``PATIENT``, ``REASONCODE`` and
            ``START`` columns.
        patient_id: Value of ``PATIENT`` identifying the patient of interest.

    Returns:
        The minimum ``START`` value among the matching rows, as stored in
        ``meds_df`` (no date parsing is done here).
    """
    # NOTE(review): rows are matched on REASONCODE == "hypertension_dx";
    # confirm this is the value actually present in the exported CSV.
    is_patient = meds_df["PATIENT"] == patient_id
    is_ht_med = meds_df["REASONCODE"] == "hypertension_dx"
    patient_ht_meds = meds_df[is_patient & is_ht_med]
    # The original read from an undefined name (`patient_meds`) and raised
    # NameError on every call; use the filtered frame built above.
    return patient_ht_meds["START"].min()

def plot_bp(patient_id, obs_df, conds, encounters):
    """Plot one patient's systolic and diastolic readings over time.

    If the patient has an "Essential hypertension (disorder)" row in
    ``conds``, the ``START`` date of the first such row is drawn as a dashed
    red vertical line.

    Args:
        patient_id: Value of the ``PATIENT`` column identifying the patient.
        obs_df: Observations table with ``PATIENT``, ``DESCRIPTION``, ``DATE``
            and ``VALUE`` columns.
        conds: Conditions table with ``PATIENT``, ``DESCRIPTION`` and
            ``START`` columns.
        encounters: Unused; kept so existing callers keep working.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    patient_ht_conds = conds[
        (conds["DESCRIPTION"] == "Essential hypertension (disorder)")
        & (conds["PATIENT"] == patient_id)
    ]
    if patient_ht_conds.empty:
        ht_diagnosis_date = None
    else:
        # Same row choice as before: the first HT row listed for the patient
        ht_diagnosis_date = date_format(patient_ht_conds["START"].iloc[0])

    # Filter on the frame that was passed in. The original built the patient
    # mask from the global `obs`, which only worked when obs_df *was* `obs`.
    patient_obs = obs_df[obs_df["PATIENT"] == patient_id]
    # na=False: a missing description is treated as "not a BP reading"
    # instead of producing a NaN mask that cannot be used for indexing.
    sys_values = patient_obs[patient_obs["DESCRIPTION"].str.contains("Systolic", na=False)]
    dia_values = patient_obs[patient_obs["DESCRIPTION"].str.contains("Diastolic", na=False)]

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
    ax.plot(sys_values["DATE"].apply(date_format), sys_values["VALUE"].astype(float), label="systolic")
    ax.plot(dia_values["DATE"].apply(date_format), dia_values["VALUE"].astype(float), label="diastolic")

    # Explicit None check rather than relying on the truthiness of a Timestamp
    if ht_diagnosis_date is not None:
        ax.axvline(ht_diagnosis_date, label="HT Diagnosis", c="r", linestyle="--")

    ax.set_title(f"BP Measurements for Patient: {patient_id}")
    ax.tick_params(axis="x", labelrotation=45)
    ax.legend()
    plt.show()

In [None]:
# Distinct patient ids in order of first appearance in `conds`. Unlike
# list(set(...)), this order is the same on every run, so the same patients
# are plotted each time the notebook is executed.
ht_patients = conds.loc[
    conds["DESCRIPTION"] == "Essential hypertension (disorder)", "PATIENT"
].unique().tolist()
patients = conds["PATIENT"].unique().tolist()

# Plot BP readings for the first 4 patients
for pat in patients[:4]:
    plot_bp(pat, obs, conds, encounters)

# Population Analysis

### Proportion of HT Patients

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1)

# Percentage of the patients in `patients` that do / do not appear in
# `ht_patients`
n_patients = len(patients)
n_ht_patients = len(ht_patients)
p_ht_patients = 100 * n_ht_patients / n_patients
p_non_ht_patients = 100 * (n_patients - n_ht_patients) / n_patients

bar_labels = ["HT Patients", "Non-HT Patients"]
bar_heights = [p_ht_patients, p_non_ht_patients]
ax.bar(range(2), bar_heights)
ax.set_xticks([0, 1], bar_labels)
ax.set_ylabel("% of patients")
ax.set_title("Proportion of Patients Diagnosed with HT During Simulation")


### Age at HT Diagnosis

In [None]:
# HT condition rows, joined below with the matching row from the patients table
ht_diagoneses = conds[conds["DESCRIPTION"] == "Essential hypertension (disorder)"]

# The patients table keys on "Id"; rename it so it can be merged on "PATIENT"
patient_births = pats.rename(columns={"Id": "PATIENT"})

ht_diagoneses = ht_diagoneses.merge(patient_births, on=["PATIENT"], how="left")

ht_diagoneses["START"] = pd.to_datetime(ht_diagoneses["START"])
ht_diagoneses["BIRTHDATE"] = pd.to_datetime(ht_diagoneses["BIRTHDATE"])

# Time elapsed between birth and diagnosis, as a timedelta
ht_diagoneses["PATIENT_AGE"] = ht_diagoneses["START"] - ht_diagoneses["BIRTHDATE"]

# Age in completed years at diagnosis. The previous version counted the
# year-end boundaries between the two dates, which over-counts by one whenever
# the diagnosis falls before that year's birthday (and relied on the "YE"
# frequency alias, which older pandas releases do not accept).
diagnosis_date = ht_diagoneses["START"]
birth_date = ht_diagoneses["BIRTHDATE"]
# Encode (month, day) as month*100 + day so the pair compares as one number
birthday_pending = (
    diagnosis_date.dt.month * 100 + diagnosis_date.dt.day
    < birth_date.dt.month * 100 + birth_date.dt.day
)
ht_diagoneses["DIAGNOSIS_AGE"] = (
    diagnosis_date.dt.year - birth_date.dt.year - birthday_pending.astype(int)
)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1)

# Histogram of the DIAGNOSIS_AGE column, split into 50 bins
diagnosis_ages = ht_diagoneses["DIAGNOSIS_AGE"]
diagnosis_ages.plot(kind="hist", bins=50, ax=ax)

ax.set_title("Age at HT Diagnosis")

### Gender and Race Breakdown

In [None]:
# Percentage of patients with an HT diagnosis, keyed by race / gender value
race_perc_dict = {}

gender_perc_dict = {}

# Count each diagnosed patient once. `ht_diagoneses` has one row per condition
# record, so a patient with several HT rows would otherwise inflate the
# numerator.
ht_unique_patients = ht_diagoneses.drop_duplicates(subset="PATIENT")

# dropna(): rows left without demographics by the left merge have no matching
# group in `pats` and would otherwise cause a division by zero.
for race in ht_unique_patients["RACE"].dropna().unique():
    total = len(pats[pats["RACE"] == race])
    ht = len(ht_unique_patients[ht_unique_patients["RACE"] == race])
    race_perc_dict[race] = 100 * ht / total

for gender in ht_unique_patients["GENDER"].dropna().unique():
    total = len(pats[pats["GENDER"] == gender])
    ht = len(ht_unique_patients[ht_unique_patients["GENDER"] == gender])
    gender_perc_dict[gender] = 100 * ht / total

In [None]:
# Display the per-race HT percentages held in race_perc_dict
race_perc_dict

In [None]:
# Two side-by-side bar charts: HT percentage by race (left) and by gender (right)
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
race_ax, gender_ax = axs[0], axs[1]

race_ax.bar(race_perc_dict.keys(), race_perc_dict.values())
race_ax.set_ylabel("% of patients")
race_ax.set_title("Proportion of HT by Race")

gender_ax.bar(gender_perc_dict.keys(), gender_perc_dict.values())
gender_ax.set_ylabel("% of patients")
gender_ax.set_title("Proportion of HT by Gender")