# Document Generation with DSPy

In [10]:
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
import mlflow
import dspy
import random
import utils
assert(load_dotenv())
random.seed(313)
dspy.configure(lm=dspy.LM("openai/gpt-5", temperature=1.0, max_tokens=16000))
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Astrik Doc Gen")
mlflow.dspy.autolog(log_evals=True, log_compiles=True, log_traces_from_compile=True)
utils.hello_world()

'Hello, world!'

Load Synthea Data

In [11]:
data_dir = Path("data/synthea")
dataframes = {csv_file.stem: pd.read_csv(csv_file, header=0) for csv_file in data_dir.glob("*.csv")}

#for key, df in dataframes.items():
#    print(f"{key}:\t{sorted(df.columns.tolist())}")


In [None]:


# Load the first patient as an example
first_patient = utils.PatientProfile.from_row(dataframes["patients"].iloc[0])
print(f"Patient: {first_patient.prefix} {first_patient.first} {first_patient.last}")
print(f"Birthdate: {first_patient.birthdate.year}-{first_patient.birthdate.month:02d}-{first_patient.birthdate.day:02d}")
print(f"Alive: {first_patient.deathdate is None}")
print(f"\nFull profile:")
print(first_patient.model_dump_json(indent=2))

Patient: Mr. José Eduardo181 Gómez206
Birthdate: 1989-05-25
Alive: True

Full profile:
{
  "id": "1d604da9-9a81-4ba9-80c2-de3375d59b40",
  "birthdate": {
    "year": 1989,
    "month": 5,
    "day": 25
  },
  "ssn": "999-76-6866",
  "first": "José Eduardo181",
  "last": "Gómez206",
  "race": "white",
  "ethnicity": "hispanic",
  "gender": "M",
  "birthplace": "Marigot  Saint Andrew Parish  DM",
  "address": "427 Balistreri Way Unit 19",
  "city": "Chicopee",
  "state": "Massachusetts",
  "county": "Hampden County",
  "deathdate": null,
  "drivers": "S99984236",
  "passport": "X19277260X",
  "prefix": "Mr.",
  "suffix": null,
  "maiden": null,
  "zip": "1013",
  "marital": "M",
  "lat": 42.22835382315942,
  "lon": -72.56295055096882,
  "healthcare_expenses": 271227.08,
  "healthcare_coverage": 1334.88
}


#### Patient Profile Narrator

In [None]:
class PatientProfileNarrator(dspy.Signature):
    """Reads a structured patient profile and narrates it as a medical professional would do in the context of a conversation with a colleague. The narrator wants to give a concise summary of the patient profile, focusing on the most relevant attributes for a medical professional in a clinical setting. The colleague should be able to use the information for generating a medical report or for further analysis of the patient profile."""

    patient_profile: utils.PatientProfile = dspy.InputField(
        desc="A structured patient profile from the EHR system. The profile includes relevant attributes like name, gender, birthdate etc. but also attributes that might not be relevant for the conversation like social security number. Some attributes like Address may only be relevant if it is something unusual like homelessness or if its in a tropical country for example.")

    narrative: str = dspy.OutputField(desc="The narrative summary of the patient profile.")

narrator = dspy.ChainOfThought(PatientProfileNarrator)
# sample a line from dataframes["patients"]
sample_patient = dataframes["patients"].sample().iloc[0]
patient_profile = utils.PatientProfile.from_row(
    sample_patient
)
narrative = narrator(patient_profile=patient_profile).narrative
print(narrative)




Mr. Merlin721 Breitenberg711 is a Black, non-Hispanic male, born on 1965-02-12 in Boston, Massachusetts. He resided in Easthampton, Massachusetts (Hampshire County) and was married. The patient is deceased, with a date of death of 2018-10-08 at age 53. No additional clinical information (problem list, medications, allergies, or past medical/surgical history) is provided in this profile.


#### Encounter Narrator

In [14]:
sample_encounter = dataframes["encounters"].sample().iloc[0]

In [None]:
encounter_profile = utils.EncounterProfile.from_row(
    sample_encounter
)

class EncounterProfileNarrator(dspy.Signature):
    """Reads a structured encounter profile and narrates it as a medical professional would do in the context of a conversation with a colleague. The narrator wants to give a concise summary of the encounter, focusing on the most relevant attributes for a medical professional in a clinical setting. The colleague should be able to use the information for generating a medical report or for further analysis of the encounter profile."""

    encounter_profile: utils.EncounterProfile = dspy.InputField(desc="A structured encounter profile from the EHR system. The profile includes relevant attributes like encounterclass, description, reasondescription etc. but also attributes that might not be relevant for the conversation like code or id. Some attributes like start and end may only be relevant in combination if it is something unusual like a very long stay.")

    patient_profile_narrative: str = dspy.InputField(desc="A narrative summary of the patient profile associated with the encounter.")

    narrative: str = dspy.OutputField(desc="The narrative summary of the encounter profile.")


# create patient_profile_narrative
narrator = dspy.ChainOfThought(PatientProfileNarrator)
# get the row from the patients dataframe where the Id matches the patient (uuid) from the encounter
patient = dataframes["patients"].loc[dataframes["patients"]["Id"] == encounter_profile.patient].iloc[0]
patient_profile = utils.PatientProfile.from_row(
    patient
)
patient_profile_narrative = narrator(patient_profile=patient_profile).narrative



narrator = dspy.ChainOfThought(EncounterProfileNarrator)
narrative = narrator(encounter_profile=encounter_profile, patient_profile_narrative=patient_profile_narrative).narrative
print(narrative)


2015-07-11 outpatient wellness encounter for a 14-year-old Hispanic female (Guatemala-born, residing in Lowell, MA), presenting for a routine well-child visit. The encounter lasted approximately 15 minutes (18:46–19:01). No chief complaint or acute concerns documented; reason for visit denoted as preventive care only. No additional procedures, tests, or immunizations are recorded in this encounter entry. Total charge was $129.16, fully covered by the payer; no patient responsibility documented.
