In [3]:
import polars as pl
import numpy as np
import Hermes

  from .autonotebook import tqdm as notebook_tqdm


# Test

In [28]:
system_prompt = """
# Overview
You are a helper Agent, named Hermes-R, of the Hermes Agentic System. 
The job of the Hermes Agentic System is to convert unstructured raw clinical notes into a structured report along with a knowledge graph. 
This task is accomplished by using helper Agents like you. 

## Your Task
Your task is to convert unstructured raw clinical notes into a structured report. You will be given the unstructured raw medical notes of a patient, along with the format of the structured report, which will start with <|start-of-format|> token and end with <|end-of-format|> token. You have to fill the given format based on the unstructured notes.

## Format of the structured report:
<|start-of-format|>
# Patient Report

## A. Comprehensive Patient Profile (CPP)

### Administrative & Identification Details
- **Patient Name:**  
- **Date of Birth (DOB):**  
- **Gender:**  
- **Patient ID / MRN:**  
- **Phone / Email:**  
- **Address:**  
- **Emergency Contact Person:**  
- **Emergency Contact Info:**  
- **Date CPP Last Updated:**  

---

## B. Subjective Objective Assessment Plan (SOAP)

### S - Subjective

#### 1. Chief Complaint (CC)
> *e.g., “I've been having chest pain for the past 2 days.”*  
-  

#### 2. History of Present Illness (HPI)
> *Use OLDCARTS: Onset, Location, Duration, Character, Aggravating/Alleviating, Radiation, Timing, Severity*  
-  

#### 3. Past Medical History (PMH)
-  

#### 4. Past Surgical History (PSH)
-  

#### 5. Family History (FH)
-  

#### 6. Social History (SH)
-  

#### 7. Review of Systems (ROS)
- **General:**  
- **HEENT:**  
- **Respiratory:**  
- **CVS:**  
- **GI:**  
- **GU:**  
- **MSK:**  
- **Neuro:**  
- **Skin:**  
- **Psych:**  

#### 8. Current Medications and Allergies
- **Medications:**
    - ...
- **Allergies:**
    - ...

### O - Objective

#### 1. Vital Signs (During Admission)
-  

#### 2. Physical Examination
- **General:**  
- **HEENT:**  
- **Neck:**  
- **Cardiac:**  
- **Lungs:**  
- **Abdomen:**  
- **Extremities:**  
- **Pulses:**  
- **Neuro:**  
- **Skin:**  

#### 3. Laboratory Data
-  

#### 4. Imaging and Other Diagnostic Data
-  

#### 5. Other Clinicians' Notes (if applicable)
-  

---

### A - Assessment

#### 1. Problem List
1.  
2.  

#### 2. Differential Diagnoses
- **[Problem Name]**  
  -  ...

### P - Plan

#### Problem 1: [Name]
- **Investigations:**  
- **Therapeutic Interventions:**  
- **Consults/Referrals:**  
- **Patient Education:**  
- **Follow-Up/Monitoring:**  

#### Problem 2: [Name]
- **Investigations:**  
- **Therapeutic Interventions:**  
- **Consults/Referrals:**  
- **Patient Education:**  
- **Follow-Up/Monitoring:**  

---

## C. Discharge Conditions (DC)

### 1. Physical Exam at Discharge
- **Vitals / VS:**  
- **General:**  
- **HEENT:**  
- **Neck:**  
- **Cardiac:**  
- **Lung:**  
- **Abdomen:**  
- **Extremities:**  
- **Pulses:**  
- **Neuro:**  
- **Skin:** 

### 2. Discharge Condition
- **Mental Status:**  
- **Level of Consciousness:**  
- **Activity Status:**  

### 3. Discharge Instructions
> *Medical summary, precautions, follow-up*

-  

### 4. Disposition
> *e.g., Home, Skilled Nursing Facility, etc.*

-  

### 5. Brief Hospital Course
> *Summary of presentation, diagnostics, treatment, consults, response*

-  
<|end-of-format|>

### WARNING
Remember, do not summarize anything and strictly follow the given format. Do not add any extra heading and do not miss any headings given in the format. If some information is missing then leave it blank in the structured report.
"""

In [29]:
unstructured_report = """
# Unstructured raw clinical notes -

Name:  ___                      Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: MEDICINE
 
Allergies: 
No Known Allergies / Adverse Drug Reactions
 
Attending: ___.
 
Chief Complaint:
Right flank bruising and pain s/p fall
 
Major Surgical or Invasive Procedure:
None

 
History of Present Illness:
Mr. ___ is a ___ with history of factor VIII deficiency who 
presents with right neck swelling after snowboarding accident. 
The patient reports that he fell while snowboarding with loss of 
consciousness on ___. He was initially seen at 
___ where CT imaging of head/neck showed no 
intracranial hemorrhage. A CTA neck showed a thickened right 
platysma muscle with surrounding hematoma and a focus of active 
contrast extravasation within the right platysma muscle. He also 
developed a right shoulder hematoma although shoulder plain 
films didn't show acute abnormality. He was seen by ___ 
Hematology and gave him one dose of DDAVP IV. A factor VIII 
assay was 139 and vW level was >200 per report. Per report, his 
hemoglobin decreased from 13.2 on ___ to 11.6 on ___. 
Repeat imaging in the morning showed stable injuries. The 
patient saw his hematologist on ___ and was found to have a 
hemoglobin of 10.4. Because of the continued mild decrease, the 
patient followed up with his PCP ___ ___ at which time his 
hemoglobin was 9.9. He was found to have an enlarging flank 
hematoma, thus was referred given concern for retroperitoneal 
bleed. The patient has been using DDAVP intranasally 
intermittently since the accident. He denies lightheadedness or 
palpitations, any increase in neck swelling over the course of 
the week. He does endorse pain in his right shoulder ___ 
resting, ___ moving), though this has improved over the course 
of the week.  
In the ED, initial vital signs were 99.2 87 124/75 18 100%/RA. 
Initial labs demonstrated hemoglobin 10.6, though repeat was 
9.7. Chemistries and coags were unremarkable. FVIII activity was 
103. A CTAP was performed which demonstrated muscular hemorrhage 
along the flank, but no retroperitoneal bleed on preliminary 
read. The patient's outpatient hematologist, Dr. ___, was 
contacted and it was decided to give the patient desmopressin 
0.3mg/kg IV. The patient was then admitted for futher 
management.  
Per review of records, the patient has a history of significant 
bleeding after his circumcision, requiring blood transfusion. 
Throughout childhood, he also had a tendency to bruise easily. 
He was tested and found to have ___ disease. Later, 
after wisdom tooth extraction, the patient experienced late 
(e.g. ___ days later) bleeding despite treatment with DDAVP. The 
patient was retested by a hematologist associated with the 
___ and was diagnosed 
with hemophilia A. His FVIII activity has been checked on 
multiple occasions, sometimes testing normal, though has been as 
low as ~50.  
Upon arrival to the floor, the patient is comfortable without 
complaint.  
Review of Systems:  
(+) per HPI  
(-) fever, chills, night sweats, headache, vision changes, 
rhinorrhea, congestion, sore throat, cough, shortness of breath, 
chest pain, abdominal pain, nausea, vomiting, diarrhea, 
constipation, BRBPR, melena, hematochezia, dysuria, hematuria.
 
Past Medical History:
-Factor VIII deficiency (mild)  

 
Social History:
___
Family History:
The patient's mother had tendency to bleed.  

 
Physical Exam:
ON ADMISSION
VS:98 120/40 64 20 100RA  
GENERAL: lying flat in bed, no acute distress  
HEENT: NCAT, MMM, OP clear  
NECK: Supple  
CARDIAC: RRR, S1/S2, no murmurs, gallops, or rubs  
LUNG: Generally CTA b/l  
ABDOMEN: Soft, non-tender, non-distended  
EXTREMITIES: Warm, well-perfused  
PULSES: 2+ DP pulses bilaterally  
NEURO: CN II-XII intact  
SKIN: Hematomas on right aspect of neck and flank 

ON DISCHARGE
Vitals: 98.0, 100-120/40-58, 66, 20, 99 on RA  
GENERAL: lying flat in bed, no acute distress  
HEENT: NCAT, MMM, OP clear  
NECK: Supple  
CARDIAC: RRR, S1/S2, no murmurs, gallops, or rubs  
LUNG: Generally CTA b/l  
ABDOMEN: Soft, non-tender, non-distended  
EXTREMITIES: Warm, well-perfused  
PULSES: 2+ DP pulses bilaterally  
NEURO: CN II-XII intact  
SKIN: Hematomas on right aspect of neck and flank

 
Pertinent Results:
ADMISSION, DISCHARGE, PERTINENT LABS:

___ 07:03PM BLOOD WBC-6.6 RBC-3.58* Hgb-10.6*# Hct-29.8*# 
MCV-83 MCH-29.6 MCHC-35.6* RDW-14.7 Plt ___
___ 07:03PM BLOOD Neuts-69.7 ___ Monos-7.2 Eos-2.4 
Baso-0.2
___ 07:03PM BLOOD ___ PTT-35.2 ___
___ 07:03PM BLOOD Plt ___
___ 07:03PM BLOOD FacVIII-103
___ 07:03PM BLOOD Glucose-93 UreaN-15 Creat-0.8 Na-139 
K-4.1 Cl-101 HCO3-28 AnGap-14
___ 11:00PM BLOOD WBC-6.6 RBC-3.30* Hgb-9.7* Hct-27.0* 
MCV-82 MCH-29.4 MCHC-36.0* RDW-14.7 Plt ___
___ 07:25AM BLOOD WBC-5.2 RBC-3.14* Hgb-9.3* Hct-26.2* 
MCV-83 MCH-29.7 MCHC-35.7* RDW-14.6 Plt ___
___ 03:25PM BLOOD WBC-6.3 RBC-3.27* Hgb-9.9* Hct-27.1* 
MCV-83 MCH-30.3 MCHC-36.5* RDW-14.7 Plt ___
___ 07:50PM URINE Color-Yellow Appear-Clear Sp ___
___ 07:50PM URINE Blood-NEG Nitrite-NEG Protein-30 
Glucose-NEG Ketone-NEG Bilirub-NEG Urobiln-NEG pH-6.0 Leuks-NEG
___ 07:50PM URINE RBC-<1 WBC-1 Bacteri-FEW Yeast-NONE Epi-0
___ 07:50PM URINE Mucous-RARE

IMAGING/STUDIES:
 
___ CT A/P
Acute hemorrhage along right posterior flank musculature and 
probably layering over it, only partly imaged and hard to 
distinguish musculature from hemorrhage. No active extravasation 
seen. Probable old hematoma along posterior left flank. 

 
Brief Hospital Course:
Mr. ___ is a ___ with history of mild FVIII deficiency who 
presents after snowboarding accident with multiple hematomas and 
falling hemoglobin concerning for ongoing bleeding.

# FACTOR VIII DEFICIENCY, MULTIPLE HEMATOMAS: Patient presented 
after recent snowboarding accident. At ___ 
___, imaging was notable for neck and shoulder hematomas. 
Upon reevaluation by his PCP, the patient was found to have a 
flank hematoma. Given falling hemoglobin, there was concern for 
retroperitoneal bleed. CTAP in the ED demonstrated hematoma over 
his flank musculature, but no active extravasation. He was given 
IV DDAVP, but FVIII activity was 103 (wnl). CBC remained stable 
and patient declined further inpatient monitoring. Atrius 
hematology recommended continued outpatient hemoglobin 
monitoring, but did not think further DDAVP was indicated given 
normal FVIII level. 
  

# TRANISTIONAL ISSUES:
- PCP ___ ___
- CBC ___ - ___ at ___
- Caution to avoid dangerous activity
- Code: presumed full  
- Emergency Contact: ___ ___ - wife) 
 
Medications on Admission:
The Preadmission Medication list is accurate and complete.
1. Desmopressin Nasal ___ mcg NAS PRN bleeding 

 
Discharge Medications:
1. Acetaminophen 1000 mg PO Q8H pain 
2. Desmopressin Nasal ___ mcg NAS PRN bleeding 
3. Outpatient Lab Work
CBC on ___ or ___. Last hemoglobin 9.9 ___ ___.

 
Discharge Disposition:
Home
 
Discharge Diagnosis:
PRIMARY:
- Acute muscular hematoma, right flank
- Hemophilia, factor VIII deficiency

 
Discharge Condition:
Mental Status: Clear and coherent.
Level of Consciousness: Alert and interactive.
Activity Status: Ambulatory - Independent.

 
Discharge Instructions:
Mr. ___,

It was our pleasure caring for you at ___ 
___. You were admitted with bruising on your right 
side and low blood counts after a snowboarding fall. With your 
history of hemophilia, it was important to evaluate internal 
bleeding which did show a right muscular flank blood collection. 
Your facotr VIII level was 103 and you received IV DDAVP under 
our care. Your blood counts were stable to improved on the day 
of admission. 

It is important that you not participate in any dangerous 
activities given your recent bleed and your hemophilia. Bleeding 
in hemophiliacs has more potential to be life-threatening.

Please get your blood counts checked at ___ site on either 
___ or ___. Follow up with your 
regular doctor early next week.

Best wishes,
Your ___ Care Team
 
Followup Instructions:
___
"""

In [30]:
import ollama

# Baseline: send the sample note to a single deepseek-r1 model through Ollama,
# with the report template as the system message and the raw note as the user
# message. `options` requests num_ctx=32000 for this call.
resp = ollama.chat(model='deepseek-r1:70b', messages=[
  {
    'role': 'system',
    'content': system_prompt,
  },
  {
    'role': 'user',
    'content': unstructured_report,
  }
], options={"num_ctx": 32000},)
# Keep only the generated text; later cells score this string.
response = resp['message']['content']
print(response)

<think>
Alright, so I've got this task where I need to convert some unstructured clinical notes into a structured report following a specific format. The user has provided both the raw notes and the desired format, which ends with a sample output. My job is to understand how to approach this correctly.

First, I'll start by carefully reading through the unstructured notes to identify all relevant patient information. The notes include details about the patient's admission, history, lab results, imaging, medications, and discharge plans. It's important to extract each piece of data accurately without missing anything.

Next, I need to map this extracted information into the given structured format. This means going through each section like Administrative Details, SOAP notes, Discharge Conditions, etc., and populating them with the corresponding data from the raw notes. If any information is missing in the raw notes, I should leave those sections blank as per the user's instructions.

I

In [31]:
import re

# Headings the structured report is expected to contain, grouped by markdown
# depth (headingsN holds the headings written with N leading '#').
# Entries are matched as substrings of the extracted heading text, which is why
# numeric prefixes such as "1. " are not part of the entries.
headings1 = ["Patient Report"]

headings2 = [
    "A. Comprehensive Patient Profile (CPP)",
    "B. Subjective Objective Assessment Plan (SOAP)",
    "C. Discharge Conditions (DC)",
]

headings3 = [
    "Administrative & Identification Details",
    "S - Subjective",
    "O - Objective",
    "A - Assessment",
    "P - Plan",
    "Physical Exam at Discharge",
    "Discharge Condition",
    "Discharge Instructions",
    "Disposition",
    "Brief Hospital Course",
]

headings4 = [
    "Chief Complaint (CC)",
    "History of Present Illness (HPI)",
    "Past Medical History (PMH)",
    "Past Surgical History (PSH)",
    "Family History (FH)",
    "Social History (SH)",
    "Review of Systems (ROS)",
    "Current Medications and Allergies",
    "Vital Signs (During Admission)",
    "Physical Examination",
    "Laboratory Data",
    "Imaging and Other Diagnostic Data",
    "Other Clinicians' Notes",
    "Problem List",
    "Differential Diagnoses",
    "Problem 1",
    "Problem 2",
]


def extract_headings_by_level(text):
    """
    Collect the markdown headings of `text`, grouped by depth.

    A heading is a line that starts with exactly N '#' characters followed by a
    single space, for N in 1..4. The captured value is the rest of that line.

    Returns a dict mapping the depth (1, 2, 3, 4) to the list of heading texts
    found at that depth, in document order.
    """
    level_patterns = (
        (1, r'^# (.+)'),
        (2, r'^## (.+)'),
        (3, r'^### (.+)'),
        (4, r'^#### (.+)'),
    )
    # re.MULTILINE makes '^' anchor at the start of every line, not just the text.
    return {
        level: re.findall(pattern, text, re.MULTILINE)
        for level, pattern in level_patterns
    }

# Headings found in the model response from the previous cell, keyed by depth (1-4).
headings = extract_headings_by_level(response)

In [32]:
def check_headings(headings, expected_headings):
    """
    Tell whether every expected heading occurs in `headings`.

    An expected heading counts as present when it is a case-insensitive
    substring of at least one entry of `headings`. The first expected heading
    that cannot be found is printed and the function returns False right away;
    if nothing is missing it returns True.
    """
    lowered = [heading.lower() for heading in headings]
    for expected in expected_headings:
        needle = expected.lower()
        if not any(needle in candidate for candidate in lowered):
            print(f"Missing heading: {expected}")
            return False
    return True

def calculate_reward(headings, expected_headings):
    """
    Return the fraction of expected headings that appear in `headings`.

    An expected heading counts as found when it is a (case-sensitive) substring
    of at least one entry of `headings`; each expected heading is counted once.

    Args:
        headings: heading texts extracted from a response.
        expected_headings: heading texts the response should contain.

    Returns:
        A float in [0.0, 1.0]. When `expected_headings` is empty there is
        nothing to miss, so the result is 1.0 (instead of dividing by zero),
        mirroring `check_headings`, which returns True for an empty list.
    """
    # NOTE(review): check_headings compares case-insensitively while this
    # comparison is case-sensitive - confirm which behavior is intended.
    if len(expected_headings) == 0:
        return 1.0
    found = sum(
        1
        for expected in expected_headings
        if any(expected in heading for heading in headings)
    )
    return found / len(expected_headings)

def reward_function(response):
    """
    Score how completely a response reproduces the expected report headings.

    The headings of `response` are extracted per markdown depth (1-4) and
    compared with the module-level lists headings1..headings4. Each depth
    contributes the fraction of its expected headings that were found, and the
    four fractions are averaged.

    A leading reasoning trace wrapped in <think>...</think> (as emitted by
    reasoning models such as deepseek-r1) is removed first, so headings that
    are merely mentioned while reasoning are not counted as report headings.
    Responses without such a block are scored exactly as before.

    Args:
        response: the generated report as a string.

    Returns:
        The mean of the four per-depth fractions, a float in [0.0, 1.0].
    """
    import re  # local import so the function does not rely on cell order

    report = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)
    found = extract_headings_by_level(report)
    expected_by_level = ((1, headings1), (2, headings2), (3, headings3), (4, headings4))
    per_level = [calculate_reward(found[level], expected) for level, expected in expected_by_level]
    return sum(per_level) / len(per_level)
    

# Sanity-check the sample response one heading depth at a time. check_headings
# prints the first missing heading (if any) before each "Check N" line.
check1 = check_headings(headings[1], headings1)
print(f"Check 1: {check1}")
check2 = check_headings(headings[2], headings2)
print(f"Check 2: {check2}")
check3 = check_headings(headings[3], headings3)
print(f"Check 3: {check3}")
check4 = check_headings(headings[4], headings4)
print(f"Check 4: {check4}")
# Overall score: mean over the four depths of the fraction of expected headings found.
reward = reward_function(response)
print(f"Reward: {reward}")

Check 1: True
Check 2: True
Check 3: True
Check 4: True
Reward: 1.0


# Load Data

In [14]:
system_prompt = """
# Overview
You are a helper Agent, named Hermes-R, of the Hermes Agentic System. 
The job of the Hermes Agentic System is to convert unstructured raw clinical notes into a structured report along with a knowledge graph. 
This task is accomplished by using helper Agents like you. 

## Your Task
Your task is to convert unstructured raw clinical notes into a structured report. You will be given the unstructured raw medical notes of a patient, along with the format of the structured report, which will start with <|start-of-format|> token and end with <|end-of-format|> token. You have to fill the given format based on the unstructured notes.

## Format of the structured report:
<|start-of-format|>
# Patient Report

## A. Comprehensive Patient Profile (CPP)

### Administrative & Identification Details
- **Patient Name:**  
- **Date of Birth (DOB):**  
- **Gender:**  
- **Patient ID / MRN:**  
- **Phone / Email:**  
- **Address:**  
- **Emergency Contact Person:**  
- **Emergency Contact Info:**  
- **Date CPP Last Updated:**  

---

## B. Subjective Objective Assessment Plan (SOAP)

### S - Subjective

#### 1. Chief Complaint (CC)
> *e.g., “I've been having chest pain for the past 2 days.”*  
-  

#### 2. History of Present Illness (HPI)
> *Use OLDCARTS: Onset, Location, Duration, Character, Aggravating/Alleviating, Radiation, Timing, Severity*  
-  

#### 3. Past Medical History (PMH)
-  

#### 4. Past Surgical History (PSH)
-  

#### 5. Family History (FH)
-  

#### 6. Social History (SH)
-  

#### 7. Review of Systems (ROS)
- **General:**  
- **HEENT:**  
- **Respiratory:**  
- **CVS:**  
- **GI:**  
- **GU:**  
- **MSK:**  
- **Neuro:**  
- **Skin:**  
- **Psych:**  

#### 8. Current Medications and Allergies
- **Medications:**
    - ...
- **Allergies:**
    - ...

### O - Objective

#### 1. Vital Signs (During Admission)
-  

#### 2. Physical Examination
- **General:**  
- **HEENT:**  
- **Neck:**  
- **Cardiac:**  
- **Lungs:**  
- **Abdomen:**  
- **Extremities:**  
- **Pulses:**  
- **Neuro:**  
- **Skin:**  

#### 3. Laboratory Data
-  

#### 4. Imaging and Other Diagnostic Data
-  

#### 5. Other Clinicians' Notes (if applicable)
-  

---

### A - Assessment

#### 1. Problem List
1.  
2.  

#### 2. Differential Diagnoses
- **[Problem Name]**  
  -  ...

### P - Plan

#### Problem 1: [Name]
- **Investigations:**  
- **Therapeutic Interventions:**  
- **Consults/Referrals:**  
- **Patient Education:**  
- **Follow-Up/Monitoring:**  

#### Problem 2: [Name]
- **Investigations:**  
- **Therapeutic Interventions:**  
- **Consults/Referrals:**  
- **Patient Education:**  
- **Follow-Up/Monitoring:**  

---

## C. Discharge Conditions (DC)

### 1. Physical Exam at Discharge
- **Vitals / VS:**  
- **General:**  
- **HEENT:**  
- **Neck:**  
- **Cardiac:**  
- **Lung:**  
- **Abdomen:**  
- **Extremities:**  
- **Pulses:**  
- **Neuro:**  
- **Skin:** 

### 2. Discharge Condition
- **Mental Status:**  
- **Level of Consciousness:**  
- **Activity Status:**  

### 3. Discharge Instructions
> *Medical summary, precautions, follow-up*

-  

### 4. Disposition
> *e.g., Home, Skilled Nursing Facility, etc.*

-  

### 5. Brief Hospital Course
> *Summary of presentation, diagnostics, treatment, consults, response*

-  
<|end-of-format|>

### WARNING
Remember, do not summarize anything and strictly follow the given format. Do not add any extra heading and do not miss any headings given in the format. If some information is missing then leave it blank in the structured report.
"""

In [15]:
# CSV file the notes are read from; the loading code below uses its "text" column.
data_path = "Data/mimic4/discharge.csv"
# Position of the first row to evaluate (also offsets the per-example log folder number).
start_index = 0
# Number of rows to evaluate, starting at start_index.
n_rows = 2

def format_rl_prompts(data):
    """
    Turn raw notes into user prompts.

    Each item of `data` is prefixed with the "# Unstructured raw clinical
    notes - " header followed by a blank line. Returns a list with one prompt
    per item, in the same order.
    """
    return [f"# Unstructured raw clinical notes - \n\n{report}" for report in data]

# Read only as many rows as are needed to reach the end of the requested window,
# keep the "text" column, and slice the window [start_index, start_index + n_rows).
end_index = start_index + n_rows
df = pl.read_csv(data_path, n_rows=end_index)["text"][start_index:end_index]
data = format_rl_prompts(df)
print(len(data))

2


# Reward Function

In [16]:
import re

# Headings the structured report is expected to contain, grouped by markdown
# depth (headingsN holds the headings written with N leading '#').
# Entries are matched as substrings of the extracted heading text, so short
# forms such as "Subjective" or "Other Clinicians" are sufficient.
headings1 = ["Patient Report"]

headings2 = [
    "A. Comprehensive Patient Profile (CPP)",
    "B. Subjective Objective Assessment Plan (SOAP)",
    "C. Discharge Conditions (DC)",
]

headings3 = [
    "Administrative & Identification Details",
    "Subjective",
    "Objective",
    "Assessment",
    "Plan",
    "Physical Exam at Discharge",
    "Discharge Condition",
    "Discharge Instructions",
    "Disposition",
    "Brief Hospital Course",
]

# The per-problem plan headings ("Problem 1", "Problem 2") are not required.
# NOTE(review): presumably because the number of problems varies per note - confirm.
headings4 = [
    "Chief Complaint (CC)",
    "History of Present Illness (HPI)",
    "Past Medical History (PMH)",
    "Past Surgical History (PSH)",
    "Family History (FH)",
    "Social History (SH)",
    "Review of Systems (ROS)",
    "Current Medications and Allergies",
    "Vital Signs (During Admission)",
    "Physical Examination",
    "Laboratory Data",
    "Imaging and Other Diagnostic Data",
    "Other Clinicians",
    "Problem List",
    "Differential Diagnoses",
]

In [17]:
def calculate_reward(headings, expected_headings):
    """
    Return the fraction of expected headings that appear in `headings`.

    An expected heading counts as found when it is a (case-sensitive) substring
    of at least one entry of `headings`; each expected heading is counted once.

    Args:
        headings: heading texts extracted from a response.
        expected_headings: heading texts the response should contain.

    Returns:
        A float in [0.0, 1.0]. When `expected_headings` is empty there is
        nothing to miss, so the result is 1.0 instead of a ZeroDivisionError.
    """
    if len(expected_headings) == 0:
        return 1.0
    found = sum(
        1
        for expected in expected_headings
        if any(expected in heading for heading in headings)
    )
    return found / len(expected_headings)

def reward_function(response):
    """
    Score how completely a response reproduces the expected report headings.

    The headings of `response` are extracted per markdown depth (1-4) with
    extract_headings_by_level (not defined in this section of the notebook) and
    compared with the module-level lists headings1..headings4. Each depth
    contributes the fraction of its expected headings that were found, and the
    four fractions are averaged.

    A leading reasoning trace wrapped in <think>...</think> (as emitted by
    reasoning models such as deepseek-r1) is removed first, so headings that
    are merely mentioned while reasoning are not counted as report headings.
    Responses without such a block are scored exactly as before.

    Args:
        response: the generated report as a string.

    Returns:
        The mean of the four per-depth fractions, a float in [0.0, 1.0].
    """
    import re  # local import so the function does not rely on cell order

    report = re.sub(r'<think>.*?</think>', '', response, flags=re.DOTALL)
    found = extract_headings_by_level(report)
    expected_by_level = ((1, headings1), (2, headings2), (3, headings3), (4, headings4))
    per_level = [calculate_reward(found[level], expected) for level, expected in expected_by_level]
    return sum(per_level) / len(per_level)

# Run Evaluation

In [None]:
import ollama

# Root folder handed to Hermes as mainSaveFolder; each example gets its own numbered subfolder.
path = "Data/evaluation_logs"
# Per-example heading-coverage scores for the Hermes pipeline and for the single-LLM baseline.
hermes_rewards = []
llm_rewards = []

for index, input_text in enumerate(data):
    print(f"Example {index+1}/{len(data)}")
    
    # Hermes pipeline: the subfolder number is 1-based and offset by start_index.
    hermes = Hermes.HermesAgenticSystem(mainSaveFolder=f"{path}/{index+start_index+1}")
    # completeRun returns a pair; only the structured report is scored here (kGraph is not used in this cell).
    kGraph, hermes_structured_report = hermes.completeRun(unstructuredReport=input_text)
    hermes_reward = reward_function(hermes_structured_report)
    hermes_rewards.append(hermes_reward)
    
    # Baseline: one deepseek-r1 call with the same template (system) and note (user).
    resp = ollama.chat(model='deepseek-r1:70b', messages=[
    {
        'role': 'system',
        'content': system_prompt,
    },
    {
        'role': 'user',
        'content': input_text,
    }
    ], options={"num_ctx": 32000},)
    llmStructuredReport = resp['message']['content']
    llm_reward = reward_function(llmStructuredReport)
    llm_rewards.append(llm_reward)
    
    print(f"Hermes Reward: {hermes_reward}, LLM Reward: {llm_reward}")

# Aggregate over all evaluated examples. np.std is called with its defaults.
hermes_reward_mean = np.mean(hermes_rewards)
hermes_reward_std = np.std(hermes_rewards)
llm_reward_mean = np.mean(llm_rewards)
llm_reward_std = np.std(llm_rewards)

print(f"Mean Reward (Hermes): {hermes_reward_mean}")
print(f"Standard Deviation of Reward (Hermes): {hermes_reward_std}")
print(f"Mean Reward (LLM): {llm_reward_mean}")
print(f"Standard Deviation of Reward (LLM): {llm_reward_std}")