In [1]:
from ollama import chat
from pydantic import BaseModel, Field
from typing import List, Literal, Union, Dict
import json
from datetime import datetime, timedelta
from openai import OpenAI
import os
import pandas as pd
import re


In [2]:
# Short alias -> Ollama model tag for every model this notebook can be run with.
AVAILABLE_MODELS: Dict[str, str] = {
    "llama3.3": "llama3.3",
    "llama3.3-q8": "llama3.3:70b-instruct-q8_0",
    "gemma3_fp16": "gemma3:27b-it-fp16",
    "gemma3_q8": "gemma3:27b-it-q8_0",
    "deepseek-r1": "deepseek-r1:70b",
}

# Alias (a key of AVAILABLE_MODELS) selected for this run.
DEFAULT_MODEL = "gemma3_fp16"

# Resolved model tag that every chat() call below passes as `model=`.
model = AVAILABLE_MODELS[DEFAULT_MODEL]

In [10]:
# Hospital admission ID processed by this notebook run.
# It is a float on purpose: the value is interpolated verbatim into the input and
# output file names (e.g. "..._113344.0.csv"), so the ".0" suffix is part of the path.
HADM_ID = 113344.0

In [11]:
# Home medication extraction, using structured output.
# NOTE: the schema classes are documented with `#` comments rather than class
# docstrings, because pydantic copies a class docstring into the JSON schema
# passed to the model through `format=`.
class HomeMeds(BaseModel):
    # Medication strings including strength and dosage form.
    medications: List[str] = Field(description="List of medications with dosage forms, e.g. 'Celexa 20mg capsule'")

# Read the admission medication notes; the context manager closes the file
# handle instead of leaving it open for the lifetime of the kernel.
with open(f'results/notes/admission_meds_{HADM_ID}.json') as home_meds_file:
    home_meds = json.load(home_meds_file)

meds_prompt = f"""
Extract all medications from the below json file, including the dosage form and dosage strength. e.g. 'Celexa 20mg capsule'
Medications:
{home_meds}
"""

# Constrain the model's reply to the HomeMeds JSON schema.
response = chat(
  messages=[
    {
      'role': 'user',
      'content': meds_prompt,
    }
  ],
  model=model,
  format=HomeMeds.model_json_schema(),
)

# Validate the raw JSON reply; raises a pydantic validation error if it does
# not match the schema.
home_meds_med_list = HomeMeds.model_validate_json(response.message.content)
print(home_meds_med_list)

medications=['Celexa 20mg capsule']


In [13]:
# Rewrite each event so that specific medication names are replaced by the
# generic word 'medication'.
class ModifiedEvent(BaseModel):
    # Event text returned by the model after medication mentions are replaced.
    modified_text: str = Field(description="Event text with medication mentions replaced")

# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(f'results/notes/events_{HADM_ID}.json') as events_file:
    events = json.load(events_file)

# The admission time is read from the first event, so fail with a clear message
# instead of an opaque IndexError when the file holds no events.
if not events:
    raise ValueError(f"No events found in results/notes/events_{HADM_ID}.json")

admit_time = events[0]['admit_time']
admit_time = datetime.strptime(admit_time, '%Y-%m-%d %H:%M:%S')

processed_events = []

for event in events:
    # Events without an 'events' field are sent to the model as empty text.
    event_text = event.get('events', '')

    single_event_prompt = f"""
    Modify the following hospital event text by replacing any mentions of specific medications with the generic word 'medication'.
    Do not remove or alter any other information such as dates, vitals, or non-medication related events.
    
    Event Text:
    {event_text}
    """

    response = chat(
        messages=[
            {
                'role': 'user',
                'content': single_event_prompt,
            }
        ],
        model=model,
        format=ModifiedEvent.model_json_schema(),
    )

    result = ModifiedEvent.model_validate_json(response.message.content)

    # Shallow-copy the original event and replace only the 'events' field, so
    # the loaded `events` list keeps the unmodified text.
    modified_event = event.copy()
    modified_event['events'] = result.modified_text
    processed_events.append(modified_event)

# Print one processed event as an example. The second event (index 1) is shown
# when it exists; with a single event, fall back to index 0 rather than raising
# IndexError.
sample_index = min(1, len(processed_events) - 1)
print("Sample processed event:")
print(processed_events[sample_index])

Sample processed event:
{'admit_time': '2138-09-13 02:29:00', 'discharge_time': '2138-10-29', 'event_time': '2138-10-04 07:14:00', 'vitals': 'Tmax: 36.9,C (98.4,   Tcurrent: 36,C (96.8,   HR: 103 (97 - 120) bpm,   BP: 99/62(76) {99/62(76) - 99/62(76)} mmHg,   ', 'events': '24 Hour Events:,  -HD,  -planned to C/O today but no available bed. also started on 30mg PO,  medication for consistant tachycardia over last 2 days. BP has been,  stable.,  -had several runs of NSVT overnight, occurred while she was being moved,  and bathed. Her BP remained stable and she was asx and the episode,  resolved. nothing was given.'}


In [14]:
# Extract scans from results.
class Scans(BaseModel):
    # Maps a scan date string to the scan's result text.
    scans: Dict[str, str] = Field(description="Dictionary of scans with date and resutls, e.g. {'2124-01-01': 'CT CAP: 1. Extensive retroperitoneal...'}")

# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(f'results/notes/scans_{HADM_ID}.json') as scans_file:
    scan_notes = json.load(scans_file)

# `admit_time` (a datetime set when the events were loaded) is interpolated
# here as the fallback date for scans that have no timestamp.
scan_prompt = f"""
Extract all medical imaging scan results and date from the below json file, e.g. {{'2134-01-01': 'CT CAP: 1. Extensive retroperitoneal...'}}
If the scan time is not mentioned, use the admission time {admit_time}
Scans:
{scan_notes}
"""

# Constrain the model's reply to the Scans JSON schema.
response = chat(
  messages=[
    {
      'role': 'user',
      'content': scan_prompt,
    }
  ],
  model=model,
  format=Scans.model_json_schema(),
)

# Validate the raw JSON reply against the schema.
scan_dict = Scans.model_validate_json(response.message.content)
print(scan_dict)

scans={'2138-10-29 03:49AM': 'BLOOD Glucose-203* UreaN-79* Creat-2.5* Na-125* K-6.0* Cl-104 HCO3-12* AnGap-15', '2138-10-20 01:44PM': 'BLOOD LD(LDH)-261*', '2138-10-17 03:17AM': 'BLOOD ALT-23 AST-34 LD(LDH)-257* AlkPhos-73 TotBili-0.3', '2138-09-26 08:44AM': 'BLOOD Cortsol-20.9*', '2138-09-29 05:29AM': 'BLOOD Lactate-7.9*', '2138-09-13 01:40PM': 'BLOOD b2micro-27.8*'}


In [15]:
# Identify meds started on admission: start time earlier than 6 hours after
# admission. There is no lower bound on the start time.

# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(f'results/notes/sample_meds_{HADM_ID}.json') as hospital_meds_file:
    hospital_meds_med_list = json.load(hospital_meds_file)

admission_cutoff = admit_time + timedelta(hours=6)

# Entries with a missing or empty 'start_time' are skipped.
admission_day_meds = [
    med
    for med in hospital_meds_med_list
    if med.get('start_time')
    and datetime.strptime(med['start_time'], '%Y-%m-%d %H:%M:%S') < admission_cutoff
]

print(admission_day_meds)

# Identify scans done at admission: later than 1 day before admission and
# earlier than 6 hours after admission.

# Timestamp layouts accepted for the scan keys. Besides the plain date, this
# covers the admission-time fallback that the scan prompt asks the model to use
# (rendered as 'YYYY-MM-DD HH:MM:SS') and 12-hour clock values such as
# '2138-10-29 03:49AM', which appear in the model's output.
SCAN_DATE_FORMATS = (
    '%Y-%m-%d',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d %I:%M %p',
)


def _parse_scan_date(date_str, default_year):
    """Parse a scan date string produced by the model.

    The string is tried as-is against every layout in SCAN_DATE_FORMATS, then
    again with ``default_year`` prefixed (for keys that omit the year).

    Returns:
        The parsed ``datetime``, or ``None`` if no layout matches.
    """
    candidate = date_str.strip()
    for text in (candidate, f"{default_year}-{candidate}"):
        for fmt in SCAN_DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
    return None


admission_scans = {}
for date_str, scan_text in scan_dict.scans.items():
    scan_date = _parse_scan_date(date_str, admit_time.year)
    if scan_date is None:
        print(f"Warning: Could not parse date {date_str}, skipping...")
        continue

    if admit_time - timedelta(days=1) < scan_date < admit_time + timedelta(hours=6):
        # Keep the model's original key so the output matches the source text.
        admission_scans[date_str] = scan_text

print("Admission scans:", admission_scans)

# Identify labs done at admission (later than 24 hours before admission and
# earlier than 6 hours after it) and baseline labs (more than 2 days before
# admission).

# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(f'results/notes/sample_lab_{HADM_ID}.json') as lab_file:
    lab_dict = json.load(lab_file)

admission_labs = []
# (lab_time, entry) pairs; the parsed time is kept only for sorting, so each
# timestamp is parsed once and the final entries never carry 'lab_time'.
baseline_candidates = []
for lab in lab_dict:
    lab_time = datetime.strptime(lab['lab_time'], '%Y-%m-%d %H:%M:%S')
    lab_entry = {
        'lab_name': lab['lab_name'],
        'lab_value': lab['lab_value'],
    }

    if admit_time - timedelta(hours=24) < lab_time < admit_time + timedelta(hours=6):
        admission_labs.append(lab_entry)

    if lab_time < admit_time - timedelta(days=2):
        baseline_candidates.append((lab_time, lab_entry))

# Sort baseline labs by time (most recent first) and keep only the latest 10.
# Sorting on the time alone keeps ties in file order (the sort is stable).
baseline_candidates.sort(key=lambda pair: pair[0], reverse=True)
baseline_labs = [entry for _, entry in baseline_candidates[:10]]


print("Admission labs:", admission_labs)
print("\nBaseline labs (latest 10):", baseline_labs)

[{'medication': 'D5W, IV', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '1000 mL, 1 mL'}, {'medication': 'NS, IV', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '1000 mL, 1 BAG'}, {'medication': 'NS, IV', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '1000 mL, 1 BAG'}, {'medication': 'Sodium Polystyrene Sulfonate, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '60 gm, 4 BTL'}, {'medication': 'Lactulose, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '30 mL, 1 UDCUP'}, {'medication': 'Lactulose, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '30 mL, 1 UDCUP'}, {'medication': 'Sodium Polystyrene Sulfonate, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '30 gm, 2 BTL'}, {'medication': 'Sodium Bicarbonate, IV', 'start_time': 

In [16]:
# Compare admission-day meds with home meds and keep only the new ones.

class AdmitMeds(BaseModel):
    # Medication names returned by the model.
    medications: List[str] = Field(description="List of new medication names, e.g. 'Propofol'")


def _request_medications(prompt):
    """Send ``prompt`` as one user message and parse the reply as ``AdmitMeds``.

    The reply is constrained to the AdmitMeds JSON schema through ``format=``;
    a reply that does not match makes ``model_validate_json`` raise.
    """
    reply = chat(
        messages=[
            {
                'role': 'user',
                'content': prompt,
            }
        ],
        model=model,
        format=AdmitMeds.model_json_schema(),
    )
    return AdmitMeds.model_validate_json(reply.message.content)


def _normalize_med_name(name):
    """Return the text before the first comma, stripped and lower-cased.

    Used on both the model output and the admission-day records so the two
    sides are compared in the same form (e.g. 'Lactulose, PO' -> 'lactulose').
    """
    return name.split(',')[0].strip().lower()


new_meds_prompt = f"""
You are provided with two medication lists. 
Identify and return NEW and DIFFERENT medications in LIST 2 compared to LIST 1. 
Return a list of medication names, EXACTLY as they are in LIST 2, e.g. "Propofol"

LIST 1: 
{home_meds_med_list}

LIST 2:
{admission_day_meds}
"""

parsed_meds = _request_medications(new_meds_prompt)
print(f"parsed_meds: {parsed_meds}")

filtered_meds_prompt = f"""
You are provided with a list of medications.
Review and remove medications that are 
1. IV fluids (such as NS, D5W). 
2.used for intubation (e.g. midazolam, vecuronium, propofol). 
3.used for pain (e.g. fentanyl, opioids). 
4. supportive meds (e.g. senna, vitamins, tylenol, electrolyte replacement, DVT prophylaxis such as enoxaparin or heparin sq)
In other words, only include medications that are essential for the patient's treatment.

LIST:
{parsed_meds}
"""

filtered_parsed_meds = _request_medications(filtered_meds_prompt)
new_meds_med_list = [_normalize_med_name(med) for med in filtered_parsed_meds.medications]
print(f"new_meds_med_list: {new_meds_med_list}")

# Match the filtered names back to the admission-day records. A set is used
# for the membership test; record order and duplicates are preserved.
new_med_names = set(new_meds_med_list)
new_admission_day_meds = [
    med for med in admission_day_meds
    if _normalize_med_name(med['medication']) in new_med_names
]

print(f"new_admission_day_meds: {new_admission_day_meds}")



parsed_meds: medications=['Lactulose', 'Sodium Bicarbonate', 'Calcium Carbonate', 'Potassium Chloride', 'Magnesium Oxide', 'Docusate Sodium (Liquid)', 'Famotidine', 'Levothyroxine Sodium', 'Simvastatin', 'Heparin', 'Heparin Flush (5000 Units/mL)', 'Heparin Flush (10 units/ml)', 'Bisacodyl', 'Senna', 'Insuline', 'Dextrose 50%', 'Calcium Gluconate', 'Docusate Sodium (Liquid)', 'Famotidine', 'Levothyroxine Sodium', 'Simvastatin', 'Heparin', 'Heparin Flush (5000 Units/mL)', 'Heparin Flush (10 units/ml)', 'Bisacodyl', 'Senna']
new_meds_med_list: ['lactulose', 'sodium bicarbonate', 'calcium carbonate', 'potassium chloride', 'magnesium oxide', 'calcium gluconate']
new_admission_day_meds: [{'medication': 'Lactulose, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '30 mL, 1 UDCUP'}, {'medication': 'Lactulose, PO', 'start_time': '2138-09-13 00:00:00', 'end_time': '2138-09-13 00:00:00', 'dosage': '30 mL, 1 UDCUP'}, {'medication': 'Sodium Bicarbonate, IV', 's

In [17]:
# Create a CSV: the prompt is HPI + home_meds + labs_baseline + labs_at_admission + scans,
# and the results are one row per distinct medication in new_admission_day_meds.

# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(f'results/notes/HPI_{HADM_ID}.json') as hpi_file:
    HPI = json.load(hpi_file)
HPI_text = HPI[0]['0']
# Keep everything from "Chief Complaint:" onward; fall back to the full text
# when that heading is absent.
chief_complaint_match = re.search(r'(Chief Complaint:[\s\S]*)', HPI_text, re.DOTALL)
HPI = chief_complaint_match.group(1).strip() if chief_complaint_match else HPI_text

# The prompt text is written into every CSV row and is kept byte-for-byte.
admission_info = f"""
Bleow is admission information for a patient.

HPI:
{HPI}

Home meds:
{home_meds_med_list}

Abnormal labs at baseline:
{baseline_labs}

Abnormal labs at admission:
{admission_labs}

Scans done at admission:
{admission_scans}

What medication should be started for this patient at admission?
"""

# De-duplicate while preserving first-seen order. Iterating a set of strings
# gives a different order from one kernel to the next, which made the CSV row
# order non-reproducible.
unique_meds = list(dict.fromkeys(med['medication'] for med in new_admission_day_meds))
rows = [
    {
        'HADM_ID': HADM_ID,
        'admit_time': admit_time,
        'HPI': admission_info,
        'medication': med_name,
    }
    for med_name in unique_meds
]

# Explicit columns so the header row is written even when there are no rows.
df = pd.DataFrame(rows, columns=['HADM_ID', 'admit_time', 'HPI', 'medication'])
output_file = f'results/datasets/admission_medications_analysis_{HADM_ID}.csv'
# Create the output directory on a fresh checkout instead of failing in to_csv.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
df.to_csv(output_file, index=False)
print(f"\nCreated CSV file: {output_file}")


Created CSV file: results/datasets/admission_medications_analysis_113344.0.csv
