In [98]:
from ollama import chat
from pydantic import BaseModel, Field
from typing import List, Literal, Union, Dict
import json
from datetime import datetime, timedelta
from openai import OpenAI
import os
import pandas as pd
import re


In [89]:
# Short alias -> Ollama model tag for every model this notebook can be run with.
AVAILABLE_MODELS = dict([
    ('llama3.3', 'llama3.3'),
    ('llama3.3-q8', 'llama3.3:70b-instruct-q8_0'),
    ('gemma3_fp16', 'gemma3:27b-it-fp16'),
    ('gemma3_q8', 'gemma3:27b-it-q8_0'),
    ('deepseek-r1', 'deepseek-r1:70b'),
])

# Alias (a key of AVAILABLE_MODELS) selected for this run.
DEFAULT_MODEL = 'gemma3_q8'

# Resolved model tag passed as `model=` to every chat() call below.
model = AVAILABLE_MODELS[DEFAULT_MODEL]

In [122]:
# Identifier interpolated into every input/output file name in this notebook.
# It must stay a float: its string form ("152136.0") is part of those file names.
# NOTE(review): presumably the hospital admission id of the patient being processed - confirm.
HADM_ID = float(152136)

In [123]:
# Home medication extraction, using Ollama structured output.
# NOTE: no docstring is added to the model class on purpose - pydantic would copy it
# into the JSON schema that is sent to the model as `format`.
class HomeMeds(BaseModel):
    # Flat list of medication strings returned by the model.
    medications: List[str] = Field(description="List of medications with dosage forms, e.g. 'Celexa 20mg capsule'")

# Read the admission-medication notes; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open(f'results/notes/admission_meds_{HADM_ID}.json') as home_meds_file:
    home_meds = json.load(home_meds_file)

# The loaded object is interpolated with its Python repr.
meds_prompt = f"""
Extract all medications from the below json file, including the dosage form and dosage strength. e.g. 'Celexa 20mg capsule'
Medications:
{home_meds}
"""

# Constrain the reply to the HomeMeds JSON schema so it can be validated below.
response = chat(
  messages=[
    {
      'role': 'user',
      'content': meds_prompt,
    }
  ],
  model=model,
  format=HomeMeds.model_json_schema(),
)

# Raises pydantic.ValidationError if the reply does not match the schema.
home_meds_med_list = HomeMeds.model_validate_json(response.message.content)
print(home_meds_med_list)

medications=['Seroquel', 'Librium']


In [124]:
# Extract scans from results, using Ollama structured output.
# NOTE: no docstring is added to the model class on purpose - pydantic would copy it
# into the JSON schema that is sent to the model as `format`.
class Scans(BaseModel):
    # Mapping of date string -> scan result text returned by the model.
    scans: Dict[str, str] = Field(description="Dictionary of scans with date and resutls, e.g. {'2124-01-01': 'CT CAP: 1. Extensive retroperitoneal...'}")

# Read the scan notes; the context manager closes the file handle deterministically
# instead of leaving it to the garbage collector.
with open(f'results/notes/scans_{HADM_ID}.json') as scan_notes_file:
    scan_notes = json.load(scan_notes_file)

# Doubled braces render as literal braces in the f-string example.
scan_prompt = f"""
Extract all medical imaging scan results and date from the below json file, e.g. {{'2134-01-01': 'CT CAP: 1. Extensive retroperitoneal...'}}
Scans:
{scan_notes}
"""

# Constrain the reply to the Scans JSON schema so it can be validated below.
response = chat(
  messages=[
    {
      'role': 'user',
      'content': scan_prompt,
    }
  ],
  model=model,
  format=Scans.model_json_schema(),
)

# Raises pydantic.ValidationError if the reply does not match the schema.
scan_dict = Scans.model_validate_json(response.message.content)
print(scan_dict)

scans={'2145-10-3': 'BLOOD WBC-4.9 RBC-3.95* Hgb-13.5* Hct-38.4* MCV-97 MCH-34.2* MCHC-35.3* RDW-14.9 Plt Ct-165, PT-12.2 PTT-23.1 INR(PT)-1.0, Glucose-194* UreaN-8 Creat-1.1 Na-145 K-3.4 Cl-107 HCO3-19* AnGap-22*, ALT-184* AST-317* CK(CPK)-2060* AlkPhos-141* Amylase-55, CK-MB-7 cTropnT-<0.01, ASA-NEG Ethanol-198* Acetmnp-NEG Bnzodzp-POS Barbitr-NEG Tricycl-NEG', '2145-10-5': 'URINE bnzodzp-POS barbitr-NEG opiates-POS cocaine-NEG amphetm-NEG mthdone-NEG'}


In [125]:
# Rewrite each event so that specific medication names are replaced by 'medication'.
# NOTE: no docstring is added to the model class on purpose - pydantic would copy it
# into the JSON schema that is sent to the model as `format`.
class ModifiedEvent(BaseModel):
    # Event text after the model has replaced medication mentions.
    modified_text: str = Field(description="Event text with medication mentions replaced")

# Read the event notes; the context manager closes the file handle deterministically
# instead of leaving it to the garbage collector.
with open(f'results/notes/events_{HADM_ID}.json') as events_file:
    events = json.load(events_file)

processed_events = []

# One chat() call per event record.
for event in events:
    # Records without an 'events' key are sent to the model as an empty text.
    event_text = event.get('events', '')

    single_event_prompt = f"""
    Modify the following hospital event text by replacing any mentions of specific medications with the generic word 'medication'.
    Do not remove or alter any other information such as dates, vitals, or non-medication related events.
    
    Event Text:
    {event_text}
    """

    response = chat(
        messages=[
            {
                'role': 'user',
                'content': single_event_prompt,
            }
        ],
        model=model,
        format=ModifiedEvent.model_json_schema(),
    )

    # Raises pydantic.ValidationError if the reply does not match the schema.
    result = ModifiedEvent.model_validate_json(response.message.content)

    # Shallow-copy the original record and overwrite only its 'events' field,
    # so every other key is carried over unchanged.
    modified_event = event.copy()
    modified_event['events'] = result.modified_text
    processed_events.append(modified_event)

# Print one processed event as an example. The second record (index 1) is shown when
# it exists, as before; otherwise fall back to the only record, and print nothing
# for an empty list instead of raising IndexError.
print("Sample processed event:")
if len(processed_events) > 1:
    print(processed_events[1])
elif processed_events:
    print(processed_events[0])

Sample processed event:
{'admit_time': '2145-10-03 20:48:00', 'discharge_time': '2145-10-14', 'event_time': '2145-10-11 05:20:00', 'vitals': 'Tmax: 38.4,C (101.1,   Tcurrent: 38.4,C (101.1,   HR: 80 (74 - 105) bpm,   BP: 139/75(93) {110/63(78) - 170/84(111)} mmHg,   ', 'events': '24 Hour Events:, FEVER - 101.1,C - [**2145-10-11**] 04:00 AM,  - On going family discussion re: code status and long term management,  - Neurology: requested slow wean off medication and reapplication if upper,  ext. sz activity.,  - Fi02 has not been weaned, still on 80%,  - Weaned medication, started medication.,  - CTA of chest shows small mediastinal pneumothorax with subcutaneous,  air, and a collapsed lower LLL atelectasis and LRL consolidation.,  - Had bronchoscopy: copious purulent secretions in left lower >,  lingula, no obvious tracheal/bronchial injury. BAL preformed in left,  lower lobe.,  -This am, ICP continued to be elevated; started medication gtt; neuro and,  neurosurg had no new recs.'}


In [126]:
# Timestamp layout shared by admit_time, medication start_time and lab_time.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# Identify meds started on admission (start time earlier than admission + 6 hours).
# The admission time is read from the first event record.
admit_time = events[0]['admit_time']
admit_time = datetime.strptime(admit_time, TIMESTAMP_FORMAT)

# Context managers close the file handles deterministically.
with open(f'results/notes/sample_meds_{HADM_ID}.json') as hospital_meds_file:
    hospital_meds_med_list = json.load(hospital_meds_file)

admission_day_meds = []
for med in hospital_meds_med_list:
    # Records with a missing or empty start_time are skipped.
    if 'start_time' in med and med['start_time']:
        med_start_time = datetime.strptime(med['start_time'], TIMESTAMP_FORMAT)
        # Only an upper bound is applied: anything started before admission + 6h is kept.
        if med_start_time < admit_time + timedelta(hours=6):
            admission_day_meds.append(med)

print(admission_day_meds)

# Identify scans done at admission (between 1 day before and 6 hours after admission).
admission_scans = {}
for date_str, scan_text in scan_dict.scans.items():
    try:
        # Try full format first
        scan_date = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        try:
            # Fall back to a month-day string by prefixing the admission year.
            scan_date = datetime.strptime(f"{admit_time.year}-{date_str}", '%Y-%m-%d')
        except ValueError:
            print(f"Warning: Could not parse date {date_str}, skipping...")
            continue

    # scan_date has no time component, so it compares as midnight of that day.
    if admit_time - timedelta(days=1) < scan_date < admit_time + timedelta(hours=6):
        admission_scans[date_str] = scan_text

print("Admission scans:", admission_scans)

# Identify labs done at admission (between 24 hours before and 6 hours after admission)
# and baseline labs (more than 2 days before admission).
with open(f'results/notes/sample_lab_{HADM_ID}.json') as lab_file:
    lab_dict = json.load(lab_file)

admission_labs = []
baseline_labs = []
for lab in lab_dict:
    lab_time = datetime.strptime(lab['lab_time'], TIMESTAMP_FORMAT)
    # lab_time is kept on the entry for now; it is needed to sort the baseline labs.
    lab_entry = {
        'lab_name': lab['lab_name'],
        'lab_value': lab['lab_value'],
        'lab_time': lab['lab_time']
    }

    if admit_time - timedelta(hours=24) < lab_time < admit_time + timedelta(hours=6):
        admission_labs.append(lab_entry)

    # The two windows are disjoint, so an entry never lands in both lists.
    if lab_time < admit_time - timedelta(days=2):
        baseline_labs.append(lab_entry)

# Sort baseline labs by time (most recent first) and keep only latest 10
baseline_labs.sort(key=lambda x: datetime.strptime(x['lab_time'], TIMESTAMP_FORMAT), reverse=True)
baseline_labs = baseline_labs[:10]

# Drop lab_time from admission_labs and baseline_labs now that sorting is done.
for lab in admission_labs:
    lab.pop('lab_time', None)
for lab in baseline_labs:
    lab.pop('lab_time', None)


print("Admission labs:", admission_labs)
print("\nBaseline labs (latest 10):", baseline_labs)

[{'medication': 'Labetalol, IV', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-05 00:00:00', 'dosage': '10 mg, 0.1 VIAL'}, {'medication': 'Fentanyl Citrate, IV DRIP', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-07 00:00:00', 'dosage': '2.5 mg, 50 mL'}, {'medication': 'NS, IV DRIP', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-07 00:00:00', 'dosage': '250 mL, 250 mL'}, {'medication': 'Thiamine, IV', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-09 00:00:00', 'dosage': '100 mg, 1 mL'}, {'medication': 'Midazolam, IV DRIP', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-10 00:00:00', 'dosage': '100 mg, 20 mL'}, {'medication': 'NS, IV DRIP', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-10 00:00:00', 'dosage': '100 mL, 100 mL'}, {'medication': 'Chlorhexidine Gluconate 0.12% Oral Rinse, ORAL', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-14 00:00:00', 'dosage': '15 mL, 1 UDCUP'}, {'medication': 'FoLIC Acid

In [127]:
# Compare admission-day meds against home meds to find the newly started ones.
# NOTE: no docstring is added to the model class on purpose - pydantic would copy it
# into the JSON schema that is sent to the model as `format`.

class AdmitMeds(BaseModel):
    # Medication names returned by the model; reused as the schema for both calls below.
    medications: List[str] = Field(description="List of new medication names, e.g. 'Propofol'")


# Step 1: ask the model which admission-day meds (LIST 2) are not home meds (LIST 1).
new_meds_prompt = f"""
You are provided with two medication lists. 
Identify and return NEW and DIFFERENT medications in LIST 2 compared to LIST 1. 
Return a list of medication names, EXACTLY as they are in LIST 2, e.g. "Propofol"

LIST 1: 
{home_meds_med_list}

LIST 2:
{admission_day_meds}
"""

response = chat(
  messages=[
    {
      'role': 'user',
      'content': new_meds_prompt,
    }
  ],
  model=model,
  format=AdmitMeds.model_json_schema(),
)

# Raises pydantic.ValidationError if the reply does not match the schema.
parsed_meds = AdmitMeds.model_validate_json(response.message.content)
print(f"parsed_meds: {parsed_meds}")

# Step 2: ask the model to drop fluids, intubation, pain and supportive medications.
filtered_meds_prompt = f"""
You are provided with a list of medications.
Review and remove medications that are 
1. IV fluids (such as NS, D5W). 
2.used for intubation (e.g. midazolam, vecuronium, propofol). 
3.used for pain (e.g. fentanyl, opioids). 
4. supportive meds (e.g. senna, vitamins, tylenol, electrolyte replacement, DVT prophylaxis such as enoxaparin or heparin sq)
In other words, only include medications that are essential for the patient's treatment.

LIST:
{parsed_meds}
"""
response_2 = chat(
  messages=[
    {
      'role': 'user',
      'content': filtered_meds_prompt,
    }
  ],
  model=model,
  format=AdmitMeds.model_json_schema(),
)

filtered_parsed_meds = AdmitMeds.model_validate_json(response_2.message.content)
# Normalise to the part before the first comma, trimmed and lower-cased, so the names
# can be compared with the 'medication' field of the admission-day records.
new_meds_med_list = [med.split(',')[0].strip().lower() for med in filtered_parsed_meds.medications]
print(f"new_meds_med_list: {new_meds_med_list}")

# Step 3: map the model's names back onto the structured admission-day records.
# A set gives constant-time membership tests and ignores duplicated names.
new_meds_lookup = set(new_meds_med_list)
matched_names = set()
new_admission_day_meds = []
for med in admission_day_meds:
    med_key = med['medication'].split(',')[0].strip().lower()
    if med_key in new_meds_lookup:
        new_admission_day_meds.append(med)
        matched_names.add(med_key)

print(f"new_admission_day_meds: {new_admission_day_meds}")

# The match is exact, so a name the model misspelled would otherwise be dropped
# without any trace. Report every name that matched no admission-day record.
unmatched_names = sorted(new_meds_lookup - matched_names)
if unmatched_names:
    print(f"Warning: no admission-day medication matched these names: {unmatched_names}")



parsed_meds: medications=['Propofol', 'Fentanyl', 'Rocuronium', 'NS', 'Magneisum Sulfate', 'Multivitamins', 'Potassium Chloride', 'Labatalol', 'Cisatracurium Besylate', 'D5W', 'Calcium Gluconate', 'FoLIC Acid', 'Magneisum Sulfate', 'Multivitamins', 'Potassium Chloride', 'Potassium Chloride', 'Labatalol', 'Potassium Chloride', 'D5W', 'Calcium Gluconate', 'NS', 'D5W']
new_meds_med_list: ['magneisum sulfate', 'labatalol', 'cisatracurium besylate', 'calcium gluconate']
new_admission_day_meds: [{'medication': 'Cisatracurium Besylate, IV', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-05 00:00:00', 'dosage': '100 mg, 0.5 VIAL'}, {'medication': 'Calcium Gluconate, IV', 'start_time': '2145-10-04 00:00:00', 'end_time': '2145-10-05 00:00:00', 'dosage': '2 gm, 2 VIAL'}]


In [128]:
# Create a CSV: the prompt is HPI + home_meds + labs_baseline + labs_at_admission + scans,
# and there is one row per distinct medication in new_admission_day_meds.

# Load the HPI note; the context manager closes the file handle deterministically.
# The raw JSON gets its own name so that `HPI` only ever holds the final text.
with open(f'results/notes/HPI_{HADM_ID}.json') as hpi_file:
    hpi_records = json.load(hpi_file)
HPI_text = hpi_records[0]['0']

# Keep everything from "Chief Complaint:" onwards; fall back to the full text if absent.
chief_complaint_match = re.search(r'(Chief Complaint:[\s\S]*)', HPI_text, re.DOTALL)
HPI = chief_complaint_match.group(1).strip() if chief_complaint_match else HPI_text

# NOTE(review): the prompt text (including the "Bleow" typo) is kept byte-identical so
# new rows stay comparable with any CSVs already generated - confirm before fixing it.
admission_info = f"""
Bleow is admission information for a patient.

HPI:
{HPI}

Home meds:
{home_meds_med_list}

Abnormal labs at baseline:
{baseline_labs}

Abnormal labs at admission:
{admission_labs}

Scans done at admission:
{admission_scans}

What medication should be started for this patient at admission?
"""

rows = []
unique_meds = set(med['medication'] for med in new_admission_day_meds)
# Iterate in sorted order: the iteration order of a set of strings can differ between
# interpreter runs, which would make the CSV row order non-reproducible.
for med_name in sorted(unique_meds):
    row = {
        'HADM_ID': HADM_ID,
        'admit_time': admit_time,
        'HPI': admission_info,
        'medication': med_name,
    }
    rows.append(row)

# Explicit columns keep the header row even when there are no medications to write.
df = pd.DataFrame(rows, columns=['HADM_ID', 'admit_time', 'HPI', 'medication'])
output_file = f'results/datasets/admission_medications_analysis_{HADM_ID}.csv'
# Create the output directory if needed so to_csv does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
df.to_csv(output_file, index=False)
print(f"\nCreated CSV file: {output_file}")


Created CSV file: results/datasets/admission_medications_analysis_152136.0.csv
