In [2]:
import pandas as pd
import sqlite3
import time

In [2]:
vitalsigns = pd.read_csv('/media/data/huyennm/mimic-iv/mimic-derived/vitalsigns.csv')
vitalsigns.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10000032,29079034,39553978,18704.0,2180-07-23 14:00:00,2180-07-23 14:20:00,223761,98.7,98.7,°F,0.0
1,10000032,29079034,39553978,18704.0,2180-07-23 14:12:00,2180-07-23 14:17:00,220045,91.0,91.0,bpm,0.0
2,10000032,29079034,39553978,18704.0,2180-07-23 14:12:00,2180-07-23 14:17:00,220210,24.0,24.0,insp/min,0.0
3,10000032,29079034,39553978,18704.0,2180-07-23 14:30:00,2180-07-23 14:43:00,220045,93.0,93.0,bpm,0.0
4,10000032,29079034,39553978,18704.0,2180-07-23 14:30:00,2180-07-23 14:43:00,220210,21.0,21.0,insp/min,0.0


In [22]:
target_patients = pd.read_csv("/media/data/huyennm/mimic-iv/mimiciv_data/eligible_patients.csv")

In [3]:
target_patients.head()

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,charttime,vent,age_at_admission
0,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 05:00:00,1,66
1,18421337,22413411,30000484,2136-01-14 17:23:32,2136-01-17 04:53:08,2136-01-15 16:00:00,1,92
2,15726459,22744101,30000831,2140-04-17 21:26:33,2140-04-20 14:21:57,2140-04-18 20:28:00,1,78
3,12168737,29283664,30001336,2186-03-20 00:44:48,2186-03-22 19:25:44,2186-03-21 00:00:00,1,77
4,14311522,24622512,30002548,2111-08-17 13:13:43,2111-08-18 18:50:31,2111-08-18 12:00:00,1,70


In [5]:
outputevents = pd.read_csv('/media/data3/biodataset/MIMIC_IV/MIMIC-IV-v3.0/physionet.org/files/mimiciv/3.0/icu/outputevents.csv')

In [33]:
query = """
SELECT 
  tp.subject_id, 
  tp.hadm_id, 
  tp.stay_id, 
  tp.intime, 
  tp.outtime, 
  vs.charttime,

  -- Temperature (converted to Celsius if necessary)
  CASE 
    WHEN vs.itemid = 223762 THEN vs.valuenum -- Celsius
    WHEN vs.itemid = 223761 THEN (vs.valuenum - 32) / 1.8 -- Convert Fahrenheit to Celsius
    ELSE NULL 
  END AS temperature,

  -- Heart rate
  CASE WHEN vs.itemid = 220045 THEN vs.valuenum ELSE NULL END AS heartrate,

  -- Aortic Pressure Signal - Diastolic
  CASE WHEN vs.itemid = 228151 THEN vs.valuenum ELSE NULL END AS aorticpressure_dias,

  -- Mean Aortic Pressure Signal - Diastolic (mean calculated over stay)
  AVG(CASE WHEN vs.itemid = 228151 THEN vs.valuenum ELSE NULL END) 
    OVER (PARTITION BY vs.stay_id) AS mean_aorticpressure_dias,

  -- Aortic Pressure Signal - Systolic
  CASE WHEN vs.itemid = 228152 THEN vs.valuenum ELSE NULL END AS aorticpressure_sys,

  -- Mean Aortic Pressure Signal - Systolic (mean calculated over stay)
  AVG(CASE WHEN vs.itemid = 228152 THEN vs.valuenum ELSE NULL END) 
    OVER (PARTITION BY vs.stay_id) AS mean_aorticpressure_sys,

  -- Respiratory Rate
  CASE WHEN vs.itemid IN (220210, 224690) THEN vs.valuenum ELSE NULL END AS RespRate

FROM vitalsigns vs
JOIN target_patients tp 
  ON vs.subject_id = tp.subject_id 
  AND vs.hadm_id = tp.hadm_id 
  AND vs.stay_id = tp.stay_id

-- Filter for the first 24 hours of ICU stay
WHERE vs.charttime >= DATETIME(tp.intime, '-6 hour')
  AND vs.charttime <= DATETIME(tp.intime, '+30 day')
  AND vs.stay_id = 30000213

-- Exclude erroneous values
AND vs.warning IS DISTINCT FROM 1

-- Filter by the specific vital sign item IDs
AND vs.itemid IN (
  223761, -- Temperature F
  223762, -- Temperature C
  220045, -- Heart Rate
  228151, -- Aortic Pressure Signal - Diastolic
  228152, -- Aortic Pressure Signal - Systolic
  220210, -- Respiratory Rate
  224690  -- Respiratory Rate (Total)
)

ORDER BY vs.subject_id, vs.hadm_id, vs.stay_id, vs.charttime;

"""

In [4]:
conn = sqlite3.connect(":memory:")

target_patients.to_sql('target_patients', conn, index=False, if_exists='replace')
vitalsigns.to_sql('vitalsigns', conn, index=False, if_exists='replace')

20625856

In [53]:
specific_vital = vitalsigns[vitalsigns['stay_id'] == 30000213]
specific_vital.to_sql('specific_vitalsigns', conn, index=False, if_exists='replace')

95

In [32]:
vitalsigns_query = """
WITH NonNullValues AS (
  SELECT
    tp.subject_id,
    tp.hadm_id,
    tp.stay_id,
    tp.intime,
    tp.outtime,
    vs.charttime,

    -- Temperature (converted to Celsius if necessary)
    CASE 
      WHEN vs.itemid = 223762 THEN vs.valuenum -- Celsius
      WHEN vs.itemid = 223761 THEN (vs.valuenum - 32) / 1.8 -- Convert Fahrenheit to Celsius
      ELSE NULL 
    END AS temperature,

    -- Heart rate
    CASE WHEN vs.itemid = 220045 THEN vs.valuenum ELSE NULL END AS heartrate,

    -- Aortic Pressure Signal - Diastolic
    CASE WHEN vs.itemid = 228151 THEN vs.valuenum ELSE NULL END AS aorticpressure_dias,

    -- Aortic Pressure Signal - Systolic
    CASE WHEN vs.itemid = 228152 THEN vs.valuenum ELSE NULL END AS aorticpressure_sys,

    -- Respiratory Rate
    CASE WHEN vs.itemid IN (220210, 224690) THEN vs.valuenum ELSE NULL END AS RespRate,

    -- Calculate mean Aortic Pressure Diastolic
    AVG(CASE WHEN vs.itemid = 228151 THEN vs.valuenum ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_dias,

    -- Calculate mean Aortic Pressure Systolic
    AVG(CASE WHEN vs.itemid = 228152 THEN vs.valuenum ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_sys,

    -- Row number for last non-null temperature within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (223761, 223762) AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS temp_row_num_last_24h,

    -- Row number for first non-null temperature within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (223761, 223762) AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS temp_row_num_first_10d,

    -- Row number for last non-null heart rate within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 220045 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS hr_row_num_last_24h,

    -- Row number for first non-null heart rate within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 220045 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS hr_row_num_first_10d,

    -- Row number for last non-null Aortic Pressure Diastolic within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228151 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS aortic_dias_row_num_last_24h,

    -- Row number for first non-null Aortic Pressure Diastolic within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228151 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS aortic_dias_row_num_first_10d,

    -- Row number for last non-null Aortic Pressure Systolic within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228152 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS aortic_sys_row_num_last_24h,

    -- Row number for first non-null Aortic Pressure Systolic within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228152 AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS aortic_sys_row_num_first_10d,

    -- Row number for last non-null Respiratory Rate within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (220210, 224690) AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS resp_row_num_last_24h,

    -- Row number for first non-null Respiratory Rate within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (220210, 224690) AND vs.valuenum IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS resp_row_num_first_10d

  FROM vitalsigns vs
  JOIN target_patients tp 
    ON vs.subject_id = tp.subject_id
    AND vs.hadm_id = tp.hadm_id
    AND vs.stay_id = tp.stay_id

  -- Filter by the relevant vital sign item IDs
  WHERE vs.itemid IN (
    223761, -- Temperature F
    223762, -- Temperature C
    220045, -- Heart Rate
    228151, -- Aortic Pressure Signal - Diastolic
    228152, -- Aortic Pressure Signal - Systolic
    220210, -- Respiratory Rate
    224690  -- Respiratory Rate (Total)
  )
)

-- Combine the most suitable values per column
SELECT
  subject_id,
  hadm_id,
  stay_id,
  intime,
  outtime,
  -- Coalesce to return the last non-null value within 24h or the first available non-null one within 10 days for each column
  COALESCE(
    MAX(CASE WHEN temp_row_num_last_24h = 1 THEN temperature END),
    MAX(CASE WHEN temp_row_num_first_10d = 1 THEN temperature END)
  ) AS temperature,

  COALESCE(
    MAX(CASE WHEN hr_row_num_last_24h = 1 THEN heartrate END),
    MAX(CASE WHEN hr_row_num_first_10d = 1 THEN heartrate END)
  ) AS heart_rate,

  COALESCE(
    MAX(CASE WHEN aortic_dias_row_num_last_24h = 1 THEN aorticpressure_dias END),
    MAX(CASE WHEN aortic_dias_row_num_first_10d = 1 THEN aorticpressure_dias END)
  ) AS aorticpressure_dias,

  COALESCE(
    MAX(CASE WHEN aortic_sys_row_num_last_24h = 1 THEN aorticpressure_sys END),
    MAX(CASE WHEN aortic_sys_row_num_first_10d = 1 THEN aorticpressure_sys END)
  ) AS aorticpressure_sys,

  COALESCE(
    MAX(CASE WHEN resp_row_num_last_24h = 1 THEN RespRate END),
    MAX(CASE WHEN resp_row_num_first_10d = 1 THEN RespRate END)
  ) AS resp_rate,

  -- Include the mean values for Aortic Pressure Diastolic and Systolic
  MAX(mean_aorticpressure_dias) AS mean_aorticpressure_dias,
  MAX(mean_aorticpressure_sys) AS mean_aorticpressure_sys

FROM NonNullValues
GROUP BY subject_id, hadm_id, stay_id
ORDER BY subject_id, hadm_id, stay_id;

"""

In [104]:
notnull = """
WITH NonNullValues AS (
  SELECT
    tp.subject_id,
    tp.hadm_id,
    tp.stay_id,
    tp.intime,
    tp.outtime,
    CASE 
      WHEN vs.itemid = 223762 THEN vs.charttime -- Celsius
      WHEN vs.itemid = 223761 THEN vs.charttime -- Convert Fahrenheit to Celsius
      ELSE NULL 
    END AS temperature_charttime,

    -- Temperature (converted to Celsius if necessary)
    CASE 
      WHEN vs.itemid = 223762 THEN vs.value -- Celsius
      WHEN vs.itemid = 223761 THEN (vs.value - 32) / 1.8 -- Convert Fahrenheit to Celsius
      ELSE NULL 
    END AS temperature,

    -- Row number for last non-null temperature within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (223761, 223762) AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS temp_row_num_last_24h,

    -- Row number for first non-null temperature within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (223761, 223762) AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS temp_row_num_first_10d,

    -- Heart rate
    CASE 
      WHEN vs.itemid = 220045 THEN vs.charttime
      ELSE NULL 
    END AS hr_charttime,
    
    CASE WHEN vs.itemid = 220045 THEN vs.value ELSE NULL END AS heartrate,

        -- Row number for last non-null heart rate within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN (vs.itemid = 220045 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day')) THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS hr_row_num_last_24h,

    -- Row number for first non-null heart rate within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN (vs.itemid = 220045 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day')) THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS hr_row_num_first_10d,

    -- Aortic Pressure Signal - Diastolic
    CASE 
      WHEN vs.itemid = 228151 THEN vs.charttime
      ELSE NULL 
    END AS apd_charttime,
    
    CASE WHEN vs.itemid = 228151 THEN vs.value ELSE NULL END AS aorticpressure_dias,

        -- Row number for last non-null Aortic Pressure Diastolic within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228151 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS aortic_dias_row_num_last_24h,

    -- Row number for first non-null Aortic Pressure Diastolic within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228151 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS aortic_dias_row_num_first_10d,

    -- Aortic Pressure Signal - Systolic
    CASE 
      WHEN vs.itemid = 228152 THEN vs.charttime
      ELSE NULL 
    END AS aps_charttime,
    
    CASE WHEN vs.itemid = 228152 THEN vs.value ELSE NULL END AS aorticpressure_sys,

        -- Row number for last non-null Aortic Pressure Systolic within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228152 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS aortic_sys_row_num_last_24h,

    -- Row number for first non-null Aortic Pressure Systolic within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid = 228152 AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS aortic_sys_row_num_first_10d,

    -- Respiratory Rate
    CASE 
      WHEN vs.itemid IN (220210, 224690) THEN vs.charttime
      ELSE NULL 
    END AS rr_charttime,
    
    CASE WHEN vs.itemid IN (220210, 224690) THEN vs.value ELSE NULL END AS RespRate,

    -- Row number for last non-null Respiratory Rate within 24 hours
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (220210, 224690) AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+1 day') THEN vs.charttime 
                ELSE NULL 
              END DESC
    ) AS resp_row_num_last_24h,

    -- Row number for first non-null Respiratory Rate within 10 days
    ROW_NUMBER() OVER (
      PARTITION BY tp.subject_id, tp.hadm_id, tp.stay_id
      ORDER BY CASE 
                WHEN vs.itemid IN (220210, 224690) AND vs.value IS NOT NULL AND vs.charttime >= tp.intime 
    AND vs.charttime <= DATETIME(tp.intime, '+15 day') THEN vs.charttime 
                ELSE NULL 
              END ASC
    ) AS resp_row_num_first_10d,

    -- Calculate mean Aortic Pressure Diastolic
    AVG(CASE WHEN vs.itemid = 228151 THEN vs.value ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_dias,

    -- Calculate mean Aortic Pressure Systolic
    AVG(CASE WHEN vs.itemid = 228152 THEN vs.value ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_sys


  FROM specific_vitalsigns vs
  JOIN target_patients tp 
    ON vs.subject_id = tp.subject_id
    AND vs.hadm_id = tp.hadm_id
    AND vs.stay_id = tp.stay_id

  -- Filter by the relevant vital sign item IDs
  WHERE vs.itemid IN (
    223761, -- Temperature F
    223762, -- Temperature C
    220045, -- Heart Rate
    228151, -- Aortic Pressure Signal - Diastolic
    228152, -- Aortic Pressure Signal - Systolic
    220210, -- Respiratory Rate
    224690  -- Respiratory Rate (Total)
  ) AND vs.value IS NOT NULL
)
SELECT * FROM NonNullValues;
"""

In [156]:
subquery = """
WITH NonNullValues AS (
  SELECT
    tp.subject_id,
    tp.hadm_id,
    tp.stay_id,
    tp.intime,
    tp.outtime,
    vs.itemid,  -- Include vs.itemid to be accessible in the outer query
    CASE 
      WHEN vs.itemid = 223762 THEN vs.charttime -- Celsius
      WHEN vs.itemid = 223761 THEN vs.charttime -- Fahrenheit
      ELSE NULL 
    END AS temperature_charttime,

    -- Temperature (converted to Celsius if necessary)
    CASE 
      WHEN vs.itemid = 223762 THEN vs.value -- Celsius
      WHEN vs.itemid = 223761 THEN (vs.value - 32) / 1.8 -- Convert Fahrenheit to Celsius
      ELSE NULL 
    END AS temperature,

    -- Heart rate
    CASE 
      WHEN vs.itemid = 220045 THEN vs.charttime
      ELSE NULL 
    END AS hr_charttime,
    
    CASE WHEN vs.itemid = 220045 THEN vs.value ELSE NULL END AS heartrate,
    
    -- Aortic Pressure Signal - Diastolic
    CASE 
      WHEN vs.itemid = 228151 THEN vs.charttime
      ELSE NULL 
    END AS apd_charttime,
    
    CASE WHEN vs.itemid = 228151 THEN vs.value ELSE NULL END AS aorticpressure_dias,

    AVG(CASE WHEN vs.itemid = 228151 THEN vs.valuenum ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_dias,

    -- Aortic Pressure Signal - Systolic
    CASE 
      WHEN vs.itemid = 228152 THEN vs.charttime
      ELSE NULL 
    END AS aps_charttime,
    
    CASE WHEN vs.itemid = 228152 THEN vs.value ELSE NULL END AS aorticpressure_sys,

    AVG(CASE WHEN vs.itemid = 228152 THEN vs.valuenum ELSE NULL END)
    OVER (PARTITION BY tp.stay_id) AS mean_aorticpressure_sys,

    -- Respiratory Rate
    CASE 
      WHEN vs.itemid IN (220210, 224690) THEN vs.charttime
      ELSE NULL 
    END AS rr_charttime,
    
    CASE WHEN vs.itemid IN (220210, 224690) THEN vs.value ELSE NULL END AS RespRate
  FROM vitalsigns vs
  JOIN target_patients tp 
    ON vs.subject_id = tp.subject_id
    AND vs.hadm_id = tp.hadm_id
    AND vs.stay_id = tp.stay_id

  -- Filter by the relevant vital sign item IDs
  WHERE vs.itemid IN (
    223761, -- Temperature F
    223762, -- Temperature C
    220045, -- Heart Rate
    228151, -- Aortic Pressure Signal - Diastolic
    228152, -- Aortic Pressure Signal - Systolic
    220210, -- Respiratory Rate
    224690  -- Respiratory Rate (Total)
  ) AND vs.value IS NOT NULL
), rn_values AS (
SELECT *,
       -- Row number for last non-null temperature within 24 hours
       ROW_NUMBER() OVER (
         PARTITION BY subject_id, hadm_id, stay_id
         ORDER BY CASE 
                   WHEN itemid IN (223761, 223762) 
                   AND temperature IS NOT NULL 
                   AND temperature_charttime >= intime 
                   AND temperature_charttime <= DATETIME(intime, '+1 day') 
                   THEN temperature_charttime 
                   ELSE NULL 
                 END DESC
       ) AS temp_row_num_last_24h,

       -- Row number for first non-null temperature within 10 days
       ROW_NUMBER() OVER (
         PARTITION BY subject_id, hadm_id, stay_id
         ORDER BY CASE 
                   WHEN itemid IN (223761, 223762) 
                   AND temperature IS NOT NULL 
                   AND temperature_charttime >= intime 
                   AND temperature_charttime <= DATETIME(intime, '+15 day') 
                   THEN temperature_charttime 
                   ELSE NULL 
                 END ASC
       ) AS temp_row_num_first_10d,

       -- Row number for last non-null heart rate within 24 hours
       ROW_NUMBER() OVER (
         PARTITION BY subject_id, hadm_id, stay_id
         ORDER BY CASE 
                   WHEN itemid = 220045 
                   AND heartrate IS NOT NULL 
                   AND hr_charttime >= intime 
                   AND hr_charttime <= DATETIME(intime, '+1 day') 
                   THEN hr_charttime 
                   ELSE NULL 
                 END DESC
       ) AS hr_row_num_last_24h,

       -- Row number for first non-null heart rate within 10 days
       ROW_NUMBER() OVER (
         PARTITION BY subject_id, hadm_id, stay_id
         ORDER BY CASE 
                   WHEN itemid = 220045 
                   AND heartrate IS NOT NULL 
                   AND hr_charttime >= intime 
                   AND hr_charttime <= DATETIME(intime, '+15 day') 
                   THEN hr_charttime 
                   ELSE NULL 
                 END ASC
       ) AS hr_row_num_first_10d,

       -- Row number for last non-null Aortic Pressure Diastolic within 24 hours
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid = 228151  -- Diastolic
            AND aorticpressure_dias IS NOT NULL 
            AND apd_charttime >= intime 
            AND apd_charttime <= DATETIME(intime, '+1 day') 
            THEN apd_charttime 
            ELSE NULL 
          END DESC
) AS aortic_dias_row_num_last_24h,

-- Row number for first non-null Aortic Pressure Diastolic within 10 days
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid = 228151  -- Diastolic
            AND aorticpressure_dias IS NOT NULL 
            AND apd_charttime >= intime 
            AND apd_charttime <= DATETIME(intime, '+15 day') 
            THEN apd_charttime 
            ELSE NULL 
          END ASC
) AS aortic_dias_row_num_first_10d,

-- Row number for last non-null Aortic Pressure Systolic within 24 hours
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid = 228152  -- Systolic
            AND aorticpressure_sys IS NOT NULL 
            AND aps_charttime >= intime 
            AND aps_charttime <= DATETIME(intime, '+1 day') 
            THEN aps_charttime 
            ELSE NULL 
          END DESC
) AS aortic_sys_row_num_last_24h,

-- Row number for first non-null Aortic Pressure Systolic within 10 days
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid = 228152  -- Systolic
            AND aorticpressure_sys IS NOT NULL 
            AND aps_charttime >= intime 
            AND aps_charttime <= DATETIME(intime, '+15 day') 
            THEN aps_charttime 
            ELSE NULL 
          END ASC
) AS aortic_sys_row_num_first_10d,

-- Row number for last non-null Respiratory Rate within 24 hours
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid IN (220210, 224690)  -- Respiratory Rate
            AND RespRate IS NOT NULL 
            AND rr_charttime >= intime 
            AND rr_charttime <= DATETIME(intime, '+1 day') 
            THEN rr_charttime 
            ELSE NULL 
          END DESC
) AS resp_row_num_last_24h,

-- Row number for first non-null Respiratory Rate within 10 days
ROW_NUMBER() OVER (
  PARTITION BY subject_id, hadm_id, stay_id
  ORDER BY CASE 
            WHEN itemid IN (220210, 224690)  -- Respiratory Rate
            AND RespRate IS NOT NULL 
            AND rr_charttime >= intime 
            AND rr_charttime <= DATETIME(intime, '+15 day') 
            THEN rr_charttime 
            ELSE NULL 
          END ASC
) AS resp_row_num_first_10d

FROM NonNullValues
)
SELECT
  subject_id,
  hadm_id,
  stay_id,
  intime,
  outtime,
  
  -- Coalesce to return the last non-null value within 24h or the first available non-null one within 10 days for each column
  COALESCE(
    MAX(CASE WHEN temp_row_num_last_24h = 1 THEN temperature END),
    MAX(CASE WHEN temp_row_num_first_10d = 1 THEN temperature END)
  ) AS temperature,

  COALESCE(
    MAX(CASE WHEN hr_row_num_last_24h = 1 THEN heartrate END),
    MAX(CASE WHEN hr_row_num_first_10d = 1 THEN heartrate END)
  ) AS heart_rate,

  COALESCE(
    MAX(CASE WHEN aortic_dias_row_num_last_24h = 1 THEN aorticpressure_dias END),
    MAX(CASE WHEN aortic_dias_row_num_first_10d = 1 THEN aorticpressure_dias END)
  ) AS aorticpressure_dias,

  COALESCE(
    MAX(CASE WHEN aortic_sys_row_num_last_24h = 1 THEN aorticpressure_sys END),
    MAX(CASE WHEN aortic_sys_row_num_first_10d = 1 THEN aorticpressure_sys END)
  ) AS aorticpressure_sys,

  COALESCE(
    MAX(CASE WHEN resp_row_num_last_24h = 1 THEN RespRate END),
    MAX(CASE WHEN resp_row_num_first_10d = 1 THEN RespRate END)
  ) AS resp_rate,

  -- Include the mean values for Aortic Pressure Diastolic and Systolic
  MAX(mean_aorticpressure_dias) AS mean_aorticpressure_dias,
  MAX(mean_aorticpressure_sys) AS mean_aorticpressure_sys

FROM rn_values
GROUP BY subject_id, hadm_id, stay_id
ORDER BY subject_id, hadm_id, stay_id;
"""

In [157]:
start_time = time.time()
result_df = pd.read_sql_query(subquery, conn)
end_time = time.time()
execution_time = end_time - start_time
print(f"Time taken: {execution_time:.4f} seconds")

Time taken: 161.2821 seconds


In [158]:
result_df.head()

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,temperature,heart_rate,aorticpressure_dias,aorticpressure_sys,resp_rate,mean_aorticpressure_dias,mean_aorticpressure_sys
0,10000690,25860671,37081114,2150-11-02 19:37:00,2150-11-06 17:03:17,36.666667,80.0,,,35.0,,
1,10000980,26913865,39765666,2189-06-27 08:42:00,2189-06-27 20:38:27,37.055556,69.0,,,21.0,,
2,10001843,26133978,39698942,2134-12-05 18:50:03,2134-12-06 14:38:26,36.388889,116.0,,,27.0,,
3,10002013,23581541,39060235,2160-05-18 10:00:53,2160-05-19 17:33:33,36.222222,95.0,,,16.0,,
4,10002155,23822395,33685454,2129-08-04 12:45:00,2129-08-10 17:02:38,36.833333,78.0,,,17.0,,


In [147]:
a = result_df.sort_values(by='hr_charttime')
selected_columns = a[['hr_charttime', 'heartrate', 'hr_row_num_last_24h', 'hr_row_num_first_10d']]
selected_columns[selected_columns['hr_row_num_first_10d'] == 1]

Unnamed: 0,hr_charttime,heartrate,hr_row_num_last_24h,hr_row_num_first_10d
33,,,26,1


In [167]:
temperature_df = result_df[~result_df['temperature'].isna()]
temperature_df

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,temperature,heart_rate,aorticpressure_dias,aorticpressure_sys,resp_rate,mean_aorticpressure_dias,mean_aorticpressure_sys
0,10000690,25860671,37081114,2150-11-02 19:37:00,2150-11-06 17:03:17,36.666667,80.0,,,35.0,,
1,10000980,26913865,39765666,2189-06-27 08:42:00,2189-06-27 20:38:27,37.055556,69.0,,,21.0,,
2,10001843,26133978,39698942,2134-12-05 18:50:03,2134-12-06 14:38:26,36.388889,116.0,,,27.0,,
3,10002013,23581541,39060235,2160-05-18 10:00:53,2160-05-19 17:33:33,36.222222,95.0,,,16.0,,
4,10002155,23822395,33685454,2129-08-04 12:45:00,2129-08-10 17:02:38,36.833333,78.0,,,17.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
25777,19998330,23151993,31085771,2178-09-20 21:40:00,2178-09-21 18:06:26,36.555556,68.0,,,23.0,,
25778,19998330,24096838,33428243,2178-11-27 22:53:00,2178-11-29 21:29:39,36.500000,80.0,,,25.0,,
25779,19998330,24492004,32641669,2178-10-01 08:51:00,2178-10-03 23:25:08,37.388889,79.0,,,17.0,,
25780,19998591,24349193,31144045,2185-07-16 18:48:18,2185-07-26 18:27:01,35.888889,91.0,,,21.0,,


In [159]:
# Filter non-null values for Aortic Pressure Systolic and sort by aps_charttime
aortic_sys_df = result_df[~result_df['aorticpressure_sys'].isna()].sort_values(by='aps_charttime')

# Select the relevant columns
selected_columns_sys = aortic_sys_df[['aps_charttime', 'aorticpressure_sys', 'aortic_sys_row_num_last_24h', 'aortic_sys_row_num_first_10d']]

# Display the result
selected_columns_sys


KeyError: 'aps_charttime'

In [86]:
c = result_df[result_df['hr_row_num_first_10d'] == 1]
selected_columns = c[['hr_charttime', 'heartrate', 'hr_row_num_last_24h', 'hr_row_num_first_10d']]
selected_columns

Unnamed: 0,hr_charttime,heartrate,hr_row_num_last_24h,hr_row_num_first_10d
33,,,26,1


In [124]:
specific_vital[specific_vital['itemid'].isin([220210,224690])].sort_values(by='charttime')

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
6529554,13180007,27543152,30000213,8083.0,2162-06-21 05:45:00,2162-06-21 05:47:00,224690,24.0,24.0,insp/min,0.0
6529612,13180007,27543152,30000213,52306.0,2162-06-21 05:46:00,2162-06-21 06:21:00,220210,22.0,22.0,insp/min,0.0
6529614,13180007,27543152,30000213,52306.0,2162-06-21 06:00:00,2162-06-21 06:21:00,220210,19.0,19.0,insp/min,0.0
6529556,13180007,27543152,30000213,38499.0,2162-06-21 07:00:00,2162-06-21 07:25:00,224690,26.0,26.0,insp/min,0.0
6529584,13180007,27543152,30000213,49246.0,2162-06-21 07:00:00,2162-06-21 08:21:00,220210,23.0,23.0,insp/min,0.0
6529586,13180007,27543152,30000213,49246.0,2162-06-21 08:00:00,2162-06-21 08:21:00,220210,18.0,18.0,insp/min,0.0
6529589,13180007,27543152,30000213,49246.0,2162-06-21 09:00:00,2162-06-21 09:09:00,220210,26.0,26.0,insp/min,0.0
6529591,13180007,27543152,30000213,49246.0,2162-06-21 10:00:00,2162-06-21 12:07:00,220210,20.0,20.0,insp/min,0.0
6529557,13180007,27543152,30000213,38499.0,2162-06-21 11:00:00,2162-06-21 11:09:00,224690,22.0,22.0,insp/min,0.0
6529593,13180007,27543152,30000213,49246.0,2162-06-21 11:00:00,2162-06-21 12:07:00,220210,22.0,22.0,insp/min,0.0


In [51]:
b = a[~a['temperature'].isna()]
b

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,charttime,temperature,heartrate,aorticpressure_dias,aorticpressure_sys,...,temp_row_num_last_24h,temp_row_num_first_10d,hr_row_num_last_24h,hr_row_num_first_10d,aortic_dias_row_num_last_24h,aortic_dias_row_num_first_10d,aortic_sys_row_num_last_24h,aortic_sys_row_num_first_10d,resp_row_num_last_24h,resp_row_num_first_10d
0,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 04:00:00,37.055556,,,,...,1,92,68,43,78,78,78,78,78,46
1,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 01:00:00,37.166667,,,,...,2,91,67,42,73,73,73,73,73,41
2,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 21:00:00,37.277778,,,,...,3,90,66,41,69,69,69,69,69,37
3,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 17:00:00,37.555556,,,,...,4,89,64,39,61,61,61,61,61,29
4,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 16:00:00,37.055556,,,,...,5,88,63,38,59,59,59,59,59,27
5,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 12:00:00,37.166667,,,,...,6,87,62,37,53,53,53,53,53,21
6,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 08:00:00,36.444444,,,,...,7,86,61,36,49,49,49,49,49,17
7,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 06:21:00,36.333333,,,,...,8,85,65,40,65,65,65,65,65,33
92,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 08:00:00,37.111111,,,,...,93,93,58,33,34,34,34,34,34,2
93,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 12:00:00,37.111111,,,,...,94,94,59,34,39,39,39,39,39,7


In [52]:
b = b.sort_values(by='charttime')
b

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,charttime,temperature,heartrate,aorticpressure_dias,aorticpressure_sys,...,temp_row_num_last_24h,temp_row_num_first_10d,hr_row_num_last_24h,hr_row_num_first_10d,aortic_dias_row_num_last_24h,aortic_dias_row_num_first_10d,aortic_sys_row_num_last_24h,aortic_sys_row_num_first_10d,resp_row_num_last_24h,resp_row_num_first_10d
7,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 06:21:00,36.333333,,,,...,8,85,65,40,65,65,65,65,65,33
6,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 08:00:00,36.444444,,,,...,7,86,61,36,49,49,49,49,49,17
5,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 12:00:00,37.166667,,,,...,6,87,62,37,53,53,53,53,53,21
4,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 16:00:00,37.055556,,,,...,5,88,63,38,59,59,59,59,59,27
3,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 17:00:00,37.555556,,,,...,4,89,64,39,61,61,61,61,61,29
2,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-21 21:00:00,37.277778,,,,...,3,90,66,41,69,69,69,69,69,37
1,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 01:00:00,37.166667,,,,...,2,91,67,42,73,73,73,73,73,41
0,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 04:00:00,37.055556,,,,...,1,92,68,43,78,78,78,78,78,46
92,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 08:00:00,37.111111,,,,...,93,93,58,33,34,34,34,34,34,2
93,13180007,27543152,30000213,2162-06-21 05:38:00,2162-06-22 20:52:48,2162-06-22 12:00:00,37.111111,,,,...,94,94,59,34,39,39,39,39,39,7


In [35]:
result_aortic_dias = result_df[~result_df['aorticpressure_dias'].isna()]
result_aortic_dias

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,temperature,heart_rate,aorticpressure_dias,aorticpressure_sys,resp_rate,mean_aorticpressure_dias,mean_aorticpressure_sys
30,10010058,26359957,33060379,2147-11-18 03:19:00,2147-11-19 08:53:33,39.700000,60.0,29.0,37.0,15.0,63.318182,82.409091
70,10022537,29573431,35341845,2185-01-23 10:25:23,2185-02-21 13:58:00,,80.0,4.0,47.0,20.0,2.694030,44.238806
95,10033740,22891575,31358337,2183-01-22 11:00:00,2183-02-03 16:05:42,36.055556,80.0,62.0,90.0,19.0,59.642857,86.285714
172,10073239,28901382,32755896,2144-08-07 18:19:11,2144-08-08 18:32:51,37.600000,60.0,43.0,79.0,17.0,49.190476,88.619048
247,10107943,21588409,34619266,2126-05-01 19:15:18,2126-05-03 06:26:46,37.900000,144.0,52.0,86.0,21.0,54.200000,103.500000
...,...,...,...,...,...,...,...,...,...,...,...,...
25112,19757659,22335667,38515238,2165-11-26 13:50:44,2165-11-27 07:37:22,37.300000,84.0,49.0,116.0,30.0,44.000000,97.727273
25237,19810932,29764035,31466277,2148-12-14 22:41:05,2148-12-19 01:13:04,,80.0,73.0,113.0,18.0,64.695652,98.652174
25309,19838433,25536531,34873306,2173-09-02 08:03:05,2173-09-09 19:07:00,37.600000,67.0,54.0,91.0,20.0,55.102041,93.142857
25673,19968075,28592225,31756531,2153-04-22 13:06:46,2153-04-23 22:38:43,39.000000,80.0,19.0,40.0,26.0,25.769231,45.000000


In [36]:
test = vitalsigns[(vitalsigns['stay_id'] == 37081114) & (vitalsigns['itemid'].isin([220045]))]

In [37]:
a = test.sort_values(by='charttime')
a

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
44,10000690,25860671,37081114,17393.0,2150-11-02 19:54:00,2150-11-02 20:12:00,220045,79.0,79.0,bpm,0.0
46,10000690,25860671,37081114,17393.0,2150-11-02 20:00:00,2150-11-02 20:12:00,220045,75.0,75.0,bpm,0.0
48,10000690,25860671,37081114,17393.0,2150-11-02 20:12:00,2150-11-02 20:12:00,220045,80.0,80.0,bpm,0.0
51,10000690,25860671,37081114,17393.0,2150-11-02 21:00:00,2150-11-02 21:26:00,220045,71.0,71.0,bpm,0.0
53,10000690,25860671,37081114,17393.0,2150-11-02 22:00:00,2150-11-02 22:08:00,220045,62.0,62.0,bpm,0.0
...,...,...,...,...,...,...,...,...,...,...,...
33,10000690,25860671,37081114,8787.0,2150-11-06 12:00:00,2150-11-06 13:15:00,220045,80.0,80.0,bpm,0.0
35,10000690,25860671,37081114,8787.0,2150-11-06 13:00:00,2150-11-06 13:15:00,220045,94.0,94.0,bpm,0.0
37,10000690,25860671,37081114,8787.0,2150-11-06 14:00:00,2150-11-06 14:12:00,220045,118.0,118.0,bpm,0.0
39,10000690,25860671,37081114,9137.0,2150-11-06 15:00:00,2150-11-06 16:11:00,220045,90.0,90.0,bpm,0.0


In [29]:
a.loc[a['value'].idxmax()]

subject_id                 10000690
hadm_id                    25860671
stay_id                    37081114
caregiver_id                85861.0
charttime       2150-11-05 11:45:00
storetime       2150-11-05 11:48:00
itemid                       220045
value                         137.0
valuenum                      137.0
valueuom                        bpm
Name: 207, dtype: object

In [129]:
list_tp = target_patients['stay_id'].tolist()
len(list_tp)

25782

In [130]:
test = vitalsigns[(vitalsigns['stay_id'].isin(list_tp)) & (vitalsigns['itemid'].isin([228151, 228152]))]
test

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
19627,10010058,26359957,33060379,4364.0,2147-11-18 14:33:00,2147-11-18 14:34:00,228151,60.0,60.0,mmHg.,0.0
19628,10010058,26359957,33060379,4364.0,2147-11-18 14:33:00,2147-11-18 14:34:00,228152,67.0,67.0,mmHg.,0.0
19631,10010058,26359957,33060379,4364.0,2147-11-18 15:07:00,2147-11-18 15:09:00,228151,63.0,63.0,mmHg.,0.0
19632,10010058,26359957,33060379,4364.0,2147-11-18 15:07:00,2147-11-18 15:09:00,228152,69.0,69.0,mmHg.,0.0
19638,10010058,26359957,33060379,28360.0,2147-11-18 07:26:00,2147-11-18 07:27:00,228151,74.0,74.0,mmHg.,0.0
...,...,...,...,...,...,...,...,...,...,...,...
20618243,19997473,27787494,32134105,97419.0,2173-09-19 05:00:00,2173-09-19 05:15:00,228152,103.0,103.0,mmHg.,0.0
20618247,19997473,27787494,32134105,97419.0,2173-09-19 06:00:00,2173-09-19 06:14:00,228151,51.0,51.0,mmHg.,0.0
20618248,19997473,27787494,32134105,97419.0,2173-09-19 06:00:00,2173-09-19 06:14:00,228152,109.0,109.0,mmHg.,0.0
20618249,19997473,27787494,32134105,97419.0,2173-09-19 19:48:00,2173-09-19 19:48:00,228151,50.0,50.0,mmHg.,0.0


In [166]:
a = test.drop_duplicates(subset = 'stay_id')
b = a[~a['stay_id'].isin(aortic_sys_df['stay_id'])]
b

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
113408,10050532,28769042,36336444,3202.0,2154-03-04 20:01:00,2154-03-04 20:02:00,228151,60.0,60.0,mmHg.,0.0
1143874,10570690,24727929,32713716,1349.0,2183-11-17 08:00:00,2183-11-17 08:15:00,228151,13.0,13.0,mmHg.,0.0
1214305,10612217,28651899,32097635,3202.0,2163-11-16 08:13:00,2163-11-16 08:14:00,228151,59.0,59.0,mmHg.,0.0
1317745,10663695,22455973,39879701,1349.0,2139-11-27 07:00:00,2139-11-27 07:36:00,228151,10.0,10.0,mmHg.,0.0
1567568,10785126,20598872,39785361,1349.0,2130-01-20 08:19:00,2130-01-20 08:20:00,228151,66.0,66.0,mmHg.,0.0
2389711,11181695,29624209,32029248,12880.0,2117-04-09 08:00:00,2117-04-09 08:22:00,228151,48.0,48.0,mmHg.,0.0
2475180,11212692,28948855,33102601,77930.0,2155-11-09 23:30:00,2155-11-09 23:39:00,228151,55.0,55.0,mmHg.,0.0
2729593,11326660,25434637,30304895,5808.0,2177-02-15 09:42:00,2177-02-15 09:43:00,228151,68.0,68.0,mmHg.,0.0
3040069,11466140,26877857,31936668,11482.0,2144-08-24 08:00:00,2144-08-24 08:40:00,228151,56.0,56.0,mmHg.,0.0
3355421,11607177,27709554,38248273,91542.0,2149-01-24 18:51:00,2149-01-24 20:22:00,228151,53.0,53.0,mmHg.,0.0


In [168]:
result_code = pd.read_csv('/media/data/huyennm/mimic-iv/mimiciv_data/vitalsigns/vitalsigns_results/vitals_output.csv')
result_code

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,temperature,heart_rate,aorticpressure_dias,aorticpressure_sys,resp_rate,mean_aorticpressure_dias,mean_aorticpressure_sys
0,10000690,25860671,37081114,2150-11-02 19:37:00,2150-11-06 17:03:17,36.666667,80.0,,,35.0,,
1,10000980,26913865,39765666,2189-06-27 08:42:00,2189-06-27 20:38:27,37.055556,69.0,,,21.0,,
2,10001843,26133978,39698942,2134-12-05 18:50:03,2134-12-06 14:38:26,36.388889,116.0,,,27.0,,
3,10002013,23581541,39060235,2160-05-18 10:00:53,2160-05-19 17:33:33,36.222222,95.0,,,16.0,,
4,10002155,23822395,33685454,2129-08-04 12:45:00,2129-08-10 17:02:38,36.833333,78.0,,,17.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
25777,19998330,23151993,31085771,2178-09-20 21:40:00,2178-09-21 18:06:26,36.555556,68.0,,,23.0,,
25778,19998330,24096838,33428243,2178-11-27 22:53:00,2178-11-29 21:29:39,36.500000,80.0,,,25.0,,
25779,19998330,24492004,32641669,2178-10-01 08:51:00,2178-10-03 23:25:08,37.388889,79.0,,,17.0,,
25780,19998591,24349193,31144045,2185-07-16 18:48:18,2185-07-26 18:27:01,35.888889,91.0,,,21.0,,


In [169]:
missing_percentage = (result_code.isna().sum() / len(result_code)) * 100

# Display the missing percentage for each column
missing_percentage

subject_id                   0.000000
hadm_id                      0.000000
stay_id                      0.000000
intime                       0.000000
outtime                      0.007757
temperature                  4.212241
heart_rate                   0.000000
aorticpressure_dias         99.480258
aorticpressure_sys          99.480258
resp_rate                    0.050423
mean_aorticpressure_dias    99.266930
mean_aorticpressure_sys     99.266930
dtype: float64

In [15]:
urine_query = """
select
  -- patient identifiers
  tp.subject_id, tp.hadm_id, tp.stay_id, oe.charttime as charttime_day

  -- volumes associated with urine output ITEMIDs
  , sum(
      -- we consider input of GU irrigant as a negative volume
      case
        when oe.itemid = 227488 and oe.value > 0 then -1*oe.value
        else oe.value end
  ) as UrineOutput
from target_patients tp
-- Join to the outputevents table to get urine output
left join outputevents oe
-- join on all patient identifiers
on tp.subject_id = oe.subject_id and tp.hadm_id = oe.hadm_id and tp.stay_id = oe.stay_id
-- and ensure the data occurs during the first day
and oe.charttime >= tp.intime and oe.charttime <= DATETIME(tp.intime, '+1 day')
where itemid in
(
-- these are the most frequently occurring urine output observations in CareVue
40055, -- "Urine Out Foley"
43175, -- "Urine ."
40069, -- "Urine Out Void"
40094, -- "Urine Out Condom Cath"
40715, -- "Urine Out Suprapubic"
40473, -- "Urine Out IleoConduit"
40085, -- "Urine Out Incontinent"
40057, -- "Urine Out Rt Nephrostomy"
40056, -- "Urine Out Lt Nephrostomy"
40405, -- "Urine Out Other"
40428, -- "Urine Out Straight Cath"
40086,--	Urine Out Incontinent
40096, -- "Urine Out Ureteral Stent #1"
40651, -- "Urine Out Ureteral Stent #2"

-- these are the most frequently occurring urine output observations in MetaVision
226559, -- "Foley"
226560, -- "Void"
226561, -- "Condom Cath"
226584, -- "Ileoconduit"
226563, -- "Suprapubic"
226564, -- "R Nephrostomy"
226565, -- "L Nephrostomy"
226567, --	Straight Cath
226557, -- R Ureteral Stent
226558, -- L Ureteral Stent
227488, -- GU Irrigant Volume In
227489  -- GU Irrigant/Urine Volume Out
)
group by tp.subject_id, tp.hadm_id, tp.stay_id, charttime_day
order by tp.subject_id, tp.hadm_id, tp.stay_id;
"""

In [6]:
conn = sqlite3.connect(":memory:")

target_patients.to_sql('target_patients', conn, index=False, if_exists='replace')
outputevents.to_sql('outputevents', conn, index=False, if_exists='replace')

5359395

In [16]:
start_time = time.time()
result_df = pd.read_sql_query(urine_query, conn)
end_time = time.time()
execution_time = end_time - start_time
print(f"Time taken: {execution_time:.4f} seconds")

Time taken: 1.8983 seconds


In [19]:
result_df

Unnamed: 0,subject_id,hadm_id,stay_id,charttime_day,UrineOutput
0,10000690,25860671,37081114,2150-11-02 21:27:00,80.0
1,10000690,25860671,37081114,2150-11-02 22:00:00,30.0
2,10000690,25860671,37081114,2150-11-02 23:00:00,60.0
3,10000690,25860671,37081114,2150-11-03 00:00:00,50.0
4,10000690,25860671,37081114,2150-11-03 01:00:00,40.0
...,...,...,...,...,...
307134,19998591,24349193,36794489,2185-07-04 18:00:00,35.0
307135,19998591,24349193,36794489,2185-07-04 19:00:00,30.0
307136,19998591,24349193,36794489,2185-07-04 20:00:00,40.0
307137,19998591,24349193,36794489,2185-07-04 21:00:00,5.0


In [7]:
bg_output = pd.read_csv('/media/data/huyennm/mimic-iv/mimiciv_data/vitalsigns/vitalsigns_results/vitalsign_output.csv')
bg_output

Unnamed: 0,subject_id,stay_id,heart_rate_min,heart_rate_max,heart_rate_mean,resp_rate_min,resp_rate_max,resp_rate_mean,temperature_min,temperature_max,temperature_mean,map_dias_min,map_dias_max,map_dias_mean,map_sys_min,map_sys_max,map_sys_mean
0,10001843,39698942,90.0,155.0,123.294118,16.0,27.0,21.176471,36.39,36.61,36.480000,,,,,,
1,10002013,39060235,80.0,105.0,94.636364,11.0,23.0,15.409091,36.22,37.70,37.242353,,,,,,
2,10002428,35479615,67.0,109.0,89.259259,16.0,25.0,19.696429,36.22,38.94,37.482857,,,,,,
3,10002760,31831386,59.0,97.0,72.560000,8.5,21.0,14.910714,36.39,36.39,36.390000,,,,,,
4,10003400,32128372,98.0,131.0,115.408163,12.0,29.0,18.960317,35.28,37.00,35.896875,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10205,19997752,34531437,48.0,83.0,62.868421,14.5,23.0,17.900000,36.22,36.94,36.488333,,,,,,
10206,19997760,33057362,75.0,104.0,80.066667,20.0,35.0,25.390625,36.56,37.33,37.089000,,,,,,
10207,19998330,31417783,66.0,79.0,71.400000,12.0,26.0,18.596154,36.72,37.17,36.935714,,,,,,
10208,19998330,32641669,59.0,81.0,70.440000,14.0,24.0,19.076923,36.44,37.44,37.145000,,,,,,


In [8]:
missing_percentage = (bg_output.isna().sum() / len(bg_output)) * 100
missing_percentage

subject_id           0.000000
stay_id              0.000000
heart_rate_min       0.000000
heart_rate_max       0.000000
heart_rate_mean      0.000000
resp_rate_min        0.009794
resp_rate_max        0.009794
resp_rate_mean       0.009794
temperature_min      9.911851
temperature_max      9.911851
temperature_mean     9.911851
map_dias_min        98.942214
map_dias_max        98.942214
map_dias_mean       98.942214
map_sys_min         98.942214
map_sys_max         98.942214
map_sys_mean        98.942214
dtype: float64

In [73]:
len((bg_output[~bg_output['pao2fio2ratio_min'].isna()]).drop_duplicates(subset='subject_id'))

9625

In [26]:
list_target = target_patients['subject_id'].tolist()
list_target

[13180007,
 18421337,
 15726459,
 12168737,
 14311522,
 16235911,
 11027822,
 17686783,
 18730522,
 12509799,
 12098571,
 17244693,
 11346472,
 17516322,
 18172623,
 17445720,
 10332722,
 16805735,
 18855147,
 19324712,
 15171885,
 16828280,
 11822425,
 10106899,
 12780478,
 19101100,
 13421733,
 18329161,
 11578593,
 14992874,
 11885477,
 15752366,
 14412677,
 11002360,
 14923903,
 14475287,
 11503970,
 13272956,
 10207365,
 11348907,
 16142166,
 10852700,
 17373149,
 19511675,
 10570615,
 13859690,
 18369403,
 11621360,
 11638303,
 17266996,
 14004449,
 11009864,
 10836215,
 18182797,
 13092399,
 16051778,
 17278325,
 12525991,
 17415666,
 12002285,
 11919942,
 19635799,
 13966675,
 13787728,
 19180828,
 15369746,
 14351751,
 18864963,
 11173428,
 15461582,
 18557012,
 12595991,
 15087712,
 15831913,
 17036390,
 17051420,
 18040308,
 18160815,
 12322492,
 15622498,
 19867017,
 12995112,
 15945590,
 14182243,
 17567845,
 13942199,
 18970393,
 14538549,
 12875089,
 11540283,
 10546797,

In [25]:
file_event_path = '/media/data/huyennm/mimic-iv/mimic-derived/derived_database/bg.csv'

chunksize = 10**6
chunk_list = []

for chunk in pd.read_csv(file_event_path, chunksize=chunksize):
    chunk_list.append(chunk)
bg = pd.concat(chunk_list, axis=0)
bg

Unnamed: 0,subject_id,hadm_id,charttime,specimen,so2,spo2,po2,pco2,fio2_chartevents,fio2,...,hemoglobin,carboxyhemoglobin,methemoglobin,chloride,calcium,temperature,potassium,sodium,lactate,glucose
0,19811045,27885031.0,2162-07-13 04:07:00,ART.,,97.0,105.0,37.0,50.0,,...,,,,,,,,,1.1,
1,19811045,27885031.0,2162-07-14 14:20:00,VEN.,,97.0,34.0,45.0,40.0,,...,,,,,,,,,1.5,152.0
2,19811138,26119621.0,2172-05-31 04:09:00,VEN.,,94.0,33.0,28.0,,,...,,,,,1.07,35.9,,,9.4,
3,19811879,27935722.0,2183-08-16 21:45:00,ART.,,100.0,126.0,27.0,50.0,,...,,,,,,,,,,
4,19812073,29007664.0,2148-06-02 21:57:00,VEN.,,,72.0,70.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
697413,17649217,29476837.0,2114-08-02 02:22:00,ART.,,96.0,108.0,48.0,60.0,,...,,,,,,,,,,
697414,17649796,25635238.0,2129-01-14 02:02:00,ART.,,100.0,190.0,32.0,35.0,,...,,,,,,,3.2,,,
697415,17650265,28377727.0,2202-03-17 00:14:00,ART.,,100.0,103.0,31.0,40.0,,...,,,,,,,,,,
697416,17651038,21747890.0,2186-02-28 17:12:00,VEN.,,99.0,36.0,33.0,,,...,,,,,,,,,0.6,


In [28]:
merge = bg[bg['subject_id'].isin(list_target)]
merge

Unnamed: 0,subject_id,hadm_id,charttime,specimen,so2,spo2,po2,pco2,fio2_chartevents,fio2,...,hemoglobin,carboxyhemoglobin,methemoglobin,chloride,calcium,temperature,potassium,sodium,lactate,glucose
4,19812073,29007664.0,2148-06-02 21:57:00,VEN.,,,72.0,70.0,,,...,,,,,,,,,,
5,19812073,26099257.0,2149-04-08 10:52:00,CENTRAL VENOUS.,,92.0,40.0,75.0,,,...,,,,,,,,,,
6,19812504,,2178-02-01 05:59:00,VEN.,69.0,,42.0,58.0,,,...,11.5,1.0,,99.0,1.21,,5.9,131.0,1.4,142.0
10,19813103,25866525.0,2164-05-14 14:30:00,ART.,,,86.0,40.0,,,...,,,,,,,,,4.1,
11,19813103,26669689.0,2166-06-23 17:05:00,VEN.,,,105.0,54.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
697399,17645766,24493468.0,2140-06-04 20:16:00,ART.,,99.0,116.0,40.0,60.0,,...,,,,,1.06,,4.9,,,127.0
697400,17645766,24493468.0,2140-06-05 00:16:00,ART.,96.0,98.0,96.0,37.0,50.0,50.0,...,,,,,,36.4,4.4,,,109.0
697401,17646651,29988947.0,2175-09-03 05:47:00,VEN.,,99.0,115.0,56.0,,,...,,,,,,,,,1.5,
697416,17651038,21747890.0,2186-02-28 17:12:00,VEN.,,99.0,36.0,33.0,,,...,,,,,,,,,0.6,


In [32]:
b = target_patients[target_patients['subject_id'] == 19812073]
b

Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,charttime,vent,age_at_admission
1543,19812073,26099257,30587871,2149-04-01 03:26:59,2149-04-05 15:16:13,2149-04-02 03:00:00,1,83
7225,19812073,25140121,32820188,2148-08-29 17:46:27,2148-09-01 17:54:48,2148-08-30 17:25:00,1,82
13085,19812073,26099257,35084383,2149-04-08 06:20:16,2149-04-10 18:04:06,2149-04-09 04:00:00,1,83
13213,19812073,29007664,35138397,2148-05-30 20:32:39,2148-06-02 18:57:02,2148-05-31 20:00:00,1,82


In [33]:
a = bg[bg['subject_id'] == 19812073]
a

Unnamed: 0,subject_id,hadm_id,charttime,specimen,so2,spo2,po2,pco2,fio2_chartevents,fio2,...,hemoglobin,carboxyhemoglobin,methemoglobin,chloride,calcium,temperature,potassium,sodium,lactate,glucose
4,19812073,29007664.0,2148-06-02 21:57:00,VEN.,,,72.0,70.0,,,...,,,,,,,,,,
5,19812073,26099257.0,2149-04-08 10:52:00,CENTRAL VENOUS.,,92.0,40.0,75.0,,,...,,,,,,,,,,
126560,19812073,,2141-12-28 09:53:00,ART.,97.0,,102.0,43.0,,,...,10.0,,,,,,,,,
126561,19812073,29007664.0,2148-06-04 05:03:00,VEN.,,,84.0,60.0,,,...,,,,,,,,,,
126562,19812073,25140121.0,2148-09-05 10:49:00,VEN.,,,85.0,60.0,,,...,,,,,,,,,,
126563,19812073,26099257.0,2149-04-01 04:04:00,VEN.,,100.0,93.0,75.0,,,...,,,,105.0,1.22,,5.9,136.0,1.1,130.0
126564,19812073,26099257.0,2149-04-08 05:17:00,MIX.,,,46.0,89.0,,,...,,,,,,,,,0.5,
231480,19812073,20555520.0,2147-05-09 15:57:00,VEN.,,,117.0,64.0,,,...,,,,,,,,,,
231481,19812073,25140121.0,2148-08-30 16:10:00,ART.,,100.0,115.0,57.0,30.0,,...,,,,,,,,,,
231482,19812073,26099257.0,2149-04-01 06:35:00,ART.,,99.0,229.0,38.0,50.0,,...,,,,,,,,,,


In [41]:
merged = pd.merge(a, b, on='subject_id')
merged['charttime'] = pd.to_datetime(merged['charttime'])
merged['intime'] = pd.to_datetime(merged['intime'])
filtered_a = merged[(merged['charttime'] >= merged['intime']) & 
                    (merged['charttime'] <= (merged['intime'] + pd.Timedelta(days=1)))]

filtered_a

KeyError: 'charttime'

In [40]:
b.dtypes

subject_id                   int64
hadm_id                      int64
stay_id                      int64
intime              datetime64[ns]
outtime                     object
charttime                   object
vent                         int64
age_at_admission             int64
dtype: object