# Vital

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import random
random.seed(1) 
np.random.seed(1)
from tqdm import tqdm


In [2]:
file_path = r"E:\EICU\eicu-collaborative-research-database-2.0\vitalPeriodic.csv.gz"

# The file is large, so we will read it in chunks
chunk_size = 100000
chunks = []
total_rows = sum(1 for _ in pd.read_csv(file_path, compression="gzip", usecols=['patientunitstayid'], chunksize=chunk_size))


In [3]:
columns_to_read = ['patientunitstayid', 'observationoffset', 'systemicsystolic', 'systemicdiastolic','systemicmean','heartrate','respiration']

In [4]:
total_rows

1467

In [5]:
with tqdm(total=total_rows, desc="Processing nurseCharting.csv") as pbar:
    for chunk in pd.read_csv(file_path, compression="gzip", low_memory=False, usecols=columns_to_read,chunksize=chunk_size):
        chunk_filtered = chunk[chunk['observationoffset'] <= 1440]
        chunks.append(chunk_filtered)
        
        pbar.update(len(chunk))

Processing nurseCharting.csv: 146671642it [01:46, 1378640.07it/s]     


In [6]:
vital_df = pd.concat(chunks, ignore_index=True)

In [7]:
vital_df

Unnamed: 0,patientunitstayid,observationoffset,heartrate,respiration,systemicsystolic,systemicdiastolic,systemicmean
0,141168,1289,118.0,,,,
1,141168,1374,118.0,,,,
2,141168,419,132.0,,,,
3,141168,754,128.0,,,,
4,141168,1389,78.0,,,,
...,...,...,...,...,...,...,...
45526935,3353263,504,87.0,36.0,,,
45526936,3353263,534,81.0,21.0,,,
45526937,3353263,549,78.0,14.0,,,
45526938,3353263,544,85.0,25.0,,,


In [8]:
vital_df['nipd_systolic_avg'] = vital_df.groupby('patientunitstayid')['systemicsystolic'].transform('mean')
vital_df['nipd_diastolic_avg'] = vital_df.groupby('patientunitstayid')['systemicdiastolic'].transform('mean')

In [10]:
vital_df.head()

Unnamed: 0,patientunitstayid,observationoffset,heartrate,respiration,systemicsystolic,systemicdiastolic,systemicmean,nipd_systolic_avg,nipd_diastolic_avg
0,141168,1289,118.0,,,,,,
1,141168,1374,118.0,,,,,,
2,141168,419,132.0,,,,,,
3,141168,754,128.0,,,,,,
4,141168,1389,78.0,,,,,,


In [9]:
vital_df['nipd_systolic_min'] = vital_df.groupby('patientunitstayid')['systemicsystolic'].transform('min')
vital_df['nipd_diastolic_min'] = vital_df.groupby('patientunitstayid')['systemicdiastolic'].transform('min')

In [10]:
vital_df['nipd_systolic_max'] = vital_df.groupby('patientunitstayid')['systemicsystolic'].transform('max')
vital_df['nipd_diastolic_max'] = vital_df.groupby('patientunitstayid')['systemicdiastolic'].transform('max')

In [11]:
vital_df['heartrate_min'] = vital_df.groupby('patientunitstayid')['heartrate'].transform('min')
vital_df['heartrate_max'] = vital_df.groupby('patientunitstayid')['heartrate'].transform('max')

In [12]:
vital_df['respiration_min'] = vital_df.groupby('patientunitstayid')['respiration'].transform('min')
vital_df['respiration_max'] = vital_df.groupby('patientunitstayid')['respiration'].transform('max')
vital_df['respiration_avg'] = vital_df.groupby('patientunitstayid')['respiration'].transform('mean')

In [13]:
vital_df.drop(columns=['systemicsystolic', 'systemicdiastolic', 'heartrate', 'respiration','observationoffset'], inplace=True)

In [14]:
vital_df.drop_duplicates(inplace=True)

In [15]:
vital_df

Unnamed: 0,patientunitstayid,systemicmean,nipd_systolic_avg,nipd_diastolic_avg,nipd_systolic_min,nipd_diastolic_min,nipd_systolic_max,nipd_diastolic_max,heartrate_min,heartrate_max,respiration_min,respiration_max,respiration_avg
0,141168,,,,,,,,78.0,140.0,,,
265,141178,,,,,,,,88.0,88.0,,,
266,141179,,,,,,,,78.0,110.0,,,
554,141194,54.0,97.573248,39.859873,56.0,30.0,120.0,62.0,72.0,117.0,0.0,34.0,22.961672
555,141194,52.0,97.573248,39.859873,56.0,30.0,120.0,62.0,72.0,117.0,0.0,34.0,22.961672
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45526615,3353251,67.0,121.559140,60.487455,0.0,0.0,208.0,121.0,60.0,104.0,13.0,33.0,25.414035
45526630,3353251,68.0,121.559140,60.487455,0.0,0.0,208.0,121.0,60.0,104.0,13.0,33.0,25.414035
45526663,3353251,87.0,121.559140,60.487455,0.0,0.0,208.0,121.0,60.0,104.0,13.0,33.0,25.414035
45526672,3353254,,,,,,,,72.0,83.0,9.0,31.0,22.226190


In [16]:
vital_df.to_csv('vital_variable.csv', index=False)

In [17]:
vital_df['patientunitstayid'].nunique()

192320

# Vital Signs Summary

In [None]:
import pandas as pd
import numpy as np
import os
import random
from tqdm import tqdm

random.seed(1)
np.random.seed(1)

# read vitalPeriodic data in chunks
file_path = r"E:\EICU\eicu-collaborative-research-database-2.0\vitalPeriodic.csv.gz"

chunk_size = 100000
columns_to_read = ['patientunitstayid', 'observationoffset', 'systemicmean', 'heartrate', 'respiration']

total_rows = sum(1 for _ in pd.read_csv(file_path, compression="gzip", usecols=['patientunitstayid'], chunksize=chunk_size))

chunks = []
with tqdm(total=total_rows, desc="Processing vitalPeriodic.csv") as pbar:
    for chunk in pd.read_csv(file_path, compression="gzip", low_memory=False, usecols=columns_to_read, chunksize=chunk_size):
        chunk_filtered = chunk[(chunk['observationoffset'] >= 0) & (chunk['observationoffset'] <= 1440)]  # only keep records within 0 to 1440 minutes
        chunks.append(chunk_filtered)
        pbar.update(len(chunk))

vital_df = pd.concat(chunks, ignore_index=True)


In [None]:
# retrieve the latest `systemicmean` for each `patientunitstayid`
systemicmean_latest = vital_df.loc[vital_df.groupby("patientunitstayid")["observationoffset"].idxmin(), ["patientunitstayid", "systemicmean"]]

# calculate statistics for `heartrate` and `respiration` based on ICU admission data
vital_stats = vital_df.groupby("patientunitstayid").agg(
    heartrate_min=('heartrate', 'min'),
    heartrate_max=('heartrate', 'max'),
    respiration_min=('respiration', 'min'),
    respiration_max=('respiration', 'max'),
    respiration_avg=('respiration', 'mean')
).reset_index()

vital_summary = systemicmean_latest.merge(vital_stats, on="patientunitstayid", how="left")

In [None]:
vital_summary.to_csv("vital_summary.csv", index=False)