In [10]:
import pandas as pd

file_path = r"E:\EICU\eicu-collaborative-research-database-2.0\medication.csv.gz"

# Read the medication table, loading only the three columns this cell uses.
df = pd.read_csv(file_path, usecols=["patientunitstayid", "drugname", "drugstartoffset"], low_memory=False)

# Lower-case substrings that mark a drugname as a vasopressor.
vasopressor_drugs = ["vasopressor", "norepinephrine", "epinephrine", "dopamine", "phenylephrine", "vasopressin"]

# Case-insensitive substring match against any listed name.
# na=False makes rows with a missing drugname count as "no match".
is_vasopressor = df["drugname"].str.lower().str.contains("|".join(vasopressor_drugs), na=False)

# Keep only orders whose drugstartoffset lies in the 0 to 60 minute window
# (both bounds inclusive); rows with a missing offset are excluded.
started_in_window = df["drugstartoffset"].between(0, 60)

# One row per patientunitstayid with at least one qualifying order, flagged 1.
# A single .loc selection followed by .assign builds a fresh frame, which avoids
# the SettingWithCopyWarning raised when a column is assigned onto a filtered slice.
# The original row index of the first matching order is preserved.
df_vasopressor = (
    df.loc[is_vasopressor & started_in_window, ["patientunitstayid"]]
    .drop_duplicates()
    .assign(vasopressor_used=1)
)





In [11]:
# Read the patient table.
# The path must be a raw string: in a normal literal the backslash sequences
# "\E", "\e" and "\p" are invalid escapes and make Python emit a SyntaxWarning.
df_patients = pd.read_csv(r"E:\EICU\eicu-collaborative-research-database-2.0\patient.csv.gz")

# Left-join the vasopressor flag onto every patientunitstayid in the patient table.
# Stays with no match get NaN from the join, which fillna(0) turns into 0;
# because of that NaN step the flag column ends up as float (0.0 / 1.0).
df_final = (
    df_patients[["patientunitstayid"]]
    .merge(df_vasopressor, on="patientunitstayid", how="left")
    .fillna(0)
)

  df_patients = pd.read_csv("E:\EICU\eicu-collaborative-research-database-2.0\patient.csv.gz")


In [12]:
# Display the merged table (patientunitstayid and its vasopressor_used flag).
df_final

Unnamed: 0,patientunitstayid,vasopressor_used
0,141168,0.0
1,141178,0.0
2,141179,0.0
3,141194,0.0
4,141196,0.0
...,...,...
200854,3353235,0.0
200855,3353237,0.0
200856,3353251,1.0
200857,3353254,0.0


In [13]:
# Count rows per flag value: 1.0 = matched a row in df_vasopressor, 0.0 = filled in by fillna(0).
df_final["vasopressor_used"].value_counts()

vasopressor_used
0.0    196563
1.0      4296
Name: count, dtype: int64

In [14]:
# Number of distinct patientunitstayid values in the merged table; compare it
# with the row count to check that the merge did not duplicate any stay.
df_final['patientunitstayid'].nunique()

200859

In [15]:
# Save the merged flag table as vasopressor.csv (relative path), without the index column.
df_final.to_csv("vasopressor.csv", index=False)

In [2]:
# Display the de-duplicated frame of stays flagged with vasopressor_used = 1.
# NOTE(review): this cell's execution count is out of sequence with the cells
# above it; re-run the notebook top to bottom before trusting its output.
df_vasopressor

Unnamed: 0,patientunitstayid,vasopressor_used
190603,446669,1
201625,474377,1
203227,478077,1
203589,478815,1
207188,487041,1
...,...,...
7301161,3352970,1
7301458,3353087,1
7301568,3353123,1
7301692,3353197,1


In [3]:
# Count flagged stays in df_vasopressor (every row carries vasopressor_used = 1).
# NOTE(review): the stored output here (11915) does not match the 4296 flagged
# stays reported for df_final, and this cell was executed out of sequence.
# Presumably the two outputs come from different kernel states; confirm with
# Restart Kernel -> Run All.
df_vasopressor["vasopressor_used"].value_counts()

vasopressor_used
1    11915
Name: count, dtype: int64