# Prescriptions

In [1]:
import pandas as pd 

In [2]:
icu_stay_df = pd.read_csv("./temp_pp_data_files/icustay_lab_values.csv")
prescriptions_df = pd.read_csv("../V1.1.0/PRESCRIPTIONS.csv")

In [3]:
# filter ICU medication only
icu_only_prescriptions_df = prescriptions_df[prescriptions_df["ICUSTAY_ID"].notnull()]

In [4]:
icu_only_prescriptions_df.drop(["ROW_ID", "DRUG_NAME", "DRUG_NAME_GENERIC","PROD_STRENGTH", "DRUG_FORM", "STARTDATE", "ENDDATE"], inplace=True, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  icu_only_prescriptions_df.drop(["ROW_ID", "DRUG_NAME", "DRUG_NAME_GENERIC","PROD_STRENGTH", "DRUG_FORM", "STARTDATE", "ENDDATE"], inplace=True, axis=1)


In [5]:
# ICUSTAY_IDs are ints
icu_only_prescriptions_df = icu_only_prescriptions_df.astype({"ICUSTAY_ID": int})

### Check: Most common drugs overall vs most common per patient

In [6]:
most_common_overall = icu_only_prescriptions_df.groupby("DRUG_NAME_EN")["DRUG_NAME_EN"].count().sort_values(ascending=False).head(200)

In [7]:
most_common_across_patients = icu_only_prescriptions_df.groupby("DRUG_NAME_EN")["HADM_ID"].nunique().sort_values(ascending=False).head(30)

In [8]:
# common drugs?
set(most_common_overall.index) & set(most_common_across_patients.index)

{'(4:1)Glucose and Sodium Chloride Injection',
 '0.9% Sodium Chloride Injection',
 '10% Calcium Gluconate Injection',
 '10% Glucose Injection',
 '10% Potassium Chloride Injection',
 '20% Albumin Prepared From Human Plasma Injection',
 '5% Glucose and Sodium Chloride Injection',
 '5% Sodium Bicarbonate Injection',
 '50% Glucose Injection',
 'Adrenaline Hydrochlaride Injection',
 'Ambroxol Hydrochloride Injection',
 'Benzalkonium Chloride Patches',
 'Budesonide Suspension for Inhalation',
 'Concentrated Sodium Chloride Injection',
 'Diazepam Injection',
 'Enema Glycerini',
 'Furosemide Injection',
 'Haemocoagulase Agkistrodon for Injection',
 'Heparin Sodium Injection',
 'Ipratropium Bromide Solution for Inhalation',
 'Medium and Long Chain Fat Emulsion Injection',
 'Methylprednisolone Sodium Succinate for Injection',
 'Midazolam Injection',
 'Omeprazole Sodium For Injection',
 'Pediatric Compound Amino Acid Injection(19AA-I)',
 'Povidone lodine Solution',
 'Sodium Chloride Physiological

In [9]:
# overall only
set(most_common_overall.index) - set(most_common_across_patients.index)

{' Dexmedetomidine Hydrochloride Injection',
 '10% Chloral Hydrate Enemas',
 '10% Chloral Hydrate Syrup',
 '10% Potassium Chloride Oral Solution',
 '10% Sodium Chloride Injection',
 '20% Mannitol Injection',
 '20% Medium and Long Chain Fat Emulsion Injection',
 '25% Magnesium Sulfate Injection',
 '5% Amino Acidi Injection',
 '5% Calcium Chloride Injection',
 '5% Glucose Injection',
 '6% Dextran 40 Glucose Injection',
 'Aciclovir for Injection',
 'Ademetionine  1,4-Butanedisulfonate for Injection',
 'Alprostadil Injection',
 'Ambroxol Hydrochloride and Glucose Injection',
 'Aminomethylbenzoic Acid Injection',
 'Amiodarone Hydrochloride injection',
 'Amoxicillin Sodium and Clavulanate Potassium for Injection',
 'Ampicillin Sodium and Sulbactam Sodium for Injection',
 'Arginine Hydrochloride Injection',
 'Azithromycin for Injection',
 'Beclomethasone dipropionate inhalation suspension',
 'Benzylpenicillin Sodium for Injection',
 'Caffeine Citrate Injection',
 'Calcium Acetate Granules',
 

In [10]:
# across patients only
set(most_common_across_patients.index) - set(most_common_overall.index)

set()

In [11]:
# we will use both sets, most common overall and most common across patients
most_common_drugs = set(most_common_overall.index).union(set(most_common_across_patients.index) - set(most_common_overall.index))

In [12]:
# most common drugs make about 75% of all ICU drug prescriptions
most_common_drugs_df = icu_only_prescriptions_df[icu_only_prescriptions_df["DRUG_NAME_EN"].isin(most_common_drugs)]

In [13]:
cumulative_dose_per_hadm = most_common_drugs_df.groupby(["DRUG_NAME_EN", "HADM_ID"]).agg({"DOSE_VAL_RX":"sum", "DOSE_UNIT_RX": "unique"})

In [14]:
cumulative_dose_per_hadm

Unnamed: 0_level_0,Unnamed: 1_level_0,DOSE_VAL_RX,DOSE_UNIT_RX
DRUG_NAME_EN,HADM_ID,Unnamed: 2_level_1,Unnamed: 3_level_1
Dexmedetomidine Hydrochloride Injection,102610,1000.00,[μg]
Dexmedetomidine Hydrochloride Injection,102743,1300.00,[μg]
Dexmedetomidine Hydrochloride Injection,102749,133.34,[μg]
Dexmedetomidine Hydrochloride Injection,111055,600.00,[μg]
Dexmedetomidine Hydrochloride Injection,111706,400.00,[μg]
...,...,...,...
ω-3 Fish Oil Fat Emulsion Injection,113724,736.00,[ml]
ω-3 Fish Oil Fat Emulsion Injection,113802,128.00,[ml]
ω-3 Fish Oil Fat Emulsion Injection,113864,130.00,[ml]
ω-3 Fish Oil Fat Emulsion Injection,113924,608.00,[ml]


In [18]:
# doses seem to be mixed sometimes; leave them out for now
cumulative_dose_per_hadm['DOSE_UNIT_RX'].apply(lambda x: str(x)).unique()

array(["['μg']", "['ml']", "['ml' 'bag']", "['ml' nan]", "['tube' 'ml']",
       "['g']", "['mg']", "['mg' 'g']", "['g' 'mg']", "['mg' 'tube']",
       '[nan]', "['10000iu']", "['bag']", "[nan 'mg']", "['mg' nan]",
       "['mg' 'bottle']", "['u']", "['bottle']", "['u' 'bottle']",
       "['box' nan]", "['drop']", "['tube' 'mg']", "['tube']", "['cm']",
       "['cm' 'tube']", "['bottle' 'tube']", "['unit']", "['u' 'unit']",
       "['ku']", "['KU']", "['tube' 'u']", "['u' 'tube']", "['iu']",
       "['iu' 'unit']", "['unit' 'iu']", "['ml' 'tube']",
       "['bag' '100 million']", "['100 million' 'bag']", "[nan 'g']",
       "['g' nan]", "['μg' nan]", "['g' 'bag']", "['bag' 'g']",
       "['bottle' 'unit']", "['unit' 'bottle']", "['tube' 'cm']",
       "['g' 'bottle']", "['ml' 'bottle']", "['bottle' 'ml']", "['box']",
       "['mg' 'ml']", "['ml' 'mg']", "['iu' 'tube']",
       "['10000iu' 'bottle']"], dtype=object)

In [19]:
cumulative_dose_per_hadm.drop("DOSE_UNIT_RX", inplace=True, axis=1)

In [20]:
result_df = cumulative_dose_per_hadm.unstack().T.fillna(0)

In [21]:
result_df.reset_index().drop("level_0", inplace=True, axis=1)

In [22]:
result_df

Unnamed: 0_level_0,DRUG_NAME_EN,Dexmedetomidine Hydrochloride Injection,(4:1)Glucose and Sodium Chloride Injection,0.9% Sodium Chloride Injection,10% Calcium Gluconate Injection,10% Chloral Hydrate Enemas,10% Chloral Hydrate Syrup,10% Glucose Injection,10% Potassium Chloride Injection,10% Potassium Chloride Oral Solution,10% Sodium Chloride Injection,...,Vitamin A and D Drops,Vitamin C Injection,Vitamin D Drops(capsule),Vitamin E Soft Capsules,Vitamin K1 Injection,Voriconazole for Injection,Water-soluble Vitamin for Injection,Zinc Gluconate Granules,lron Proteinsuccinylate Oral Solution,ω-3 Fish Oil Fat Emulsion Injection
Unnamed: 0_level_1,HADM_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
DOSE_VAL_RX,100001,0.0,5070.0,1160.5,50.0,0.00,0.0,0.0,251.5,0.0,127.0,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DOSE_VAL_RX,100002,0.0,3650.0,1199.1,40.0,0.00,0.0,24.0,139.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0
DOSE_VAL_RX,100003,0.0,4605.0,920.5,10.0,6.50,0.0,0.0,108.0,0.0,5.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DOSE_VAL_RX,100005,0.0,708.0,1375.7,18.0,10.25,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DOSE_VAL_RX,100006,0.0,2220.0,1520.5,0.0,0.00,0.0,280.0,114.0,0.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,54.0,0.0,17.0,0.0
DOSE_VAL_RX,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DOSE_VAL_RX,114197,0.0,820.0,200.0,0.0,0.00,2.0,0.0,22.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DOSE_VAL_RX,114198,0.0,460.0,1186.0,0.0,0.00,0.0,300.0,73.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
DOSE_VAL_RX,114199,0.0,160.0,216.0,0.0,0.00,0.0,0.0,15.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DOSE_VAL_RX,114200,0.0,350.0,36.0,0.0,0.00,0.0,0.0,8.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
icu_say_df_with_drugs = icu_stay_df.merge(result_df, on="HADM_ID", how="outer")

In [24]:
# fill NaNs with 0 as merge produced NaNs in missing columns (icustay with no common drugs) -> set their dose to 0 for each drug
icu_say_df_with_drugs[list(icu_say_df_with_drugs.iloc[:,218:418].columns)] = icu_say_df_with_drugs.iloc[:,218:418].fillna(value=0)

In [25]:
icu_say_df_with_drugs.head(n=5)

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,FIRST_CAREUNIT,LAST_CAREUNIT,INTIME,OUTTIME,LOS,disease_name_1,disease_type_1,...,Vitamin A and D Drops,Vitamin C Injection,Vitamin D Drops(capsule),Vitamin E Soft Capsules,Vitamin K1 Injection,Voriconazole for Injection,Water-soluble Vitamin for Injection,Zinc Gluconate Granules,lron Proteinsuccinylate Oral Solution,ω-3 Fish Oil Fat Emulsion Injection
0,26,100000,200532,General ICU,General ICU,2098-11-09 18:30:55,2098-11-14 09:18:00,5.0,"Noninfective gastroenteritis and colitis, unsp...",Digestive system,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,28,100001,200533,General ICU,General ICU,2104-09-03 10:36:46,2104-09-12 14:51:00,9.0,"Myocarditis, unspecified",Cardiovascular,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,29,100002,200534,General ICU,General ICU,2062-11-29 20:52:52,2062-12-07 14:27:00,8.0,Drowning and nonfatal submersion,"Poisoning, Injury or External",...,0.0,3.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0
3,30,100003,200535,General ICU,General ICU,2098-11-21 15:38:47,2098-11-26 01:33:00,4.0,Retina,Neoplasm,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,31,100004,200536,General ICU,General ICU,2071-11-13 19:43:12,2071-11-15 13:40:00,2.0,Rotaviral enteritis,Infection,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
icu_say_df_with_drugs.to_csv("./temp_pp_data_files/icu_stay_drugs.csv", index=False)

# Drug classes

In [16]:
drug_hadm_df = cumulative_dose_per_hadm.reset_index().drop(["DOSE_VAL_RX", "DOSE_UNIT_RX"], axis=1)

In [23]:
drugs = set(pd.unique(drug_hadm_df["DRUG_NAME_EN"]))

ab_matches = ["amoxi", "cef", "clinda", "imi", "mero", "azit", "vanco", "tige"]
catecholamines_matches = ["epineph", "adrena", "isopren", "dobut"]
antiviral_matches = ["vir"] 
antifungal_matches = ["caspofu", "voricona"]
ab, cats, vir, fung, rest = [],[],[],[],[]

for drug_ in drugs:
    if any([drug in drug_.lower() for drug in ab_matches]):
        ab.append(drug_)
    elif any([drug in drug_.lower() for drug in catecholamines_matches]):
        cats.append(drug_)
    elif any([drug in drug_.lower() for drug in antiviral_matches]):
        vir.append(drug_)
    elif any([drug in drug_.lower() for drug in antifungal_matches]):
        fung.append(drug_)
    else:
        rest.append(drug_)

In [31]:
drug_hadm_df["received_antibiotics"] = drug_hadm_df["DRUG_NAME_EN"].isin(ab)
drug_hadm_df["received_catecholamines"] = drug_hadm_df["DRUG_NAME_EN"].isin(cats)
drug_hadm_df["received_antivirals"] = drug_hadm_df["DRUG_NAME_EN"].isin(vir)
drug_hadm_df["received_antifungals"] = drug_hadm_df["DRUG_NAME_EN"].isin(fung)
drug_hadm_df["received_rest"] = drug_hadm_df["DRUG_NAME_EN"].isin(rest)

In [33]:
import numpy as np
# True -> 1, False -> 0 , sum > 0 means this drug has been given in this HADM ID
drug_flg_df = drug_hadm_df.groupby("HADM_ID").agg({"received_antibiotics": "sum", "received_catecholamines": "sum", "received_antivirals": "sum", "received_antifungals": "sum", "received_rest": "sum"}).reset_index()
drug_flg_df["received_antibiotics"] = np.where(drug_flg_df["received_antibiotics"] > 0, 1, 0)
drug_flg_df["received_catecholamines"] = np.where(drug_flg_df["received_catecholamines"] > 0, 1, 0)
drug_flg_df["received_antivirals"] = np.where(drug_flg_df["received_antivirals"] > 0, 1, 0)
drug_flg_df["received_antifungals"] = np.where(drug_flg_df["received_antifungals"] > 0, 1, 0)
drug_flg_df["received_rest"] = np.where(drug_flg_df["received_rest"] > 0, 1, 0)

In [35]:
df = pd.read_csv("./temp_pp_data_files/icu_stay_mibi_NEW.csv")
final_df = pd.merge(df, drug_flg_df, on='HADM_ID', how='left')
final_df["received_antibiotics"] = final_df["received_antibiotics"].fillna(0)
final_df["received_catecholamines"] = final_df["received_catecholamines"].fillna(0)
final_df["received_antivirals"] = final_df["received_antivirals"].fillna(0)
final_df["received_antifungals"] = final_df["received_antifungals"].fillna(0)
final_df["received_rest"] = final_df["received_rest"].fillna(0)

In [37]:
final_df.to_csv("./temp_pp_data_files/data_02_2023_NEW.csv", index=False)