In [1]:
import pandas as pd

In [2]:
# Service codes that this notebook treats as surgical.
# Matched with `.isin(...)` against the PREV_SERVICE and CURR_SERVICE columns
# of the MIMIC-III SERVICES.csv table when deriving `is_elective`.
IS_SURGICAL = [
    'CSURG',  # Cardiac Surgery
    'NSURG',  # Neurologic Surgical
    'ORTHO',  # Orthopaedic - surgical
    'PSURG',  # Plastic - restoration/reconstruction
    'SURG',   # Surgical - general surgical service
    'TSURG',  # Thoracic Surgical - surgery on the thorax
    'VSURG',  # Vascular Surgical
]

### is_elective

#### Determine time of ICU admission during hospital stay

In [8]:
# Load all ICU stays and parse their admission timestamps.
df = pd.read_csv('D:/mimic-iii-clinical-database-1.4/ICUSTAYS.csv')
df.INTIME = pd.to_datetime(df.INTIME)

# Map each hospital admission (HADM_ID) to the time of its *first* ICU admission.
# One hospital admission may have several ICU stays; filling the dict row by row
# kept whichever stay happened to come last in the file, so the reference time
# depended on file order. Taking the per-admission minimum is deterministic and
# avoids a slow Python-level loop over every row.
icu_times = df.groupby('HADM_ID').INTIME.min().to_dict()

# Spot-check a single admission.
icu_times[188028]

Timestamp('2128-06-24 15:05:20')

#### Determine whether a surgical service was involved prior to ICU admission

In [16]:
# Load service transfers and parse the transfer timestamps.
df = pd.read_csv('D:/mimic-iii-clinical-database-1.4/SERVICES.csv')
df.TRANSFERTIME = pd.to_datetime(df.TRANSFERTIME)

# ICU admission time for every service row, looked up by hospital admission.
# Admissions without a known ICU stay map to NaT, so the comparison below is
# False for them (equivalent to skipping those admissions entirely).
icu_intime = pd.to_datetime(df.HADM_ID.map(icu_times))

# Limit to services that started at or before ICU admission.
before_icu = df.TRANSFERTIME <= icu_intime

# A row counts as surgical if either the previous or the current service is surgical.
surgical = df.PREV_SERVICE.isin(IS_SURGICAL) | df.CURR_SERVICE.isin(IS_SURGICAL)

# If the patient was under a surgical service prior to admission to the ICU
# during the same hospitalization -> elective.
is_elective = set(df.loc[before_icu & surgical, 'HADM_ID'].tolist())

list(is_elective)[:5] + ['...']

[131072, 131073, 196612, 196617, 196619, '...']

### is_sepsis

Whether a patient is likely to have sepsis was already determined during data extraction, so here we only need to check whether a suspected infection time is known (note: these are NaN when SOFA < 2).

In [38]:
# Read the extracted cohort together with its demographics table.
cohort_df = pd.read_csv('D:/mimic-iii/final/cohort_all_admissions.csv')
demo_df = pd.read_csv('D:/mimic-iii/final/demographics_cohort.csv')

# Keep only rows present in both tables, matched on the three identifier columns.
cohort_df = pd.merge(cohort_df, demo_df, on=['subject_id', 'hadm_id', 'icustay_id'])

# An ICU stay is flagged as sepsis when a suspected infection time is recorded
# and the SOFA score is at least 2.
has_infection_time = cohort_df['suspected_infection_time_poe'].notna()
sofa_at_least_two = cohort_df['sofa'] >= 2
is_sepsis = cohort_df.loc[has_infection_time & sofa_at_least_two, 'icustay_id'].tolist()

is_sepsis[:5] + ['...']

[294638, 220597, 232669, 273430, 217847, '...']

## Augment demographics_cohort.csv

In [39]:
# Reload the demographics table so this cell starts from the on-disk state.
demo_df = pd.read_csv('D:/mimic-iii/final/demographics_cohort.csv')

# Add 0/1 indicator columns. `isin` is vectorized and hashes the lookup values
# once, instead of running a Python-level membership test per row (which is
# quadratic when the lookup collection is a list, as `is_sepsis` is).
demo_df['is_elective'] = demo_df.hadm_id.isin(is_elective).astype(int)
demo_df['is_sepsis'] = demo_df.icustay_id.isin(is_sepsis).astype(int)

demo_df.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,age,is_male,race_white,race_black,race_hispanic,race_other,height,...,lods,sirs,qsofa,qsofa_sysbp_score,qsofa_gcs_score,qsofa_resprate_score,elixhauser_hospital,blood_culture_positive,is_elective,is_sepsis
0,27513,163557,200003,48.294,1,1,0,0,0,177.8,...,3,4,2,1.0,0.0,1.0,0,1,1,1
1,20707,129310,200007,43.3439,1,1,0,0,0,177.8,...,2,2,2,1.0,0.0,1.0,0,0,0,0
2,29904,129607,200009,47.5548,0,1,0,0,0,161.29,...,4,4,2,1.0,0.0,1.0,-4,0,1,0
3,28448,177527,200012,32.9891,0,0,0,0,1,,...,1,3,2,1.0,0.0,1.0,0,0,0,0
4,9514,127229,200014,84.7274,1,0,0,0,1,167.64,...,5,4,2,1.0,0.0,1.0,0,1,1,1


In [41]:
# Save the augmented demographics table (now including the is_elective and
# is_sepsis columns) to disk, without writing the DataFrame index.
# NOTE(review): the output path is relative to the working directory, whereas the
# input was read from 'D:/mimic-iii/final/demographics_cohort.csv' — confirm that
# writing to a different location is intended.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column (likely an index
# saved by an earlier export); it is written out again here — confirm it is wanted.
demo_df.to_csv('final/demographics_cohort.csv', index=False)