# Find AKI patients in eICU database

In [116]:
import pandas as pd

In [117]:
patient_file = r"E:\EICU\eicu-collaborative-research-database-2.0\patient.csv.gz"
patient_df = pd.read_csv(patient_file, compression="gzip")

diagnosis_file = r"E:\EICU\eicu-collaborative-research-database-2.0\diagnosis.csv.gz"
diagnosis_df = pd.read_csv(diagnosis_file, compression="gzip")

### Step1: Find patients with AKI(88,288)

In [118]:
# ICD-9 584.x (AKI)
diagnosis_df['icd9code'] = diagnosis_df['icd9code'].astype(str).str.strip()
def contains_aki(icd9_list):
    icd9_codes = [code.strip() for code in icd9_list.split(',')]
    return any(code.startswith('584') for code in icd9_codes)

aki_diag = diagnosis_df[diagnosis_df['icd9code'].apply(contains_aki)]



In [119]:
aki_diag

Unnamed: 0,diagnosisid,patientunitstayid,activeupondischarge,diagnosisoffset,diagnosisstring,icd9code,diagnosispriority
36,4269128,141266,True,96,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
93,3539282,141304,True,260,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
101,4017918,141314,True,125,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
209,4130667,141448,True,17,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
219,3743308,141470,True,13,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Major
...,...,...,...,...,...,...,...
2710639,46300378,3353251,False,2653,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
2710641,46290865,3353251,False,11304,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Major
2710649,46299599,3353251,False,1201,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Other
2710657,46289644,3353251,False,4080,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Major


In [120]:
aki_df = patient_df.merge(aki_diag[['patientunitstayid']], on='patientunitstayid', how='inner')

In [121]:
aki_df

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141266,129000,Male,73,Caucasian,71,113,"Sepsis, renal/UTI (including bladder)",188.0,21:25:00,...,Emergency Department,1,admit,120.4,112.9,22:44:00,1501,Floor,Alive,002-76877
1,141304,129031,Male,70,Caucasian,59,91,"Sepsis, pulmonary",165.0,08:54:00,...,Emergency Department,1,admit,,68.0,23:36:00,6639,Floor,Alive,002-75420
2,141314,129039,Male,45,Caucasian,73,85,"Aneurysm, abdominal aortic; with rupture",170.2,00:41:00,...,Operating Room,2,transfer,,102.6,18:39:00,632,Death,Expired,002-70742
3,141448,129154,Male,65,Hispanic,73,100,Coma/change in level of consciousness (for hep...,167.6,08:33:00,...,Emergency Department,1,admit,79.8,87.2,00:56:00,983,Floor,Alive,002-5376
4,141470,129172,Female,58,Caucasian,66,90,"Obstruction/other, surgery for (with or withou...",162.6,07:23:00,...,Operating Room,1,admit,,108.6,21:19:00,831,Floor,Alive,002-56630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88283,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
88284,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
88285,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
88286,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166


### Step2: Find patients with AKI(88,288) and ICU stay > 24 hours

In [122]:
aki_df = aki_df[aki_df['unitdischargeoffset'] > 24 * 60]
aki_df = aki_df.sort_values(by=['patienthealthsystemstayid', 'hospitaladmitoffset'])
aki_df = aki_df.drop_duplicates(subset=['patienthealthsystemstayid'], keep='first')


In [123]:
def clean_age(age):
    if age == ">89":
        return 90
    elif age.isdigit():  
        return int(age)
    else:
        return None

aki_df['age'] = aki_df['age'].astype(str).str.strip().apply(clean_age)

# 只保留 `age > 18` 的患者
aki_df = aki_df[aki_df['age'] > 18]



In [124]:

aki_df

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141266,129000,Male,73.0,Caucasian,71,113,"Sepsis, renal/UTI (including bladder)",188.0,21:25:00,...,Emergency Department,1,admit,120.40,112.9,22:44:00,1501,Floor,Alive,002-76877
1,141304,129031,Male,70.0,Caucasian,59,91,"Sepsis, pulmonary",165.0,08:54:00,...,Emergency Department,1,admit,,68.0,23:36:00,6639,Floor,Alive,002-75420
5,141515,129205,Female,68.0,Caucasian,73,114,"Sepsis, unknown",165.1,19:43:00,...,Floor,1,admit,70.90,76.0,17:57:00,8882,Floor,Alive,002-48364
9,141594,129267,Male,76.0,Caucasian,71,113,"CHF, congestive heart failure",182.9,17:56:00,...,Floor,1,admit,108.30,97.9,01:08:00,6011,Floor,Alive,002-25121
11,141751,129381,Female,60.0,Caucasian,71,87,"Sepsis, unknown",165.1,05:34:00,...,Emergency Department,1,admit,,67.7,20:15:00,5189,Floor,Alive,002-75863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88254,3353144,2743005,Male,51.0,African American,458,1106,"Renal failure, acute",180.3,18:53:00,...,Other Hospital,1,admit,39.20,42.2,20:10:00,15917,Step-Down Unit (SDU),Alive,035-20783
88260,3353190,2743049,Male,39.0,Caucasian,458,1106,Coma/change in level of consciousness (for hep...,180.3,19:35:00,...,Step-Down Unit (SDU),1,admit,110.60,101.0,00:32:00,3051,Floor,Alive,035-7770
88261,3353194,2743053,Female,51.0,Other/Unknown,458,1109,Cardiac arrest (with or without respiratory ar...,170.2,07:17:00,...,Emergency Department,1,admit,63.05,65.8,21:57:00,3590,Death,Expired,035-2735
88273,3353199,2743055,Female,66.0,Caucasian,458,1104,"Pneumonia, aspiration",157.5,06:38:00,...,Other ICU,2,transfer,71.50,96.8,23:37:00,10547,Step-Down Unit (SDU),Alive,035-18808


In [125]:
aki_df['patientunitstayid'].nunique()

14209

In [126]:
missing_ratio = aki_df.isnull().mean(axis=1)
aki_df = aki_df[missing_ratio < 0.3]

In [127]:
aki_df

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141266,129000,Male,73.0,Caucasian,71,113,"Sepsis, renal/UTI (including bladder)",188.0,21:25:00,...,Emergency Department,1,admit,120.40,112.9,22:44:00,1501,Floor,Alive,002-76877
1,141304,129031,Male,70.0,Caucasian,59,91,"Sepsis, pulmonary",165.0,08:54:00,...,Emergency Department,1,admit,,68.0,23:36:00,6639,Floor,Alive,002-75420
5,141515,129205,Female,68.0,Caucasian,73,114,"Sepsis, unknown",165.1,19:43:00,...,Floor,1,admit,70.90,76.0,17:57:00,8882,Floor,Alive,002-48364
9,141594,129267,Male,76.0,Caucasian,71,113,"CHF, congestive heart failure",182.9,17:56:00,...,Floor,1,admit,108.30,97.9,01:08:00,6011,Floor,Alive,002-25121
11,141751,129381,Female,60.0,Caucasian,71,87,"Sepsis, unknown",165.1,05:34:00,...,Emergency Department,1,admit,,67.7,20:15:00,5189,Floor,Alive,002-75863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88254,3353144,2743005,Male,51.0,African American,458,1106,"Renal failure, acute",180.3,18:53:00,...,Other Hospital,1,admit,39.20,42.2,20:10:00,15917,Step-Down Unit (SDU),Alive,035-20783
88260,3353190,2743049,Male,39.0,Caucasian,458,1106,Coma/change in level of consciousness (for hep...,180.3,19:35:00,...,Step-Down Unit (SDU),1,admit,110.60,101.0,00:32:00,3051,Floor,Alive,035-7770
88261,3353194,2743053,Female,51.0,Other/Unknown,458,1109,Cardiac arrest (with or without respiratory ar...,170.2,07:17:00,...,Emergency Department,1,admit,63.05,65.8,21:57:00,3590,Death,Expired,035-2735
88273,3353199,2743055,Female,66.0,Caucasian,458,1104,"Pneumonia, aspiration",157.5,06:38:00,...,Other ICU,2,transfer,71.50,96.8,23:37:00,10547,Step-Down Unit (SDU),Alive,035-18808


In [128]:
aki_df['patientunitstayid'].nunique()

14209

In [130]:
# admission

def classify_admission(source):
    if isinstance(source, str):
        emergency_keywords = ["Emergency", "ICU"]
        elective_keywords = ["Acute Care", "Chest Pain Center","Direct Admit", "Floor", "Observation", "Operating Room", "PACU", "Recovery", "Step-Down", "Other Hospital","Other"]

        if any(keyword in source for keyword in emergency_keywords):
            return "Emergency"
        elif any(keyword in source for keyword in elective_keywords):
            return "Elective"
        else:
            return "Unknown"
    
# 应用到 aki_df
aki_df["admission_type"] = patient_df["unitadmitsource"].apply(classify_admission)

In [131]:
aki_df.to_csv('aki_patients.csv', index=False)

In [132]:
aki_df

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid,admission_type
0,141266,129000,Male,73.0,Caucasian,71,113,"Sepsis, renal/UTI (including bladder)",188.0,21:25:00,...,1,admit,120.40,112.9,22:44:00,1501,Floor,Alive,002-76877,Elective
1,141304,129031,Male,70.0,Caucasian,59,91,"Sepsis, pulmonary",165.0,08:54:00,...,1,admit,,68.0,23:36:00,6639,Floor,Alive,002-75420,Emergency
5,141515,129205,Female,68.0,Caucasian,73,114,"Sepsis, unknown",165.1,19:43:00,...,1,admit,70.90,76.0,17:57:00,8882,Floor,Alive,002-48364,Emergency
9,141594,129267,Male,76.0,Caucasian,71,113,"CHF, congestive heart failure",182.9,17:56:00,...,1,admit,108.30,97.9,01:08:00,6011,Floor,Alive,002-25121,Emergency
11,141751,129381,Female,60.0,Caucasian,71,87,"Sepsis, unknown",165.1,05:34:00,...,1,admit,,67.7,20:15:00,5189,Floor,Alive,002-75863,Elective
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88254,3353144,2743005,Male,51.0,African American,458,1106,"Renal failure, acute",180.3,18:53:00,...,1,admit,39.20,42.2,20:10:00,15917,Step-Down Unit (SDU),Alive,035-20783,
88260,3353190,2743049,Male,39.0,Caucasian,458,1106,Coma/change in level of consciousness (for hep...,180.3,19:35:00,...,1,admit,110.60,101.0,00:32:00,3051,Floor,Alive,035-7770,Emergency
88261,3353194,2743053,Female,51.0,Other/Unknown,458,1109,Cardiac arrest (with or without respiratory ar...,170.2,07:17:00,...,1,admit,63.05,65.8,21:57:00,3590,Death,Expired,035-2735,Elective
88273,3353199,2743055,Female,66.0,Caucasian,458,1104,"Pneumonia, aspiration",157.5,06:38:00,...,2,transfer,71.50,96.8,23:37:00,10547,Step-Down Unit (SDU),Alive,035-18808,Elective


In [133]:
aki_df['hospitaldischargestatus'].value_counts()

hospitaldischargestatus
Alive      11450
Expired     2622
Name: count, dtype: int64

In [134]:
aki_df['hospitalid'].value_counts()

hospitalid
420    999
188    559
417    497
73     491
252    473
      ... 
263      1
437      1
414      1
179      1
86       1
Name: count, Length: 196, dtype: int64

In [135]:
aki_420 = aki_df[aki_df['hospitalid'] == 420]

In [136]:
aki_420['hospitaldischargestatus'].value_counts()

hospitaldischargestatus
Alive      777
Expired    218
Name: count, dtype: int64

In [137]:
aki_188 = aki_df[aki_df['hospitalid'] == 188]
aki_188['hospitaldischargestatus'].value_counts()

hospitaldischargestatus
Alive      459
Expired    100
Name: count, dtype: int64