In [1]:
import numpy as np
import pandas as pd
import copy
import time
import re
import collections
from datetime import datetime, timedelta
import os
from tqdm import tqdm
import sys

sys.path.append("..")
from utils.tools import df_analyze

to_timestamp = lambda s: np.int64(time.mktime(datetime.strptime(str(s), "%Y-%m-%d %H:%M:%S").timetuple()))
pattern = re.compile(r'^[0-9]*\.?[0-9]+$',re.S)

# datas中存储从mimic中提取的未处理数据
root_path = "../mimicdata/"
ts_data = 'P18'
text_data = 'Other'

save_dir = "../Multi_data/"
data_name = ts_data + '_' + text_data + '/'
save_path = save_dir + data_name + 'rawdata/'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if not os.path.exists(save_path):
    os.makedirs(save_path)

if not os.path.exists(save_path + 'set/'):
    os.makedirs(save_path + 'set/')

In [3]:
bio_path = '../mimicdata/%s/biomarks.csv'%(ts_data)
code_path = "../mimicdata/icd-9.csv"
admission_path = "../mimicdata/admissions.csv"
note_path = "../mimicdata/NOTEEVENTS.csv"

In [2]:

'''
# 统计时序数据中每个记录的变量个数
allfiles = os.listdir('../Mimic_ts/rawdata/set/')
allfiles.sort()
tmp = []
for f in tqdm(allfiles):
    file_name, file_ext = os.path.splitext(f)
    if file_ext == '.txt':
        df = pd.read_csv('./Mimic_ts/rawdata/set/'+file_name+'.txt', sep=",", header=1, names=["time", "param", "value"])
        tmp.append(len(df['param'].loc[4:].unique()))

count = collections.Counter(tmp)
print(count)
'''

'\n# 统计时序数据中每个记录的变量个数\nallfiles = os.listdir(\'./Mimic_ts/rawdata/set/\')\nallfiles.sort()\ntmp = []\nfor f in tqdm(allfiles):\n    file_name, file_ext = os.path.splitext(f)\n    if file_ext == \'.txt\':\n        df = pd.read_csv(\'./Mimic_ts/rawdata/set/\'+file_name+\'.txt\', sep=",", header=1, names=["time", "param", "value"])\n        tmp.append(len(df[\'param\'].loc[4:].unique()))\n\ncount = collections.Counter(tmp)\nprint(count)\n'

<font size=5>Pre-process biomarkers data</font>

1. Clean the value in the dataset and drop the null
2. Transform date time to timestamp
3. Sort it based on "RecordID" and "Time", ascending

In [9]:
def substitute(x):
    x = str(x)
    if x == 'Yes' or x == 'Comment':
        return 1
    elif x == 'No':
        return 0
    elif re.search(pattern, x):
        return float(x)
    else:
        return np.NaN
# x in ['\\N', 'no data', 'ERROR']

In [10]:
def process_chartevents(filename):
    df = pd.read_csv(filename, low_memory=False, 
            names=['PatientID','RecordID', 'Parameter', 'Value', 'Euom', 'Time'], sep='\t')
    df['Value'] = df['Value'].apply(substitute)
    df = df.dropna(axis=0, subset=['Value'])
    df['Time'] = df.Time.apply(to_timestamp)
    df.sort_values(["RecordID", "Time"], ascending=True, inplace=True)

    return df

In [4]:
# df0 = process_chartevents(bio_path)
df0 = pd.read_csv(save_path + 'biomarks.csv', low_memory=False)

In [5]:
df0.head()

Unnamed: 0,PatientID,RecordID,Parameter,Value,Euom,Time
0,58526,100001,220210,14.0,insp/min,4660779420
1,58526,100001,220210,22.0,insp/min,4660779600
2,58526,100001,220179,192.0,mmHg,4660779660
3,58526,100001,220180,100.0,mmHg,4660779660
4,58526,100001,220181,122.0,mmHg,4660779660


In [6]:
df0[df0.Time<0]

Unnamed: 0,PatientID,RecordID,Parameter,Value,Euom,Time


In [7]:
# 更换特征时需修改
item2id = {
    'creatinine':[791, 1525, 220615],
    'disastolic_blood_pressure':[224643, 225310, 220180, 8555, 220051, 8368, 8441, 8440],
    'glucose':[220621, 225664, 811, 807, 226537, 1529],
    'heart_rate':[211, 20045],
    'hemoglobin':[220228, 814],
    'mean_blood_pressure':[456, 220181, 224, 225312, 220052, 52, 6702, 224322],
    'oxygen_saturation':[646, 834, 220177, 220227],
    'platelets':[828, 227457],
    'red_blood_cell_count':[833],
    'respiratory_rate':[224422, 618, 220210, 224689, 614, 651, 224690, 615],
    'systemic_vascular_resistance':[626],
    'systolic_blood_pressure':[442, 227243, 224167, 220179, 225309, 6701, 220050, 51, 455],
    'temperature':[223761, 671, 676, 679, 678, 223762],
    'white_blood_cell_count':[861, 1542, 220546, 1127],
    'cholesterol':[789, 3748, 1524, 220603],
    'visit':[3385, 3512],
    'blood_flow':[79, 224144],
    'red_blood_cell':[3799],
    'white_blood_cell':[3834]
    }

In [8]:
id2item = {}
for key, value in item2id.items():
    for id in value:
        id2item[id] = key

In [9]:
df0['Parameter'] = df0['Parameter'].apply(lambda x: id2item[x])

In [45]:
df_analyze(df0)

Unnamed: 0,Column,null,unique,type
0,PatientID,0,46110,int64
1,RecordID,0,56857,int64
2,Parameter,0,18,object
3,Value,0,29299,float64
4,Euom,0,20,object
5,Time,0,3421211,int64


In [10]:
df0.to_csv(save_path + 'biomarks_plus.csv', index=None)

<font size=5>Combine diagnosis and procedure codes and reformat them</font>

The codes in MIMIC-III are given in separate files for procedures and diagnoses, and the codes are given without periods, which might lead to collisions if we naively combine them. So we have to add the periods back in the right place.

In [11]:
def reformat(code, is_diag):
    """
        Put a period in the right place because the MIMIC-3 data files exclude them.
        Generally, procedure codes have dots after the first two digits, 
        while diagnosis codes have dots after the first three digits.
    """
    code = ''.join(code.split('.'))
    if is_diag:
        if code.startswith('E'):
            if len(code) > 4:
                code = code[:4] + '.' + code[4:]
        else:
            if len(code) > 3:
                code = code[:3] + '.' + code[3:]
    else:
        code = code[:2] + '.' + code[2:]
    return code

In [28]:

dfproc = pd.read_csv('%sPROCEDURES_ICD.csv' % root_path)
dfdiag = pd.read_csv('%sDIAGNOSES_ICD.csv' % root_path)

In [29]:
dfdiag['absolute_code'] = dfdiag.apply(lambda row: str(reformat(str(row[4]), True)), axis=1)
dfproc['absolute_code'] = dfproc.apply(lambda row: str(reformat(str(row[4]), False)), axis=1)

In [30]:
dfcodes = pd.concat([dfdiag, dfproc])

In [31]:
dfcodes.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE,absolute_code
0,1297,109,172335,1.0,40301,403.01
1,1298,109,172335,2.0,486,486.0
2,1299,109,172335,3.0,58281,582.81
3,1300,109,172335,4.0,5855,585.5
4,1301,109,172335,5.0,4254,425.4


In [32]:
def merge_icd(dfcodes):
    IDs = []
    code = []
    group_codes = dfcodes.groupby('HADM_ID')
    for group in group_codes:
        recordId, df0 = group[0], group[1]
        arr = list(df0['absolute_code'])
        new_code = ';'.join(arr)
        IDs.append(recordId)
        code.append(new_code)
    df = pd.DataFrame({'RecordID': IDs, 'ICD': code})
    return df
            

In [33]:
df_label = merge_icd(dfcodes)

In [34]:
df_label.head()

Unnamed: 0,RecordID,ICD
0,100001,250.13;337.1;584.9;578.0;V58.67;250.63;536.3;4...
1,100003,531.00;285.1;070.54;571.5;456.21;537.89;401.9;...
2,100006,493.20;518.81;486;203.00;276.1;785.0;309.0;V12...
3,100007,560.81;557.0;997.3;486;401.9;45.62;54.59
4,100009,414.01;996.04;414.2;250.00;278.00;V85.35;414.8...


In [108]:
# df_label.to_csv(code_path, index=None)

<font size=5>Delete irrelevant icd-9 codes</font>

If you only pay attention to one or several diseases instead of full diseases in MIMIC-III, you should create a csv file that includes icd-9 codes of your specified diseases.\
For example, we do research on cardiovascular disease, so we created ICD9_cardiovascular.csv manually first.

In [35]:
df_label.set_index('RecordID', inplace=True)

In [19]:
df_cvd = pd.read_csv('%sICD9_cardiovascular.csv' % root_path, sep="\t", dtype="str")

In [20]:
set_cvd = set(df_cvd['code'].unique())

In [36]:
df_label.head()

Unnamed: 0_level_0,ICD
RecordID,Unnamed: 1_level_1
100001,250.13;337.1;584.9;578.0;V58.67;250.63;536.3;4...
100003,531.00;285.1;070.54;571.5;456.21;537.89;401.9;...
100006,493.20;518.81;486;203.00;276.1;785.0;309.0;V12...
100007,560.81;557.0;997.3;486;401.9;45.62;54.59
100009,414.01;996.04;414.2;250.00;278.00;V85.35;414.8...


In [21]:
# 自定义数据集的label
def get_labels(df1, admission_path, df0:pd.DataFrame, set_cvd):
    def Map(x):
        arr = set(df1.loc[x['RecordID'], 'ICD'].split(";"))
        arr &= set_cvd
        if len(arr) == 0:
            return ""
        else:
            astr = [str(i) for i in arr]
            return "-".join(astr)
        
    # df1 = pd.read_csv(code_path, low_memory=False, 
    #         names=['PatientID','RecordID', 'ICD-9'], sep='\t')
    df2 = pd.read_csv(admission_path, low_memory=False, names=['PatientID','RecordID', 'AdmitTime', 'DischTime', 'Death', 
                   'Ethnicity', 'Marital','Gender', 'BirthDate'], sep='\t')
    
    records = df0.RecordID.unique()

    df2['AdmitTime'] = df2.AdmitTime.apply(to_timestamp)
    df2['DischTime'] = df2.DischTime.apply(to_timestamp)
    df2['BirthDate'] = df2.BirthDate.apply(to_timestamp)

    f1 = df2.apply(lambda x: round((x['AdmitTime']-x['BirthDate'])/31536000), axis=1) # (3600*24*365)
    f2 = df2.apply(lambda x: round((x['DischTime']-x['AdmitTime'])/3600), axis=1)
    f4 = df2.apply(lambda x: Map(x), axis=1)
    df2 = pd.concat([df2, f1.rename('Age'), f2.rename('Length_of_stay'), f4.rename('ICD')], axis=1)
    df2['Cvd'] = df2.apply(lambda x: int(len(x['ICD'])!=0), axis=1)

    features = ['RecordID', 'PatientID', 'Length_of_stay', 'Death', 'Cvd', 'ICD']
    df_target = df2[features]
    df_target.sort_values("RecordID", ascending=True, inplace=True)
    df_target.set_index("RecordID", inplace=True)
    df_target = df_target.loc[records, :]
    df_target.reset_index(inplace=True)
    
    return df2, df_target

In [37]:
df2, df_target = get_labels(df_label, admission_path, df0, set_cvd)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [38]:
df_target.head()

Unnamed: 0,RecordID,PatientID,Length_of_stay,Death,Cvd,ICD
0,100001,58526,149,0,0,
1,100003,54610,98,0,0,
2,100006,9895,289,0,0,
3,100007,23018,175,0,0,
4,100009,533,118,0,1,414.01-414.8-414.2-411.1


In [39]:
df_target.to_csv("%s/Outcomes.csv" % save_path, index=None)

In [40]:
df_analyze(df2)

Unnamed: 0,Column,null,unique,type
0,PatientID,0,46520,int64
1,RecordID,0,58976,int64
2,AdmitTime,0,58651,int64
3,DischTime,0,58657,int64
4,Death,0,2,int64
5,Ethnicity,0,41,object
6,Marital,0,8,object
7,Gender,0,2,object
8,BirthDate,0,32540,int64
9,Age,0,88,int64


<font size=5>Pre-process text dataset</font>

1. Fill timestamp of any entries
2. Fill 'RecordID' based on fuzzy method

In [72]:
df3 = pd.read_csv(note_path, low_memory=False)

In [73]:
df_analyze(df3)

Unnamed: 0,Column,null,unique,type
0,ROW_ID,0,2083180,int64
1,SUBJECT_ID,0,46146,int64
2,HADM_ID,231836,58361,float64
3,CHARTDATE,0,38686,object
4,CHARTTIME,316566,1448491,object
5,STORETIME,836776,1224714,object
6,CATEGORY,0,15,object
7,DESCRIPTION,0,3848,object
8,CGID,836776,1912,float64
9,ISERROR,2082294,1,float64


In [57]:
features = ['SUBJECT_ID','HADM_ID','CHARTDATE','CHARTTIME', 'STORETIME','TEXT']

In [58]:
df3.loc[df3.HADM_ID.isna(), features]

Unnamed: 0,SUBJECT_ID,HADM_ID,CHARTDATE,CHARTTIME,STORETIME,TEXT
52115,82208,,2200-01-02,,,PATIENT/TEST INFORMATION:\nIndication: Congest...
52116,15472,,2176-05-28,,,PATIENT/TEST INFORMATION:\nIndication: Left ve...
59102,96774,,2174-03-31,,,PATIENT/TEST INFORMATION:\nIndication: Dyspnea...
59119,1136,,2193-09-19,,,PATIENT/TEST INFORMATION:\nIndication: Pericar...
59121,98046,,2198-09-13,,,PATIENT/TEST INFORMATION:\nIndication: s/p AVR...
...,...,...,...,...,...,...
2067614,26525,,2137-01-10,2137-01-10 14:30:00,2137-01-10 14:37:00,Nursing Progress Note:\nAdmitted infant girl [...
2067615,26525,,2137-01-10,2137-01-10 14:34:00,2137-01-10 14:53:00,Neonatology Sr. Fellow Admission and Transfer ...
2067632,26525,,2137-01-10,2137-01-10 19:16:00,2137-01-10 19:26:00,Neonatology\n[**Known lastname 2666**] has ret...
2067633,26525,,2137-01-10,2137-01-10 19:21:00,2137-01-10 20:19:00,Re-admit/NPN Note:\nInfant returned from TCH v...


In [59]:
def fill_time_in_notes(df3):
    def func(x):
        if not pd.isnull(x.CHARTTIME):
            res = x.CHARTTIME
        elif not pd.isnull(x.STORETIME):
            res = x.STORETIME
        else:
            res = str(x.CHARTDATE) + " 10:00:00"
        return res
    df3['Time'] = df3.apply(func, axis=1)
    return df3

In [74]:
df3 = fill_time_in_notes(df3)

In [75]:
df3.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,CHARTDATE,CHARTTIME,STORETIME,CATEGORY,DESCRIPTION,CGID,ISERROR,TEXT,Time
0,174,22532,167853.0,2151-08-04,,,Discharge summary,Report,,,Admission Date: [**2151-7-16**] Dischar...,2151-08-04 10:00:00
1,175,13702,107527.0,2118-06-14,,,Discharge summary,Report,,,Admission Date: [**2118-6-2**] Discharg...,2118-06-14 10:00:00
2,176,13702,167118.0,2119-05-25,,,Discharge summary,Report,,,Admission Date: [**2119-5-4**] D...,2119-05-25 10:00:00
3,177,13702,196489.0,2124-08-18,,,Discharge summary,Report,,,Admission Date: [**2124-7-21**] ...,2124-08-18 10:00:00
4,178,26880,135453.0,2162-03-25,,,Discharge summary,Report,,,Admission Date: [**2162-3-3**] D...,2162-03-25 10:00:00


In [62]:
def myjudge(time1, time2):
    gap = time1 - time2 
    if gap < 0:
        gap = -gap
    # 30 days
    if gap <= 2592000:
        return True
    else:
        return False

def fuzzy_RecordID(x, mydict):
    # 30 * 24 * 3600 = 2592000
    flag = 0
    if np.isnan(x.HADM_ID):
        arr = mydict[x.SUBJECT_ID]
        length = len(arr)
        if length == 0:
            return np.nan
        
        time = x.Time
        if time < arr[0][0] and myjudge(time, arr[0][0]):
            return arr[0][1]

        for i in range(1, length):
            mid = (arr[i-1][0] + arr[i][0]) / 2
            if time > mid:
                if myjudge(time, arr[i][0]):
                    return arr[i][1]
                else:
                    return np.nan
            else:
                if myjudge(time, arr[i-1][0]):
                    return arr[i-1][1]
                else:
                    return np.nan
        if time < arr[-1][0] and myjudge(time, arr[-1][0]):
            return arr[-1][1]
    else:
        return x.HADM_ID


def fill_notes(df0, df3):
    # 模糊填充NOTEEVENTS中的HADM_ID字段
    mydict = collections.defaultdict(list)
    records = df0.RecordID.unique()
    
    for record in records:
        df_tmp = df0[df0.RecordID==record]
        df_tmp = df_tmp.reset_index(drop=True)
        length = len(df_tmp)
        time = sum(df_tmp.Time) / length
        # time = df_tmp.iloc[length//2, 5]
        mydict[df_tmp.iloc[0, 0]].append([time, record])
    
    '''
    lastID = 0
    for row in df0.itertuples():
        if getattr(row, 'RecordID') == lastID:
            continue
        else:
            mydict[getattr(row, 'PatientID')].append([getattr(row, 'Time'), getattr(row, 'RecordID')])
            lastID = getattr(row, 'RecordID')
    '''
            
    df3['RecordID'] = df3.apply(lambda x: fuzzy_RecordID(x, mydict), axis=1)
    return df3

In [76]:
df3['Time'] = df3['Time'].apply(to_timestamp)

In [77]:
df3 = fill_notes(df0, df3)

In [78]:
df_analyze(df3)

Unnamed: 0,Column,null,unique,type
0,ROW_ID,0,2083180,int64
1,SUBJECT_ID,0,46146,int64
2,HADM_ID,231836,58361,float64
3,CHARTDATE,0,38686,object
4,CHARTTIME,316566,1448491,object
5,STORETIME,836776,1224714,object
6,CATEGORY,0,15,object
7,DESCRIPTION,0,3848,object
8,CGID,836776,1912,float64
9,ISERROR,2082294,1,float64


In [79]:
df3.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,CHARTDATE,CHARTTIME,STORETIME,CATEGORY,DESCRIPTION,CGID,ISERROR,TEXT,Time,RecordID
0,174,22532,167853.0,2151-08-04,,,Discharge summary,Report,,,Admission Date: [**2151-7-16**] Dischar...,5730400800,167853.0
1,175,13702,107527.0,2118-06-14,,,Discharge summary,Report,,,Admission Date: [**2118-6-2**] Discharg...,4684615200,107527.0
2,176,13702,167118.0,2119-05-25,,,Discharge summary,Report,,,Admission Date: [**2119-5-4**] D...,4714423200,167118.0
3,177,13702,196489.0,2124-08-18,,,Discharge summary,Report,,,Admission Date: [**2124-7-21**] ...,4879620000,196489.0
4,178,26880,135453.0,2162-03-25,,,Discharge summary,Report,,,Admission Date: [**2162-3-3**] D...,6066151200,135453.0


In [80]:
def get_notes(df3):
    cols = ['SUBJECT_ID', 'HADM_ID', 'TEXT', 'Time', 'CATEGORY']
    df3.dropna(subset=['HADM_ID'], axis=0, inplace=True)
    df3['CATEGORY'] = df3['CATEGORY'].apply(lambda x: 'Discharge_summary' if x=='Discharge summary' else x)
    df3 = df3[cols]
    df3.columns = ['PatientID', 'RecordID', 'Text', 'Time', 'category']
    return df3
    

In [81]:
df4 = get_notes(df3)

In [84]:
df4.head()

Unnamed: 0,PatientID,RecordID,Text,Time,category
0,22532,167853.0,Admission Date: [**2151-7-16**] Dischar...,5730400800,Discharge_summary
1,13702,107527.0,Admission Date: [**2118-6-2**] Discharg...,4684615200,Discharge_summary
2,13702,167118.0,Admission Date: [**2119-5-4**] D...,4714423200,Discharge_summary
3,13702,196489.0,Admission Date: [**2124-7-21**] ...,4879620000,Discharge_summary
4,26880,135453.0,Admission Date: [**2162-3-3**] D...,6066151200,Discharge_summary


In [83]:
df4.to_csv(save_path + 'Noteevents.csv', index=None)

<font size=5>Merge dynamic data and static data</font>

In [48]:
def get_measure_data(df0, df2):
    df_grouped = df0.groupby('RecordID')
    cols = ['Time', 'Parameter', 'Value']
    for group in df_grouped:
        recordId, df3 = group[0], group[1]

        tmp = df2[df2.RecordID == recordId]
        Gender = tmp.Gender.values[0]
        Marital = tmp.Marital.values[0].lower()
        Ethnicity = tmp.Ethnicity.values[0].lower()

        ttmp = [recordId, tmp['Age'].values[0], 0, 0, 0]
        
        if Ethnicity == 'white':
            ttmp[2] = 1
        elif 'black' in Ethnicity:
            ttmp[2] = 2
        else:
            pass
        
        if Marital == 'married':
            ttmp[3] = 1
        elif Marital == 'single':
            ttmp[3] = 2
        elif Marital == 'windowed':
            ttmp[3] = 3
        else:
            pass
        
        ttmp[4] = 1 if Gender == 'F' else 0 

        # Add 'Age', 'Ethnicity', 'Marital','Gender' 
        initDict = {"Time":[0, 0, 0, 0, 0], "Parameter":['RecordID', 'Age', 'Ethnicity', 'Marital','Gender'], "Value":ttmp }
        df4 = pd.DataFrame(initDict)
        
        df3.reset_index(drop=True)
        mindata = min(df3['Time'])
        # print(mindata)
        df3.Time = df3.Time.apply(lambda x: int((x - mindata)/60))
        df3 = df3[cols]
        df5 = df4.append(df3, ignore_index=True)
        df5 = df5.reset_index(drop=True)

        to_path = save_path + "set/" + str(recordId) + '.txt'
        df5.to_csv(to_path, sep=',', index=None)

In [50]:
df2.head()

Unnamed: 0,PatientID,RecordID,AdmitTime,DischTime,Death,Ethnicity,Marital,Gender,BirthDate,Age,Length_of_stay,ICD,Cvd
0,58526,100001,4660775160,4661311500,0,WHITE,DIVORCED,F,3541248000,35,149,,0
1,54610,100003,5689467240,5689819800,0,WHITE,SINGLE,M,3798806400,60,98,,0
2,9895,100006,4363141740,4364183880,0,BLACK/AFRICAN AMERICAN,SINGLE,F,2819462400,49,289,,0
3,23018,100007,5530195980,5530826400,0,WHITE,MARRIED,F,3200572800,74,175,,0
4,533,100009,6070665360,6071089020,0,WHITE,MARRIED,M,4152096000,61,118,414.01-414.8-414.2-411.1,1


In [51]:
get_measure_data(df0, df2)

In [52]:
df_grouped = df0.groupby('RecordID')

In [53]:
len(df_grouped)

56857

In [11]:
MIMIC_NOTES_FILE = '../Multi_data/P18_Discharge_summary/rawdata/Noteevents.csv'

In [12]:
notes = pd.read_csv(MIMIC_NOTES_FILE, index_col = 0)

  mask |= (ar1 == a)


In [13]:
notes.head()

Unnamed: 0_level_0,RecordID,Text,Time,category
PatientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
22532,167853.0,Admission Date: [**2151-7-16**] Dischar...,5730400800,Discharge_summary
13702,107527.0,Admission Date: [**2118-6-2**] Discharg...,4684615200,Discharge_summary
13702,167118.0,Admission Date: [**2119-5-4**] D...,4714423200,Discharge_summary
13702,196489.0,Admission Date: [**2124-7-21**] ...,4879620000,Discharge_summary
26880,135453.0,Admission Date: [**2162-3-3**] D...,6066151200,Discharge_summary
