In [12]:
import pandas as pd
from tqdm import tqdm
import os
from typing import List, Dict
import datasets
import pickle

In [13]:
# Human-readable display name for every benchmark task, keyed by task ID.
# Insertion order mirrors the ordering used in the paper, so iterating over
# this dict visits tasks in paper order.
task_2_name: Dict[str, str] = {
    # -- Operational outcomes --
    "guo_los": "Long LOS",
    "guo_readmit": "30-Day Readmission",
    "guo_icu": "ICU Admission",
    # -- Anticipating lab test results --
    "lab_thrombocytopenia": "Thrombocytopenia",
    "lab_hyperkalemia": "Hyperkalemia",
    "lab_hypoglycemia": "Hypoglycemia",
    "lab_hyponatremia": "Hyponatremia",
    "lab_anemia": "Anemia",
    # -- Assignment of new diagnoses --
    "new_hypertension": "Hypertension",
    "new_hyperlipidemia": "Hyperlipidemia",
    "new_pancan": "Pancreatic Cancer",
    "new_celiac": "Celiac",
    "new_lupus": "Lupus",
    "new_acutemi": "Acute MI",
    # -- Anticipating chest x-ray findings --
    "chexpert": "Chest X-Ray",
}

# Kind of label value each task carries ('boolean', 'multiclass' or
# 'multilabel'), keyed by task ID. Tasks are grouped by value type; key order
# is: new-diagnosis tasks, operational tasks, lab tasks, then chexpert.
task_2_value_type: Dict[str, str] = {
    **dict.fromkeys(
        (
            "new_pancan",
            "new_celiac",
            "new_lupus",
            "new_acutemi",
            "new_hypertension",
            "new_hyperlipidemia",
            "guo_los",
            "guo_readmit",
            "guo_icu",
        ),
        "boolean",
    ),
    **dict.fromkeys(
        (
            "lab_thrombocytopenia",
            "lab_hyperkalemia",
            "lab_hypoglycemia",
            "lab_hyponatremia",
            "lab_anemia",
        ),
        "multiclass",
    ),
    "chexpert": "multilabel",
}

In [14]:
# Load the patient dataset from its parquet shards.
dataset = datasets.Dataset.from_parquet('/Users/mwornow/Downloads/ehrshot-meds-standard-stanford/data/*.parquet')
# Load the pickled ontology object. The context manager closes the file handle
# that a bare `pickle.load(open(...))` would leave open.
# NOTE: pickle.load can execute arbitrary code -- only point this at a trusted file.
with open('/Users/mwornow/Desktop/ehrshot-benchmark/assets/ontology_standard.pkl', 'rb') as ontology_file:
    ontology = pickle.load(ontology_file)

In [15]:
# Table of split assignments; the summary loop further down reads its
# 'omop_person_id' and 'split' columns to filter labels by split.
# NOTE(review): absolute, machine-specific path -- adjust before re-running elsewhere.
path_to_splits = '/Users/mwornow/Downloads/som-nero-nigam-starr.starr_omop_cdm5_confidential_filtered_2024_02_12_ehrshot_release_dua/person_id_map/merged.csv'
df_splits = pd.read_csv(path_to_splits)
# Show (n_rows, n_columns) as a quick sanity check of the loaded table.
df_splits.shape

(6732, 2)

# New EHRSHOT

Compute per-task label statistics for each split (train / test / val) and for all patients combined.

In [16]:
# Build per-task label statistics for the label files in ../../assets/labels.
# After this cell, results[k] (k in 'train', 'test', 'val', 'all') is a
# DataFrame with one row per task that could be loaded.

# Keys of every summary row, in display order. Passing them to pd.DataFrame
# keeps the derived-column arithmetic at the bottom working even when no task
# produced a row (pd.DataFrame([]) would otherwise have no columns at all).
summary_columns = ['task', 'task_name', 'n_patients', 'n_positive_patients', 'n_labels', 'n_positive_labels']
# Value columns every label file is expected to carry.
value_columns = ['boolean_value', 'integer_value', 'categorical_value', 'float_value']
# Patient IDs belonging to each split, computed once instead of once per task.
split_2_patient_ids = {
    split: df_splits[df_splits['split'] == split]['omop_person_id']
    for split in ['train', 'test', 'val']
}

results = {
    'train' : [],
    'test' : [],
    'val' : [],
    'all' : []
}
for task, task_name in tqdm(task_2_name.items()):
    path_to_task_csv: str = f"../../assets/labels/{task}_labels.csv"
    if not os.path.exists(path_to_task_csv):
        print(f"Skipping {task_name}")
        continue
    # Best-effort: any failure while loading/interpreting one task is reported
    # and that task is skipped, so the remaining tasks are still summarized.
    try:
        df = pd.read_csv(path_to_task_csv)
        # Add any value column the file lacks (filled with ''), appended in the
        # order listed in value_columns.
        missing_columns = [column for column in value_columns if column not in df.columns]
        for column in missing_columns:
            df[column] = ''
        # Persist the normalized schema back to disk, but only when something
        # was actually added -- avoids rewriting the label file on every run.
        if missing_columns:
            df.to_csv(path_to_task_csv, index=False)
        value_type = task_2_value_type[task]
        if value_type == "boolean":
            df['is_positive_label'] = df["boolean_value"]
        elif value_type == "multiclass":
            # Any class above 0 counts as a positive label.
            df['is_positive_label'] = df["integer_value"] > 0
        else:
            # Other value types (e.g. multilabel) are not summarized here.
            print(f"Skipping {task_name}")
            continue
    except Exception as e:
        print(f"Skipping {task_name}")
        print(e)
        continue

    # One summary row per split, plus one over all rows of the label file.
    for split in results:
        df_split = df if split == 'all' else df[df['patient_id'].isin(split_2_patient_ids[split])]
        results[split].append({
            'task' : task,
            'task_name' : task_name,
            'n_patients' : df_split['patient_id'].nunique(),
            # A patient is positive if at least one of their labels is positive.
            'n_positive_patients' : df_split.groupby('patient_id')['is_positive_label'].max().sum(),
            'n_labels' : df_split.shape[0],
            'n_positive_labels' : df_split['is_positive_label'].sum(),
        })

# Convert each list of rows into a DataFrame and add the derived columns.
for key in results.keys():
    results[key] = pd.DataFrame(results[key], columns=summary_columns)
    results[key]['n_negative_labels'] = results[key]['n_labels'] - results[key]['n_positive_labels']
    results[key]['n_negative_patients'] = results[key]['n_patients'] - results[key]['n_positive_patients']
    # Fraction of labels that are positive.
    results[key]['label_prevalence'] = results[key]['n_positive_labels'] / results[key]['n_labels']

100%|██████████| 15/15 [00:03<00:00,  4.60it/s]

Skipping Chest X-Ray





In [17]:
# All splits: per-task statistics over every row of each label file,
# without filtering by split.
results['all']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,4659,2332,14671,4938,9733,2327,0.336582
1,guo_readmit,30-Day Readmission,4514,1301,15545,3760,11785,3213,0.241878
2,guo_icu,ICU Admission,4508,756,14151,971,13180,3752,0.068617
3,lab_thrombocytopenia,Thrombocytopenia,6048,2587,185940,61037,124903,3461,0.328262
4,lab_hyperkalemia,Hyperkalemia,5908,1293,208136,4925,203211,4615,0.023662
5,lab_hypoglycemia,Hypoglycemia,5968,1419,333146,5060,328086,4549,0.015189
6,lab_hyponatremia,Hyponatremia,5909,3707,220836,62520,158316,2202,0.283106
7,lab_anemia,Anemia,6070,4281,191268,131278,59990,1789,0.686356
8,new_hypertension,Hypertension,2553,505,6254,910,5344,2048,0.145507
9,new_hyperlipidemia,Hyperlipidemia,2914,536,8052,999,7053,2378,0.124069


In [18]:
# Train: per-task statistics restricted to patients in the 'train' split.
results['train']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,1625,855,5543,1941,3602,770,0.350171
1,guo_readmit,30-Day Readmission,1588,486,5888,1556,4332,1102,0.264266
2,guo_icu,ICU Admission,1584,287,5380,360,5020,1297,0.066914
3,lab_thrombocytopenia,Thrombocytopenia,2077,917,71382,23214,48168,1160,0.325208
4,lab_hyperkalemia,Hyperkalemia,2029,462,79551,1919,77632,1567,0.024123
5,lab_hypoglycemia,Hypoglycemia,2051,527,127172,2000,125172,1524,0.015727
6,lab_hyponatremia,Hyponatremia,2029,1299,84601,24589,60012,730,0.290647
7,lab_anemia,Anemia,2084,1485,73110,50606,22504,599,0.69219
8,new_hypertension,Hypertension,872,171,2184,346,1838,701,0.158425
9,new_hyperlipidemia,Hyperlipidemia,1017,190,3076,389,2687,827,0.126463


In [19]:
# Val: per-task statistics restricted to patients in the 'val' split.
results['val']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,1520,732,4437,1440,2997,788,0.324544
1,guo_readmit,30-Day Readmission,1468,422,4693,1043,3650,1046,0.222246
2,guo_icu,ICU Admission,1462,234,4258,304,3954,1228,0.071395
3,lab_thrombocytopenia,Thrombocytopenia,1975,809,56498,18201,38297,1166,0.322153
4,lab_hyperkalemia,Hyperkalemia,1928,425,62838,1429,61409,1503,0.022741
5,lab_hypoglycemia,Hypoglycemia,1952,436,101100,1614,99486,1516,0.015964
6,lab_hyponatremia,Hyponatremia,1929,1178,67048,18174,48874,751,0.27106
7,lab_anemia,Anemia,1986,1387,58272,39705,18567,599,0.681374
8,new_hypertension,Hypertension,845,163,2009,290,1719,682,0.14435
9,new_hyperlipidemia,Hyperlipidemia,937,173,2505,279,2226,764,0.111377


In [20]:
# Test: per-task statistics restricted to patients in the 'test' split.
results['test']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,1514,745,4691,1557,3134,769,0.331912
1,guo_readmit,30-Day Readmission,1458,393,4964,1161,3803,1065,0.233884
2,guo_icu,ICU Admission,1462,235,4513,307,4206,1227,0.068026
3,lab_thrombocytopenia,Thrombocytopenia,1996,861,58060,19622,38438,1135,0.337961
4,lab_hyperkalemia,Hyperkalemia,1951,406,65747,1577,64170,1545,0.023986
5,lab_hypoglycemia,Hypoglycemia,1965,456,104874,1446,103428,1509,0.013788
6,lab_hyponatremia,Hyponatremia,1951,1230,69187,19757,49430,721,0.285559
7,lab_anemia,Anemia,2000,1409,59886,40967,18919,591,0.684083
8,new_hypertension,Hypertension,836,171,2061,274,1787,665,0.132945
9,new_hyperlipidemia,Hyperlipidemia,960,173,2471,331,2140,787,0.133954


# Readmission Triaging

In [82]:
# Load the 30-day readmission labels from the orig_labels directory for inspection.
# NOTE(review): absolute, machine-specific path -- adjust before re-running elsewhere.
df_labels = pd.read_csv('/Users/mwornow/Desktop/ehrshot-benchmark/assets/orig_labels/guo_readmit_labels.csv')
# Display the table (one row per patient_id / prediction_time label).
df_labels

Unnamed: 0,patient_id,prediction_time,value,label_type,boolean_value,integer_value,categorical_value,float_value
0,68184590,2135-03-03 23:59:59,False,boolean,False,,,
1,69266662,2191-01-27 23:59:59,True,boolean,True,,,
2,69266662,2191-02-27 23:59:59,False,boolean,False,,,
3,68466441,2249-10-03 23:59:59,False,boolean,False,,,
4,72408337,2176-06-17 23:59:59,False,boolean,False,,,
...,...,...,...,...,...,...,...,...
6917,30136210,2193-08-16 23:59:59,False,boolean,False,,,
6918,30239925,2267-02-22 23:59:59,False,boolean,False,,,
6919,30248260,2259-03-30 23:59:59,False,boolean,False,,,
6920,30211932,2230-01-15 23:59:59,False,boolean,False,,,


In [83]:
# Distribution of labels per patient: count the non-null boolean_value entries
# for each patient, then tally how many patients have each count.
df_labels.groupby(['patient_id']).count().value_counts(['boolean_value'])

boolean_value
1                2331
2                 696
3                 264
4                 143
5                  67
6                  46
7                  35
8                  19
9                  11
10                  6
13                  5
16                  4
12                  4
15                  3
11                  3
18                  3
26                  2
17                  2
23                  2
31                  1
28                  1
27                  1
14                  1
22                  1
21                  1
20                  1
64                  1
Name: count, dtype: int64

In [84]:
# Number of label rows per patient, most-labelled patients first.
df_labels.value_counts(['patient_id'])

patient_id
31412056      64
31654374      31
31680439      28
31029069      27
30895390      26
              ..
31030011       1
31030036       1
31032905       1
31039948       1
96958127       1
Name: count, Length: 3654, dtype: int64

In [78]:
# NOTE(review): this repeats the `datasets` import and the dataset load already
# done near the top of the notebook (same parquet path); it is only needed when
# this section is run on its own.
import datasets
dataset = datasets.Dataset.from_parquet('/Users/mwornow/Downloads/ehrshot-meds-standard-stanford/data/*.parquet')
# Display the dataset summary (feature names and row count).
dataset

  from .autonotebook import tqdm as notebook_tqdm


Dataset({
    features: ['patient_id', 'events'],
    num_rows: 6732
})

In [81]:
from femr.hf_utils import dump_patient_to_json
# Look up the row position of patient 115970265 in the 'patient_id' column and
# pass that row to dump_patient_to_json with the output name 'patient.json'.
# .index() raises ValueError if the patient ID is not in the dataset.
# NOTE(review): a later cell also targets 'patient.json', so the file is presumably overwritten there.
dump_patient_to_json(dataset[dataset['patient_id'].index(115970265)], 'patient.json')

# Original

In [34]:
import json

# Build per-task label statistics for the label files in
# ../../assets/orig_labels. After this cell, results[k] (k in 'train', 'test',
# 'val', 'all') is a DataFrame with one row per task that could be loaded.

# Keys of every summary row, in display order. Passing them to pd.DataFrame
# keeps the derived-column arithmetic at the bottom working even when no task
# produced a row (pd.DataFrame([]) would otherwise have no columns at all).
summary_columns = ['task', 'task_name', 'n_patients', 'n_positive_patients', 'n_labels', 'n_positive_labels']

# Split name -> patient IDs. Loaded once up front (it does not depend on the
# task) and via a context manager so the file handle is closed.
with open("../../assets/orig_labels/splits_omop_ids.json") as splits_file:
    orig_splits = json.load(splits_file)

results = {
    'train' : [],
    'test' : [],
    'val' : [],
    'all' : []
}
for task, task_name in tqdm(task_2_name.items()):
    path_to_task_csv: str = f"../../assets/orig_labels/{task}_labels.csv"
    if not os.path.exists(path_to_task_csv):
        print(f"Skipping {task_name} @ {path_to_task_csv}")
        continue
    df = pd.read_csv(path_to_task_csv)
    value_type = task_2_value_type[task]
    # Copy the generic 'value' column into the typed column matching this
    # task's value type; the other typed columns are filled with ''.
    df['boolean_value'] = df['value'] if value_type == 'boolean' else ''
    df['integer_value'] = df['value'].astype(int) if value_type == 'multiclass' else ''
    df['categorical_value'] = df['value'].astype(int) if value_type == 'multilabel' else ''
    df['float_value'] = df['value'] if value_type == 'float' else ''
    # Persist the typed columns back into the label file (overwrites it in place).
    df.to_csv(path_to_task_csv, index=False)
    if value_type == "boolean":
        df['is_positive_label'] = df["boolean_value"]
    elif value_type == "multiclass":
        # Any class above 0 counts as a positive label.
        df['is_positive_label'] = df["integer_value"] > 0
    elif value_type == "multilabel":
        # NOTE(review): 8192 presumably encodes the "no finding" label set -- TODO confirm.
        df['is_positive_label'] = df["categorical_value"] != 8192
    else:
        print(f"Skipping {task_name}")
        continue

    # One summary row per split, plus one over all rows of the label file.
    for split in results:
        df_split = df if split == 'all' else df[df['patient_id'].isin(orig_splits[split])]
        results[split].append({
            'task' : task,
            'task_name' : task_name,
            'n_patients' : df_split['patient_id'].nunique(),
            # A patient is positive if at least one of their labels is positive.
            'n_positive_patients' : df_split.groupby('patient_id')['is_positive_label'].max().sum(),
            'n_labels' : df_split.shape[0],
            'n_positive_labels' : df_split['is_positive_label'].sum(),
        })

# Convert each list of rows into a DataFrame and add the derived columns.
for key in results.keys():
    results[key] = pd.DataFrame(results[key], columns=summary_columns)
    results[key]['n_negative_labels'] = results[key]['n_labels'] - results[key]['n_positive_labels']
    results[key]['n_negative_patients'] = results[key]['n_patients'] - results[key]['n_positive_patients']
    # Fraction of labels that are positive.
    results[key]['label_prevalence'] = results[key]['n_positive_labels'] / results[key]['n_labels']

100%|██████████| 15/15 [00:02<00:00,  5.38it/s]


In [35]:
# All splits: per-task statistics over every row of each orig_labels file,
# without filtering by split.
results['all']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,3926,1252,7101,1903,5198,2674,0.26799
1,guo_readmit,30-Day Readmission,3654,484,6922,940,5982,3170,0.135799
2,guo_icu,ICU Admission,3617,253,6444,296,6148,3364,0.045934
3,lab_thrombocytopenia,Thrombocytopenia,5540,2278,160900,53845,107055,3262,0.334649
4,lab_hyperkalemia,Hyperkalemia,5458,1308,191772,5269,186503,4150,0.027475
5,lab_hypoglycemia,Hypoglycemia,5616,1434,292082,6224,285858,4182,0.021309
6,lab_hyponatremia,Hyponatremia,5459,3386,200416,56917,143499,2073,0.283994
7,lab_anemia,Anemia,5582,3881,172768,117097,55671,1701,0.67777
8,new_hypertension,Hypertension,2647,385,4238,535,3703,2262,0.126239
9,new_hyperlipidemia,Hyperlipidemia,2946,401,4944,543,4401,2545,0.10983


In [36]:
# Train: per-task statistics restricted to patients listed under 'train'
# in the splits JSON.
results['train']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,237,107,603,179,424,130,0.296849
1,guo_readmit,30-Day Readmission,239,48,624,94,530,191,0.150641
2,guo_icu,ICU Admission,234,19,575,22,553,215,0.038261
3,lab_thrombocytopenia,Thrombocytopenia,265,166,16937,5359,11578,99,0.316408
4,lab_hyperkalemia,Hyperkalemia,263,72,17827,464,17363,191,0.026028
5,lab_hypoglycemia,Hypoglycemia,264,110,27321,529,26792,154,0.019362
6,lab_hyponatremia,Hyponatremia,263,222,18608,5305,13303,41,0.285092
7,lab_anemia,Anemia,265,235,17060,12474,4586,30,0.731184
8,new_hypertension,Hypertension,144,31,285,43,242,113,0.150877
9,new_hyperlipidemia,Hyperlipidemia,169,28,423,49,374,141,0.115839


In [37]:
# Val: per-task statistics restricted to patients listed under 'val'
# in the splits JSON.
results['val']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,266,134,626,214,412,132,0.341853
1,guo_readmit,30-Day Readmission,266,50,637,99,538,216,0.155416
2,guo_icu,ICU Admission,258,35,582,40,542,223,0.068729
3,lab_thrombocytopenia,Thrombocytopenia,377,218,19897,6969,12928,159,0.350254
4,lab_hyperkalemia,Hyperkalemia,381,123,22836,525,22311,258,0.02299
5,lab_hypoglycemia,Hypoglycemia,378,112,37337,587,36750,266,0.015722
6,lab_hyponatremia,Hyponatremia,379,295,24052,6765,17287,84,0.281266
7,lab_anemia,Anemia,378,304,20671,15053,5618,74,0.728218
8,new_hypertension,Hypertension,138,45,322,68,254,93,0.21118
9,new_hyperlipidemia,Hyperlipidemia,170,38,382,47,335,132,0.123037


In [38]:
# Test: per-task statistics restricted to patients listed under 'test'
# in the splits JSON.
results['test']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,guo_los,Long LOS,269,124,703,193,510,145,0.274538
1,guo_readmit,30-Day Readmission,275,54,726,114,612,221,0.157025
2,guo_icu,ICU Admission,259,30,662,36,626,229,0.054381
3,lab_thrombocytopenia,Thrombocytopenia,385,219,20861,7138,13723,166,0.34217
4,lab_hyperkalemia,Hyperkalemia,388,118,24275,687,23588,270,0.028301
5,lab_hypoglycemia,Hypoglycemia,388,120,38729,544,38185,268,0.014046
6,lab_hyponatremia,Hyponatremia,388,307,25355,7902,17453,81,0.311655
7,lab_anemia,Anemia,387,318,21575,15478,6097,69,0.717404
8,new_hypertension,Hypertension,148,51,361,65,296,97,0.180055
9,new_hyperlipidemia,Hyperlipidemia,170,50,335,66,269,120,0.197015


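# Old

Saved outputs from an earlier run. The task keys shown below (e.g. `longlos`, `30dayreadmit`) do not appear in `task_2_name`, so these tables appear to come from a previous label set and cannot be reproduced by re-running the cells in this notebook.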

In [38]:
# All splits.
# NOTE(review): the saved output of this cell lists task keys such as 'longlos'
# that are not in task_2_name, so it was presumably produced by an earlier
# version of the summary loop; re-running shows whatever `results` currently holds.
results['all']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,longlos,Long LOS,4636,1772,14723,3286,11437,2864,0.223188
1,30dayreadmit,30-Day Readmission,4777,2363,32725,17327,15398,2414,0.529473
2,icuadmit,ICU Admission,4421,587,13915,713,13202,3834,0.05124
3,thrombocytopenia,Thrombocytopenia,6076,2605,206099,66168,139931,3471,0.32105
4,hyperkalemia,Hyperkalemia,5948,1310,234290,5535,228755,4638,0.023625
5,hypoglycemia,Hypoglycemia,5993,1422,356066,5431,350635,4571,0.015253
6,hyponatremia,Hyponatremia,5937,3735,252578,71111,181467,2202,0.281541
7,anemia,Anemia,6096,4308,214483,147625,66858,1788,0.688283
8,hypertension,Hypertension,2829,747,12452,3232,9220,2082,0.259557
9,hyperlipidemia,Hyperlipidemia,3250,810,16213,3703,12510,2440,0.228397


In [39]:
# Train.
# NOTE(review): the saved output of this cell lists task keys such as 'longlos'
# that are not in task_2_name, so it was presumably produced by an earlier
# version of the summary loop; re-running shows whatever `results` currently holds.
results['train']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,longlos,Long LOS,1620,645,5562,1267,4295,975,0.227796
1,30dayreadmit,30-Day Readmission,1674,850,12302,6628,5674,824,0.538774
2,icuadmit,ICU Admission,1560,228,5298,271,5027,1332,0.051151
3,thrombocytopenia,Thrombocytopenia,2090,922,79303,24989,54314,1168,0.315108
4,hyperkalemia,Hyperkalemia,2046,469,89381,2202,87179,1577,0.024636
5,hypoglycemia,Hypoglycemia,2063,528,135600,2123,133477,1535,0.015656
6,hyponatremia,Hyponatremia,2043,1310,96298,27665,68633,733,0.287285
7,anemia,Anemia,2096,1495,81941,56961,24980,601,0.695147
8,hypertension,Hypertension,955,258,4158,1055,3103,697,0.253728
9,hyperlipidemia,Hyperlipidemia,1118,279,6044,1242,4802,839,0.205493


In [40]:
# Val.
# NOTE(review): the saved output of this cell lists task keys such as 'longlos'
# that are not in task_2_name, so it was presumably produced by an earlier
# version of the summary loop; re-running shows whatever `results` currently holds.
results['val']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,longlos,Long LOS,1511,548,4440,979,3461,963,0.220495
1,30dayreadmit,30-Day Readmission,1556,758,10251,5447,4804,798,0.531363
2,icuadmit,ICU Admission,1427,178,4167,221,3946,1249,0.053036
3,thrombocytopenia,Thrombocytopenia,1983,814,61955,19712,42243,1169,0.318166
4,hyperkalemia,Hyperkalemia,1940,430,69919,1566,68353,1510,0.022397
5,hypoglycemia,Hypoglycemia,1957,437,107605,1758,105847,1520,0.016338
6,hyponatremia,Hyponatremia,1935,1186,75961,20541,55420,749,0.270415
7,anemia,Anemia,1993,1394,64634,43992,20642,599,0.680632
8,hypertension,Hypertension,940,241,4246,1104,3142,699,0.260009
9,hyperlipidemia,Hyperlipidemia,1058,270,5173,1189,3984,788,0.229847


In [41]:
# Test.
# NOTE(review): the saved output of this cell lists task keys such as 'longlos'
# that are not in task_2_name, so it was presumably produced by an earlier
# version of the summary loop; re-running shows whatever `results` currently holds.
results['test']

Unnamed: 0,task,task_name,n_patients,n_positive_patients,n_labels,n_positive_labels,n_negative_labels,n_negative_patients,label_prevalence
0,longlos,Long LOS,1505,579,4721,1040,3681,926,0.220292
1,30dayreadmit,30-Day Readmission,1547,755,10172,5252,4920,792,0.516319
2,icuadmit,ICU Admission,1434,181,4450,221,4229,1253,0.049663
3,thrombocytopenia,Thrombocytopenia,2003,869,64841,21467,43374,1134,0.331071
4,hyperkalemia,Hyperkalemia,1962,411,74990,1767,73223,1551,0.023563
5,hypoglycemia,Hypoglycemia,1973,457,112861,1550,111311,1516,0.013734
6,hyponatremia,Hyponatremia,1959,1239,80319,22905,57414,720,0.285175
7,anemia,Anemia,2007,1419,67908,46672,21236,588,0.687283
8,hypertension,Hypertension,934,248,4048,1073,2975,686,0.265069
9,hyperlipidemia,Hyperlipidemia,1074,261,4996,1272,3724,813,0.254604


# Celiac investigation

In [21]:
# Load the celiac labels from the old_labels and labels directories so the two
# versions can be compared side by side.
# NOTE(review): absolute, machine-specific paths -- adjust before re-running elsewhere.
df_old = pd.read_csv('/Users/mwornow/Desktop/ehrshot-benchmark/assets/old_labels/new_celiac_labels.csv')
df_new = pd.read_csv('/Users/mwornow/Desktop/ehrshot-benchmark/assets/labels/new_celiac_labels.csv')

In [22]:
# Display the celiac labels loaded from old_labels.
df_old

Unnamed: 0,patient_id,prediction_time,boolean_value
0,115967670,2018-06-01 23:59:00.000000,False
1,115967670,2020-08-01 23:59:00.000000,False
2,115967707,2017-03-27 23:59:00.000000,False
3,115967707,2019-07-11 23:59:00.000000,False
4,115967707,2020-11-16 23:59:00.000000,False
...,...,...,...
28927,115973643,2014-08-28 23:59:00.000000,False
28928,115973643,2014-10-30 23:59:00.000000,False
28929,115973643,2014-12-20 23:59:00.000000,False
28930,115973643,2015-02-21 23:59:00.000000,False


In [23]:
from femr.hf_utils import dump_patient_to_json
# Look up the row position of patient 115968001 in `dataset` and pass that row
# to dump_patient_to_json with the output name 'new_patient.json'.
# .index() raises ValueError if the patient ID is not in the dataset.
dump_patient_to_json(dataset[dataset['patient_id'].index(115968001)], 'new_patient.json')

In [9]:
# Load a second dataset from the 'ehrshot-meds-standard' directory and dump the
# same patient (115968001) from it, with the output name 'patient.json'.
# NOTE(review): this cell's execution count (9) is lower than that of the cells
# that import `datasets` and `dump_patient_to_json`, so it relies on names from
# an earlier run order -- verify it works after Restart & Run All.
dataset_old = datasets.Dataset.from_parquet('/Users/mwornow/Downloads/ehrshot-meds-standard/data/*.parquet')
dump_patient_to_json(dataset_old[dataset_old['patient_id'].index(115968001)], 'patient.json')

In [10]:
from femr.labelers.ehrshot import CeliacDiseaseCodeLabeler, get_inpatient_admission_discharge_times
# Instantiate the celiac labeler with the loaded ontology. It is only used by
# the commented-out call below.
labeler = CeliacDiseaseCodeLabeler(ontology)
# labeler.get_outcome_times(dataset[dataset['patient_id'].index(115967670)])
# Inspect patient 115967670, the first patient in both celiac label tables.
# The saved output is a list of (datetime, datetime) pairs -- presumably
# (admission, discharge) times, going by the function name.
get_inpatient_admission_discharge_times(dataset[dataset['patient_id'].index(115967670)], ontology)

[(datetime.datetime(2018, 6, 1, 16, 5), datetime.datetime(2018, 6, 1, 17, 50))]

In [11]:
# Display the celiac labels loaded from the labels directory, for comparison with df_old.
df_new

Unnamed: 0,patient_id,prediction_time,boolean_value,integer_value,float_value,categorical_value
0,115967670,2018-06-01 23:59:00.000000,False,,,
1,115967707,2017-03-27 23:59:00.000000,False,,,
2,115967707,2020-11-18 23:59:00.000000,False,,,
3,115967707,2021-03-29 23:59:00.000000,False,,,
4,115967727,2010-04-28 23:59:00.000000,True,,,
...,...,...,...,...,...,...
7995,115973643,2014-08-25 23:59:00.000000,False,,,
7996,115973643,2014-08-28 23:59:00.000000,False,,,
7997,115973643,2014-10-30 23:59:00.000000,False,,,
7998,115973643,2014-12-20 23:59:00.000000,False,,,
