<a href="https://colab.research.google.com/github/okechukwuchude/Automating-Medical-Coding/blob/main/data_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import re
import itertools
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
#ICD codes and meaningg
D_ICD_DIAG = pd.read_csv('/content/drive/MyDrive/#medical coding/mimic-iii-clinical-database-carevue-subset-1.4/NOTEEVENTS.csv.gz',compression='gzip')
D_ICD_PROC = pd.read_csv('/content/drive/MyDrive/#medical coding/mimic-iii-clinical-database-carevue-subset-1.4/D_ICD_PROCEDURES.csv.gz',compression='gzip')

In [3]:
#original ICD files
diagnoses_icd = pd.read_csv('/content/drive/MyDrive/#medical coding/mimic-iii-clinical-database-carevue-subset-1.4/DIAGNOSES_ICD.csv.gz',compression='gzip')
procedures_icd = pd.read_csv('/content/drive/MyDrive/#medical coding/mimic-iii-clinical-database-carevue-subset-1.4/PROCEDURES_ICD.csv.gz',compression='gzip')

In [4]:
diagnoses_icd.head(5)

Unnamed: 0,row_id,subject_id,hadm_id,seq_num,icd9_code
0,1,2,163353,1.0,V3001
1,2,2,163353,2.0,V053
2,3,2,163353,3.0,V290
3,4,3,145834,1.0,0389
4,5,3,145834,2.0,78559


In [5]:
diagnoses_icd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225345 entries, 0 to 225344
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   row_id      225345 non-null  int64  
 1   subject_id  225345 non-null  int64  
 2   hadm_id     225345 non-null  int64  
 3   seq_num     225339 non-null  float64
 4   icd9_code   225339 non-null  object 
dtypes: float64(1), int64(3), object(1)
memory usage: 8.6+ MB


In [6]:
#Read notes source file
notes = pd.read_csv('/content/drive/MyDrive/#medical coding/mimic-iii-clinical-database-carevue-subset-1.4/NOTEEVENTS.csv.gz', compression='gzip')
keep = notes[['hadm_id','category','text']]

In [7]:
keep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 880107 entries, 0 to 880106
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   hadm_id   861038 non-null  float64
 1   category  880107 non-null  object 
 2   text      880107 non-null  object 
dtypes: float64(1), object(2)
memory usage: 20.1+ MB


In [8]:
len(diagnoses_icd['icd9_code'].unique())


5054

In [9]:
keep

Unnamed: 0,hadm_id,category,text
0,,Radiology,[**2119-1-4**] 12:59 PM\n ABDOMEN U.S. (COMPLE...
1,,Radiology,[**2119-1-9**] 1:05 PM\n MR LIVER WITH CONTRAS...
2,,Radiology,[**2119-1-16**] 9:24 PM\n CHEST (PORTABLE AP) ...
3,,Radiology,[**2119-1-18**] 1:24 PM\n CT ABD W&W/O C; CT P...
4,,Radiology,[**2119-1-18**] 3:45 PM\n PARACENTESIS DIAG. O...
...,...,...,...
880102,104049.0,Nursing/other,1. FEN\nTF= min140cc/k/d of BM/E24. min47cc q4...
880103,104049.0,Nursing/other,I have examined pt. & agree w/ [**First Name8 ...
880104,104049.0,Nursing/other,NPN nights\n\n\nFluids/Nutrition: Weight 2025...
880105,104049.0,Nursing/other,Attending Note\nDay of life 12 PMA 35 [**2-7**...


In [10]:
len(diagnoses_icd['hadm_id'].unique())

26836

In [11]:
#checking for null values
keep.isnull().sum()

hadm_id     19069
category        0
text            0
dtype: int64

In [12]:
# Dictionary to store diagnoses data with admission IDs as keys and lists of diagnosis ICD codes as values
diagnoses_dict = {}

# Iterate through each row in the diagnoses_icd DataFrame
for i in range(len(diagnoses_icd)):
    # Get the current row
    entry = diagnoses_icd.iloc[i]
    # Extract the admission ID and ICD code from the current row
    hadm = entry['hadm_id']
    icd = entry['icd9_code']
    # Check if the admission ID already exists in the diagnoses_dict
    if hadm not in diagnoses_dict:
        # If not, create a new entry with the admission ID as key and a list containing the ICD code as value
        diagnoses_dict[hadm] = [icd]
    else:
        # If the admission ID already exists, append the ICD code to the existing list of codes
        diagnoses_dict[hadm].append(icd)


In [13]:
# Dictionary to store procedures data with admission IDs as keys and lists of procedure ICD codes as values
procedures_dict = {}

# Iterate through each row in the procedures_icd DataFrame
for i in range(len(procedures_icd)):
    # Get the current row
    entry = procedures_icd.iloc[i]
    # Extract the admission ID and ICD code from the current row
    hadm = entry['hadm_id']
    icd = entry['icd9_code']
    # Check if the admission ID already exists in the procedures_dict
    if hadm not in procedures_dict:
        # If not, create a new entry with the admission ID as key and a list containing the ICD code as value
        procedures_dict[hadm] = [icd]
    else:
        # If the admission ID already exists, append the ICD code to the existing list of codes
        procedures_dict[hadm].append(icd)

In [14]:
diagnoses_df = pd.DataFrame.from_dict(diagnoses_dict,orient='index')
procedures_df = pd.DataFrame.from_dict(procedures_dict,orient='index')

In [15]:
diagnoses_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,38
163353,V3001,V053,V290,,,,,,,,...,,,,,,,,,,
145834,0389,78559,5849,4275,41071,4280,6826,4254,2639,,...,,,,,,,,,,
178980,V3000,V053,V290,,,,,,,,...,,,,,,,,,,
118037,V3001,V053,V290,,,,,,,,...,,,,,,,,,,
159514,V3001,7706,7746,V290,V502,V053,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128876,V3000,V290,V053,,,,,,,,...,,,,,,,,,,
105824,V3001,76502,7705,7702,769,7742,7793,7756,7470,77082,...,,,,,,,,,,
104049,V3000,7731,76517,76527,V290,V053,V502,,,,...,,,,,,,,,,
140728,41401,42822,78820,44021,496,25000,4414,4280,4019,3051,...,,,,,,,,,,


In [16]:
# Rename the columns of the diagnoses DataFrame to include a prefix 'DIAG_CODE' followed by a number for each column
diagnoses_df.columns = ['DIAG_CODE'+str(i) for i in range(1, len(diagnoses_df.columns) + 1)]

# Set the name of the index of the diagnoses DataFrame to 'HADM_ID'
diagnoses_df.index.name = 'hadm_id'

# Rename the columns of the procedures DataFrame to include a prefix 'PRCD_CODE' followed by a number for each column
procedures_df.columns = ['PRCD_CODE'+str(i) for i in range(1, len(procedures_df.columns) + 1)]

# Set the name of the index of the procedures DataFrame to 'HADM_ID'
procedures_df.index.name = 'hadm_id'

# Merge the diagnoses and procedures DataFrames using an outer join based on the 'HADM_ID' column
# The resulting DataFrame will contain all unique 'HADM_ID' values from both DataFrames,
# with diagnosis and procedure codes aligned accordingly. Missing values will be filled with NaN.
codes_df = pd.merge(diagnoses_df, procedures_df, how='outer', on='hadm_id')


In [17]:
# For each row in the diagnoses DataFrame, join all non-null entries (diagnosis codes) into a single string, separated by commas
diagnoses_df['DIAG_CODES'] = diagnoses_df[diagnoses_df.columns[:]].apply(
    lambda x: ','.join(x.dropna().astype(str)),
    axis=1
)

# For each row in the procedures DataFrame, join all non-null entries (procedure codes) into a single string, separated by commas
procedures_df['PROC_CODES'] = procedures_df[procedures_df.columns[:]].apply(
    lambda x: ','.join(x.dropna().astype(str)),
    axis=1
)


In [18]:
# Extract the 'DIAG_CODES' column from the diagnoses DataFrame
diagnoses = diagnoses_df[['DIAG_CODES']]

# Extract the 'PROC_CODES' column from the procedures DataFrame
procedures = procedures_df[['PROC_CODES']]

# Merge the 'DIAG_CODES' and 'PROC_CODES' DataFrames using an outer join based on the 'HADM_ID' column
codes = pd.merge(diagnoses, procedures, how='outer', on='hadm_id')

# Drop any rows with missing values (NaN) from the merged DataFrame
codes = codes.dropna()


In [19]:
codes.to_csv('CODES.csv')

In [20]:
print(keep.columns)

Index(['hadm_id', 'category', 'text'], dtype='object')


In [21]:
print(codes.columns)

Index(['DIAG_CODES', 'PROC_CODES'], dtype='object')


In [22]:
# Merge the 'KEEP' DataFrame with the 'codes' DataFrame using a left join based on the 'HADM_ID' column
merged_df = pd.merge(keep, codes, how='left', on='hadm_id')

# Drop any rows with missing values (NaN) from the merged DataFrame
merged_df = merged_df.dropna()
merged_df= merged_df.set_index('hadm_id')


In [23]:
merged_df

Unnamed: 0_level_0,category,text,DIAG_CODES,PROC_CODES
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
102314.0,Radiology,"[**2119-2-3**] 4:10 PM\n US ABD LIMIT, SINGLE ...",57220705457155724518550709985945385849,"5059.0,311.0,3324.0,9672.0,5011.0,3893.0,3995...."
102314.0,Radiology,"[**2119-2-4**] 10:38 AM\n US ABD LIMIT, SINGLE...",57220705457155724518550709985945385849,"5059.0,311.0,3324.0,9672.0,5011.0,3893.0,3995...."
185945.0,Radiology,[**2167-2-14**] 10:41 AM\n CHEST (PA & LAT) ...,4240428051881425442611997142731423999811,"3524.0,3403.0,3409.0,3961.0,3723.0,8856.0,8853..."
185945.0,Radiology,[**2167-2-16**] 7:50 PM\n CHEST (PORTABLE AP) ...,4240428051881425442611997142731423999811,"3524.0,3403.0,3409.0,3961.0,3723.0,8856.0,8853..."
185945.0,Radiology,[**2167-2-17**] 2:24 AM\n CHEST (PORTABLE AP) ...,4240428051881425442611997142731423999811,"3524.0,3403.0,3409.0,3961.0,3723.0,8856.0,8853..."
...,...,...,...,...
104049.0,Nursing/other,1. FEN\nTF= min140cc/k/d of BM/E24. min47cc q4...,"V3000,7731,76517,76527,V290,V053,V502","640.0,9983.0,9955.0"
104049.0,Nursing/other,I have examined pt. & agree w/ [**First Name8 ...,"V3000,7731,76517,76527,V290,V053,V502","640.0,9983.0,9955.0"
104049.0,Nursing/other,NPN nights\n\n\nFluids/Nutrition: Weight 2025...,"V3000,7731,76517,76527,V290,V053,V502","640.0,9983.0,9955.0"
104049.0,Nursing/other,Attending Note\nDay of life 12 PMA 35 [**2-7**...,"V3000,7731,76517,76527,V290,V053,V502","640.0,9983.0,9955.0"


In [24]:
sample = merged_df.sample(n=20000)


In [25]:
sample.to_csv('sample_20k.csv')


In [26]:
sample.columns

Index(['category', 'text', 'DIAG_CODES', 'PROC_CODES'], dtype='object')

In [27]:
sample

Unnamed: 0_level_0,category,text,DIAG_CODES,PROC_CODES
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
136282.0,Nursing/other,Nursing addendum :\nCV: R. sub-clavian triple ...,4140142405119427314824150705185428099662,"311.0,4311.0,3324.0,3615.0,3611.0,3512.0,9604...."
143293.0,Nursing/other,npn 7a-7p\n\n\n1.) FEN: TF min of 130cc/kg/d o...,"V3000,76503,769,7707,7793,7742,3229,77211,7718...","9604.0,9671.0,9390.0,9915.0,966.0,3891.0,3892...."
196821.0,Nursing/other,NPN 7a-7p\n\n\n#2: [**Known lastname 3506**] r...,"V3101,76515,7707,7470,76383,36221,V290,V053","9604.0,9671.0,9955.0"
103303.0,Radiology,[**2187-11-24**] 7:09 AM\n UNILAT UP EXT VEINS...,"99661,03811,99592,5849,99812,4210,2851,9972,45...","3799.0,9904.0"
139407.0,Nursing/other,NPN 1900-0700\n\n\n1. G/D: Temps stable on ser...,"V3000,7722,7790,7633,7661,V502,V053,V290","331.0,640.0,9915.0,966.0"
...,...,...,...,...
188486.0,Nursing/other,NPN-MICU\nMs [**Known lastname 8621**] has mad...,25013518815845535514107125053250734019,"3723.0,8853.0,8856.0,3893.0,3324.0,9672.0,9604.0"
197613.0,Radiology,[**2132-12-18**] 3:55 PM\n CHEST (PORTABLE AP)...,410113989199714275396313878820427312449,"3601.0,3607.0,3723.0,8856.0,9920.0,9962.0,9904.0"
116969.0,Nursing/other,NURSING MICU NOTE 7P-7A\n\nPT WAS [**Name (NI)...,"4464,51884,7863,486,2851,70703,5845,4280,5770,...","3979.0,311.0,9971.0,9702.0,9928.0,8611.0,3893...."
131263.0,Nursing/other,npn 1900-0700\n\n\n#2 resp\ninfant continues i...,"03811,99591,V090,7742,769,7793,76525,76514,779...","966.0,9921.0,9955.0,9955.0"


**Codes To Dictionary**

In [28]:
sample_ids = sample.index


In [29]:
flt_diag = diagnoses_icd[diagnoses_icd['hadm_id'].isin(sample_ids)]
flt_proc = procedures_icd[procedures_icd['hadm_id'].isin(sample_ids)]

In [30]:
diag_keep = flt_diag['icd9_code'].value_counts()[:300]
proc_keep = flt_proc['icd9_code'].value_counts()[:100]


In [31]:
diag2idx, idx2diag = {},{}
for d in diag_keep.index:
    if d not in diag2idx:
        idx2diag[len(idx2diag)] = d
        diag2idx[d] = len(diag2idx)

proc2idx, idx2proc = {},{}
for p in proc_keep.index:
    if p not in proc2idx:
        idx2proc[len(idx2proc)] = p
        proc2idx[p] = len(proc2idx)

In [32]:
with open('diag2idx.pickle','wb') as f:
    pickle.dump(diag2idx,f,pickle.HIGHEST_PROTOCOL)
with open('idx2diag.pickle','wb') as f:
    pickle.dump(idx2diag,f,pickle.HIGHEST_PROTOCOL)
with open('proc2idx.pickle','wb') as f:
    pickle.dump(proc2idx,f,pickle.HIGHEST_PROTOCOL)
with open('idx2proc.pickle','wb') as f:
    pickle.dump(idx2proc,f,pickle.HIGHEST_PROTOCOL)

Convert CODE TO LIST

```
# This is formatted as code
```



In [33]:

def diag_code2idx(org_lst):
    coded_lst = []
    for c in org_lst.split(','):
        if c in diag2idx:
            coded_lst.append(diag2idx[c])
    return coded_lst

In [34]:
def proc_code2idx(org_lst):
    coded_lst = []
    for c in org_lst.split(','):
        c_ = int(str(c).split('.')[0])
        if c_ in proc2idx:
            coded_lst.append(proc2idx[c_])

    return coded_lst

In [35]:
sample['CODED_DIAG'] = sample['DIAG_CODES'].apply(diag_code2idx)
sample['CODED_PROC'] = sample['PROC_CODES'].apply(proc_code2idx)

In [36]:
sample

Unnamed: 0_level_0,category,text,DIAG_CODES,PROC_CODES,CODED_DIAG,CODED_PROC
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
136282.0,Nursing/other,Nursing addendum :\nCV: R. sub-clavian triple ...,4140142405119427314824150705185428099662,"311.0,4311.0,3324.0,3615.0,3611.0,3512.0,9604....","[7, 44, 34, 4, 40, 16, 22, 1, 51]","[12, 20, 13, 19, 52, 2, 3, 23, 29, 33, 14, 0, 10]"
143293.0,Nursing/other,npn 7a-7p\n\n\n1.) FEN: TF min of 130cc/kg/d o...,"V3000,76503,769,7707,7793,7742,3229,77211,7718...","9604.0,9671.0,9390.0,9915.0,966.0,3891.0,3892....","[17, 123, 8, 93, 14, 3, 163, 83, 224, 59, 245,...","[2, 7, 9, 6, 0, 10, 31, 8, 16, 5]"
196821.0,Nursing/other,NPN 7a-7p\n\n\n#2: [**Known lastname 3506**] r...,"V3101,76515,7707,7470,76383,36221,V290,V053","9604.0,9671.0,9955.0","[19, 63, 93, 45, 99, 2, 5]","[2, 7, 5]"
103303.0,Radiology,[**2187-11-24**] 7:09 AM\n UNILAT UP EXT VEINS...,"99661,03811,99592,5849,99812,4210,2851,9972,45...","3799.0,9904.0","[111, 21, 9, 82, 238, 33, 96, 4, 94, 1, 0, 218]",[4]
139407.0,Nursing/other,NPN 1900-0700\n\n\n1. G/D: Temps stable on ser...,"V3000,7722,7790,7633,7661,V502,V053,V290","331.0,640.0,9915.0,966.0","[17, 208, 25, 5, 2]","[16, 17, 6, 0]"
...,...,...,...,...,...,...
188486.0,Nursing/other,NPN-MICU\nMs [**Known lastname 8621**] has mad...,25013518815845535514107125053250734019,"3723.0,8853.0,8856.0,3893.0,3324.0,9672.0,9604.0","[6, 28, 29, 0]","[24, 33, 14, 1, 13, 3, 2]"
197613.0,Radiology,[**2132-12-18**] 3:55 PM\n CHEST (PORTABLE AP)...,410113989199714275396313878820427312449,"3601.0,3607.0,3723.0,8856.0,9920.0,9962.0,9904.0","[212, 156, 43, 55, 298, 169, 4, 38]","[42, 53, 24, 14, 41, 50, 4]"
116969.0,Nursing/other,NURSING MICU NOTE 7P-7A\n\nPT WAS [**Name (NI)...,"4464,51884,7863,486,2851,70703,5845,4280,5770,...","3979.0,311.0,9971.0,9702.0,9928.0,8611.0,3893....","[75, 184, 15, 33, 87, 28, 1, 104, 172, 128, 288]","[12, 91, 1, 13, 2, 3, 10, 0, 4, 28, 13, 13, 13..."
131263.0,Nursing/other,npn 1900-0700\n\n\n#2 resp\ninfant continues i...,"03811,99591,V090,7742,769,7793,76525,76514,779...","966.0,9921.0,9955.0,9955.0","[111, 146, 62, 3, 8, 14, 66, 88, 26]","[0, 5, 5]"


ROUTINE PROCESSING

In [37]:
def remove_stopwords(text):
        stop_words = set(stopwords.words("english"))
        word_tokens = word_tokenize(text)
        filtered_text = [word for word in word_tokens if word not in stop_words]
        return filtered_text

def preprocess(note):
    note = note.replace('\n',' ')
    note = note.replace('w/', 'with')
    note = note.lower() #lower case
    note = re.sub(r'\d+', '', note) #remove numbers
    note = note.translate(str.maketrans('', '', string.punctuation)) #remove punctuation
    note = " ".join(note.split())
    note = remove_stopwords(note)
    return note

In [49]:
# Create a sample of 1000 rows from the 'merged_df' DataFrame
sample_1k = merged_df.sample(n=1000, random_state=42)
sample_1k_removed = sample_1k.copy()

# Create a sample of 10000 rows from the 'merged_df' DataFrame
sample_10k = merged_df.sample(n=10000, random_state=42)
sample_10k_removed = sample_10k.copy()

# Create a copy of the 'merged_df' DataFrame
merged_df_removed = merged_df.copy()

In [50]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')

sample_1k_removed['note'] = sample_1k['text'].apply(preprocess)
sample_10k_removed['note'] = sample_10k['text'].apply(preprocess)
merged_df_removed['note'] = merged_df_removed['text'].apply(preprocess)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [52]:
print(sample_1k_removed.columns)


Index(['category', 'text', 'DIAG_CODES', 'PROC_CODES', 'note'], dtype='object')


In [53]:
sample_1k_cleaned = sample_1k_removed[['note','DIAG_CODES','PROC_CODES']]
sample_10k_cleaned = sample_10k_removed[['note','DIAG_CODES','PROC_CODES']]
merged_df_cleaned = merged_df_removed[['note','DIAG_CODES','PROC_CODES']]

In [54]:
sample_10k_removed.to_csv('SAMPLE10K_ALL.csv')
sample_1k_removed.to_csv('SAMPLE1K_ALL.csv')
merged_df_removed.to_csv('ALL.csv')

In [55]:
sample_20k = merged_df_removed.sample(n = 20000)
sample_20k.to_csv('SAMPLE20K_ALL.csv')

In [56]:
sample_20k

Unnamed: 0_level_0,category,text,DIAG_CODES,PROC_CODES,note
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
113714.0,Nursing/other,CONDITION UPDATE\nASSESSMENT:\n PT SLEEPING...,"1985,19889,1972,2724,4019,V103","8674.0,7781.0,3479.0","[condition, update, assessment, pt, sleeping, ..."
115326.0,Nursing/other,NPN 1900-0700\n62 yo fe adm at 1900 c cholang...,5693155119702875285934600311,9904.0,"[npn, yo, fe, adm, c, cholangiocarcinoma, sp, ..."
130724.0,Nursing/other,"Neonatology Attending\nDay 40, PMA 30 3\n\nSIM...","V3000,76503,7707,769,486,77181,7742,2760,2762,...","9672.0,9604.0,3891.0,3892.0,9915.0,9390.0,9604...","[neonatology, attending, day, pma, simv, x, ps..."
147409.0,Nursing/other,Rehab/OT\n\nMet with [**Doctor First Name 649*...,"V3001,76502,769,7707,03843,7702,7470,2721,7766","9604.0,9672.0,9915.0,9983.0,3885.0,3404.0,966....","[rehabot, met, doctor, first, name, parents, b..."
189024.0,Radiology,[**2156-9-30**] 10:38 PM\n CHEST (PORTABLE AP)...,"0389,78552,41071,4280,51882,5849,70709,6826,42...","3605.0,3607.0,3722.0,8855.0,3722.0,8856.0,9604...","[pm, chest, portable, ap, clip, clip, number, ..."
...,...,...,...,...,...
130409.0,Nursing/other,RESP CARE: Pt remains intubated/on vent on set...,"41401,4280,25000,4019,2449,V4501","66.0,3606.0,45.0,40.0,8856.0,9920.0,9604.0,967...","[resp, care, pt, remains, intubatedon, vent, s..."
164955.0,Nursing/other,Respiratory Care Note\nPt. continues on 6cmh2O...,"V3001,76502,769,7742,77181,7793,7707,55012,777...","5300.0,5300.0,9672.0,9604.0,9390.0,9915.0,9904...","[respiratory, care, note, pt, continues, cmho,..."
188755.0,Nursing/other,ccu nsg progress note.\no:confused. self dced ...,"41071,51882,4280,5070,42731,03811,99592,78552,...","3761.0,8841.0,3605.0,3607.0,3722.0,8856.0,9920...","[ccu, nsg, progress, note, oconfused, self, dc..."
112397.0,Nursing/other,NPN 0700-1900:\nEvents: This is a 55 yo lady w...,"51884,99681,5845,25041,5990,5856,40391,4280,42...","9672.0,3995.0,3895.0,3322.0,966.0,14.0,8872.0,...","[npn, events, yo, lady, presented, yesterday, ..."
