In [1]:
# path for input and target data tables

diagnosis_path = '../data/diagnosis.csv'
discharge_path ='../data/discharge.csv'
edstays_path = '../data/edstays.csv'
radiology_path = '../data/radiology.csv'
triage_path = '../data/triage.csv'
target_path = '../data/discharge_target.csv'
discharge_sections_path = 'discharge_sections.csv'
radiology_sections_path = 'radiology_sections.csv'

import pandas as pd
import re
import os

# read data
diagnosis_df = pd.read_csv(diagnosis_path, keep_default_na=False)
discharge_df = pd.read_csv(discharge_path, keep_default_na=False)
edstays_df = pd.read_csv(edstays_path, keep_default_na=False)
radiology_df = pd.read_csv(radiology_path, keep_default_na=False)
triage_df = pd.read_csv(triage_path, keep_default_na=False)
target_df = pd.read_csv(target_path, keep_default_na=False)

discharge_sections_df = pd.read_csv(discharge_sections_path, keep_default_na=False)
radiology_sections_df = pd.read_csv(radiology_sections_path, keep_default_na=False)


In [2]:
print(discharge_df['text'].iloc[0])

 
Name:  ___             Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   F
 
Service: MEDICINE
 
Allergies: 
IV Dye, Iodine Containing Contrast Media / Oxycodone / 
cilostazol / Varenicline
 
Attending: ___.
 
Chief Complaint:
Shortness of Breath

 
Major Surgical or Invasive Procedure:
N/A

 
History of Present Illness:
Ms. ___ is a ___ female with history of 
COPD on home O2, atrial fibrillation on apixaban, hypertension, 
CAD, and hyperlipidemia who presents with shortness of breath, 
cough, and wheezing for one day.

The patient reports shortness of breath, increased cough 
productive of ___ red-flected sputum, and wheezing since 
yesterday evening.  She has been using albuterol IH more 
frequently (___) with ipratropium nebs every 4 hours with 
minimal relief. She had to increase her O2 flow up to 4L without 
significant improvement. She was currently taking 10mg of 
prednisone. She has also been taking tiotropium I

In [6]:
print(radiology_df['text'].iloc[0])

EXAMINATION:
Chest:  Frontal and lateral views

INDICATION:  History: ___ with dyspnea  // eval for pneumonia

TECHNIQUE:  Chest:  Frontal and Lateral

COMPARISON:  ___

FINDINGS: 

Mild basilar atelectasis is seen without focal consolidation.  No pleural
effusion or pneumothorax is seen. The cardiac and mediastinal silhouettes are
stable.

IMPRESSION: 

Mild basilar atelectasis without definite focal consolidation.



In [7]:
radiology_df.head()

Unnamed: 0,note_id,subject_id,hadm_id,note_type,note_seq,charttime,storetime,text
0,10001884-RR-136,10001884,24962904,RR,136,2130-12-06 18:07:00,2130-12-06 19:36:00,EXAMINATION:\nChest: Frontal and lateral view...
1,10003019-RR-65,10003019,22774359,RR,65,2175-10-08 10:57:00,2175-10-08 13:20:00,"EXAM: Chest, frontal and lateral views.\n\nCL..."
2,10003019-RR-66,10003019,22774359,RR,66,2175-10-08 16:23:00,2175-10-08 16:54:00,"EXAM: Chest, single AP upright portable view...."
3,10003019-RR-67,10003019,22774359,RR,67,2175-10-11 15:28:00,2175-10-11 17:51:00,"AP CHEST, 3:34 P.M, ___\n\nHISTORY: ___ man w..."
4,10003019-RR-68,10003019,22774359,RR,68,2175-10-14 08:46:00,2175-10-14 09:36:00,REASON FOR EXAMINATION: Evaluation of the pat...


In [3]:
# segment radiology notes
def extract_text_by_subheading_radiology(text):
    headings_regex = {
        'EXAMINATION': r'(?:EXAMINATION:|EXAM:)\s*(.*?)(?=\n[A-Za-z ]+:|$)',
        'INDICATION':  r'(?:INDICATION:|CLINICAL INFORMATION:)\s*(.*?)(?=\n[A-Za-z ]+:|$)',
        'TECHNIQUE': r'(?:TECHNIQUE:)\s*(.*?)(?=\n[A-Za-z ]+:|$)',
        'COMPARISON': r'(?:COMPARISON)\s*(.*?)(?=\n[A-Za-z ]+:|$)',
        'FINDINGS': r'(?:FINDINGS:)\s*(.*?)(?=\n[A-Za-z ]+:|$)',
        'IMPRESSION': r'IMPRESSION:(.*)'
    }

    extracted_text = {}

    # Iterate over each subheading and extract corresponding text
    for heading, regex in headings_regex.items():
        match = re.search(regex, text, re.DOTALL)
        if match:
            extracted_text[heading] = match.group(1).strip()
        else:
            extracted_text[heading] = None

    return extracted_text

#apply function
radiology_sections_df = radiology_df['text'].apply(extract_text_by_subheading_radiology).apply(pd.Series)

# Concatenating the columns from radiology_df into radiology_sections_df
radiology_sections_df = pd.concat([radiology_df[['subject_id', 'hadm_id']], radiology_sections_df], axis=1)

In [8]:
radiology_sections_df.head()

Unnamed: 0.1,Unnamed: 0,subject_id,hadm_id,EXAMINATION,INDICATION,TECHNIQUE,COMPARISON,FINDINGS,IMPRESSION
0,0,10001884,24962904,Chest: Frontal and lateral views,History: ___ with dyspnea // eval for pneumonia,Chest: Frontal and Lateral,: ___,Mild basilar atelectasis is seen without focal...,Mild basilar atelectasis without definite foca...
1,1,10003019,22774359,"Chest, frontal and lateral views.","Fever, on chemotherapy.\n\n___.",,,Frontal and lateral views of the chest were ob...,
2,2,10003019,22774359,"Chest, single AP upright portable view.","Neutropenic fever, septic shock, worsening hyp...",,,There has been interval placement of a right i...,Interval placement of right internal jugular c...
3,3,10003019,22774359,,,,,,AP chest compared to ___:\n\nMultifocal pulmon...
4,4,10003019,22774359,Evaluation of the patient with Hodgkin's lymph...,,MDCT of the chest was obtained from thoracic i...,: ___ PET CT and chest radiograph from ___.,Aorta and pulmonary arteries are normal in dia...,1. No evidence of new pulmonary abnormalities...


In [4]:
# segment discharge notes

def extract_text_by_subheading_discharge(text):
    
    headings_regex = {
        'CC': r'(CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:)(.*?)(?=Service:|Specialty:|Unit:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|Major Surgical or Invasive Procedure:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Service': r'(Service:|Specialty:|Unit:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Major Surgical Procedure': r'(Major Surgical or Invasive Procedure:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'HPI': r'(HPI:|History of Present Illness:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'PMH': r'(PMH:|Past Medical History:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'SOC': r'(Social History:|SOC:|SH:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'FH': r'(Family History:|PFH:|FH:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Past Surgical History': r'(Past Surgical History:|PSH:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Problem List': r'(Problem List:|Problems:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Physical Exam': r'(Physical Exam:|PE:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Medication Lists': r'(Medication lists:|Admission Medications:|Medications on Admission:|Preadmission Medication list)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Pertinent Results': r'(Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'BHC': r'(BHC:|Brief Hospital Course:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Transitional Issues': r'(Transitional Issues:|Transitional Issues|TRANSITIONAL ISSUES:)(.*?)(?=CODE STATUS:|Disposition:|Dispo:|CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Disposition': r'(Disposition:|Dispo:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Discharge Instructions': r'(Discharge Instructions:|Patient Instructions:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Followup Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Followup Instructions': r'(Followup Instructions:)(.*?)(?=CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:|Service:|Specialty:|Unit:|Major Surgical or Invasive Procedure:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Transitional Issues:|Pertinent Results:|Pertinent Findings:|Discharge Diagnosis:$)',
        'Discharge Diagnosis': r'(Discharge Diagnosis:|Diagnosis:)(.*?)(?=Discharge Condition:|CC:|Chief Complaint:|Reason for admission:|Reason for hospital admission:| Service:|Specialty:|Unit:|Major Medical Procedures:|Major Procedures:|Major Surgeries:|HPI:|History of Present Illness:|PMH:|Past Medical History:|Social History:|SOC:|SH:|Family History:|PFH:|FH:|Past Surgical History:|PSH:|Problem List:|Problems:|Physical Exam:|PE:|Medication lists:|Admission Medications:|Medications on Admission:|Discharge Medications:|Medications on Discharge:|Pertinent Results:|Pertinent imaging:|Pertinent Labs:|Pertinent Microbiology:|Pertinent Micro:|Results:|BHC:|Brief Hospital Course:|Disposition:|Dispo:|Discharge Instructions:|Patient Instructions:|Followup Instructions:|Transitional Issues:$)',
    }

    extracted_text = {}
    for heading, regex in headings_regex.items():
        match = re.search(regex, text, re.DOTALL)
        if match:
            extracted_text[heading] = match.group(2).strip()
        else:
            extracted_text[heading] = None        

    return extracted_text

#apply function
discharge_sections_df = discharge_df['text'].apply(extract_text_by_subheading_discharge).apply(pd.Series)

# Concatenating the columns from radiology_df into radiology_sections_df
discharge_sections_df= pd.concat([discharge_df[['subject_id', 'hadm_id']], discharge_sections_df], axis=1)



In [9]:
print(target_df['discharge_instructions'].iloc[0])
print(target_df['brief_hospital_course'].iloc[0])


Dear Ms. ___,

You were admitted to ___ after you developed 
shortness of breath and wheezing at home shortly after your last 
discharge. You were treated for a COPD exacerbation and your 
breathing quickly got better. Our physical therapists evaluated 
you and recommended that you have a short stay at Pulmonary 
___ before going home to improve your breathing. 

We wish you all the best at rehab and send our condolences to 
your family on your recent loss.

It was truly a pleasure taking care of you.

Your ___ Team
Ms. ___ is a ___ female with history of 
COPD on home O2, atrial fibrillation on apixaban, hypertension, 
CAD, and hyperlipidemia who presents with shortness of breath, 
cough, and wheezing for one day. Pt recently DC'd from hospital 
for dyspnea, treated only w/nebs and steroids as not thought ___ 
true COPD exacerbation, c/f anxiety component. Pt re-admitted 
w/similar Sx, thought ___ COPD exacerbation, received nebs, 
steroids, azithromycin. Pt's wheezing, cough, SOB imp

In [2]:
target_df.head()

Unnamed: 0,note_id,hadm_id,discharge_instructions,brief_hospital_course,discharge_instructions_word_count,brief_hospital_course_word_count
0,10001884-DS-35,24962904,"Dear Ms. ___,\n\nYou were admitted to ___ afte...",Ms. ___ is a ___ female with history of \nCOPD...,87,358
1,10003019-DS-22,22774359,"Dear Mr. ___,\n\nIt has been our pleasure to b...",___ male with h/o Hodgkin's lymphoma C1D17 ABV...,140,78
2,10003299-DS-7,29323205,Dear ___ were hospitalized due to symptoms of ...,___ RH female with a PMHx of paramedian pontin...,242,386
3,10003502-DS-7,20459702,It was a pleasure caring for you at ___ \n___....,Hospitalization Summary: \nMs. ___ is an ___ y...,74,50
4,10004322-DS-21,28755331,"Dear Mr. ___,\n\nIt was our pleasure to care f...",___ with h/o psychosis admitted because of mul...,69,287


In [5]:
print(discharge_sections_df['Transitional Issues'].iloc[0])


[] For pt's continued COPD exacerbations, recommend finishing 5d 
course of Azithromycin, 250mg qd until ___
[] Recommend extended prednisone taper for pt, 5d 40mg 
Prednisone (to finish ___ followed by 10mg taper every 5 days 
(35mg from ___, 30mg ___, etc...). 
[] Would consider PCP prophylaxis with ___ if unable to wean 
prednisone to less than 20mg daily. 
[] Pt's SOB may have an anxiety component, may benefit from 
starting SSRI in addition to home benzos already prescribed

# CONTACT: Full Code
#


In [10]:
discharge_sections_df.to_csv('discharge_sections.csv')
radiology_sections_df.to_csv('radiology_sections.csv')

In [3]:
# !pip install langchain==0.1.1 openai==1.8.0 langchain-openai cohere huggingface_hub transformers accelerate
!pip install langchain openai langchain-openai cohere huggingface_hub transformers accelerate llamacpp ollama

Collecting langchain
  Downloading langchain-0.1.16-py3-none-any.whl.metadata (13 kB)
Collecting openai
  Downloading openai-1.23.6-py3-none-any.whl.metadata (21 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.1.3-py3-none-any.whl.metadata (2.5 kB)
Collecting cohere
  Downloading cohere-5.3.3-py3-none-any.whl.metadata (3.1 kB)
Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl.metadata (18 kB)
Collecting llamacpp
  Downloading llamacpp-0.1.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting ollama
  Downloading ollama-0.1.9-py3-none-any.whl.metadata (3.8 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain

In [7]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
# from langchain.llms import LLAMA 

In [18]:
import pandas as pd
import torch
import os
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer


# suzanne's
os.environ["HUGGING_FACE_HUB_TOKEN"] = "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"
os.environ['HuggingFaceHub_API_Token'] = "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"

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

model_save_path = '/home/ubuntu/discharge_me/discharge_me/LLMs/'
tokenizer_save_path = '/home/ubuntu/discharge_me/discharge_me/LLMs/'

# Save model
model.save_pretrained(model_save_path)

# Save tokenizer
tokenizer.save_pretrained(tokenizer_save_path)

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

('/home/ubuntu/discharge_me/discharge_me/LLMs/tokenizer_config.json',
 '/home/ubuntu/discharge_me/discharge_me/LLMs/special_tokens_map.json',
 '/home/ubuntu/discharge_me/discharge_me/LLMs/tokenizer.json')

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_save_path)
model = AutoModelForCausalLM.from_pretrained(tokenizer_save_path)

In [20]:

def query_with_llama(question, dataframes, column_lists, hadm_id, output_column):
    
    context_data = ""

    # Iterate through each DataFrame and its corresponding columns to create the context
    for df, cols in zip(dataframes, column_lists):
        # Filter the dataframe for the specific hadm_id
        target_df = df[df['hadm_id'] == hadm_id]
        if not target_df.empty:
            # Create a context string from the specified columns
            context_part = target_df[cols].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1).str.cat(sep=' ')
            context_data += " " + context_part
    
    if not context_data:
        dataframes[0].loc[dataframes[0]['hadm_id'] == hadm_id, output_column] = "No records found for the given HADM ID."
        return dataframes[0]

    # Combine the question with the aggregated context data
    prompt_text = ""
    prompt_text = question + " " + context_data

    # Encode the prompt text
    input_ids = tokenizer.encode(prompt_text, return_tensors='pt')

        # Set generation parameters
    temperature = 0.01
    top_k = 10
    top_p = 0.9
    max_length = len(prompt_text) + 500
    
    # Generate a response
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1
    )
    
    # Decode the output
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip off the prompt text from the beginning
    clean_response = answer[len(prompt_text):]
    
    # Append the answer to the first DataFrame
    dataframes[0].loc[dataframes[0]['hadm_id'] == hadm_id, output_column] = clean_response
    return dataframes[0]

    
    # Generate the response using the model
    # with torch.no_grad():  # Disable gradient calculation for inference
        # outputs = model.generate(**inputs, max_length=1024)

#     answer = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    
#     # Append the answer to the first DataFrame
#     dataframes[0].loc[dataframes[0]['hadm_id'] == hadm_id, output_column] = answer
#     return dataframes[0]

# Example usage
question = "Given the following information from a clinical note and text from radiology reports, summarize the radiology findings in a list. Limit the list to 5 entries':"
hadm_id = 24962904  # Specify the HADM ID you want to query
dfs = [discharge_sections_df, radiology_sections_df]  # List of DataFrames
relevant_cols = [['Pertinent Results'], ['EXAMINATION','INDICATION','IMPRESSION']]  # List of column lists for each DataFrame
output_col_name = 'Radiology_results'  # Specify the name of the new column

# Assuming df1 is the primary DataFrame where the output should be stored
df1 = query_with_llama(question, dfs, relevant_cols, hadm_id, output_col_name)
print(df1.loc[df1['hadm_id'] == hadm_id, output_col_name])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


0      // eval for pneumonia Mild basilar atelectas...
Name: Radiology_results, dtype: object


In [16]:
# Example usage (ensure 'merged_df_expanded_with_discharge' is defined in your script)
question = "What major treatments, surgeries or procedures did the patient receive?"
hadm_id = 24962904  # Specify the HADM ID you want to query
relevant_cols = ['HPI', 'Major Surgical Procedure']
answer = query_with_llama(question, discharge_sections_df, relevant_cols, hadm_id)
print(answer)

What major treatments, surgeries or procedures did the patient receive? Ms. ___ is a ___ female with history of 
COPD on home O2, atrial fibrillation on apixaban, hypertension, 
CAD, and hyperlipidemia who presents with shortness of breath, 
cough, and wheezing for one day.

The patient reports shortness of breath, increased cough 
productive of ___ red-flected sputum, and wheezing since 
yesterday evening.  She has been using albuterol IH more 
frequently (___) with ipratropium nebs every 4 hours with 
minimal relief. She had to increase her O2 flow up to 4L without 
significant improvement. She was currently taking 10mg of 
prednisone. She has also been taking tiotropium IH, 
theophylline, advair IH at home as prescribed. She denies sick 
contacts. She quit smoking approximately 1 month ago.

She reports an episode of chest pain in waiting room while 
sitting down, non-exertional, resolved after 2 minutes. She 
denies fever/chills, abdominal pain, nausea/vomiting, 
palpitations, and 