# Import and Setup

In [526]:
# Import necessary libraries
import os
import json
import random
from dotenv import load_dotenv
from langchain import LLMChain
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI
from PIL import Image as PILImage
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image, PageBreak

In [527]:
# Load API keys from .env file
# Replace this with the path to where your .env file is, which should contain OpenAI API keys as well as other necessary environment variables
# Env file should contain something like this:
# OPENAI_API_KEY=your openai api key
# OPENAI_ORG_ID=your org id
# LANGCHAIN_TRACING_V2='true'
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY=your langchain api key
# LANGCHAIN_PROJECT=the project name you want to use
load_dotenv("../.env")

# load_dotenv already places the file's entries in os.environ, so nothing needs
# to be copied back. Instead, verify the variables are present: assigning
# os.getenv(name) straight into os.environ raises an opaque TypeError when the
# variable is missing.
required_env_vars = ["OPENAI_API_KEY"]
optional_env_vars = [
    "LANGCHAIN_TRACING_V2",
    "LANGCHAIN_ENDPOINT",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
]

missing_required = [name for name in required_env_vars if not os.getenv(name)]
if missing_required:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_required)
        + ". Check the path passed to load_dotenv and the contents of the .env file."
    )

# The LangChain variables only configure tracing, so report them without failing.
missing_optional = [name for name in optional_env_vars if not os.getenv(name)]
if missing_optional:
    print("Note: LangChain tracing variables not set: " + ", ".join(missing_optional))

In [528]:
# Chat model shared by the summarization chains built later in this notebook
model = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0.7,
)

In [529]:
### Key setup variables

# Version tag of the transcripts dataset. It is interpolated into the input and
# output file names built below; being a float, it renders as "1.0" in those paths.
transcripts_version = 1.0

# System Prompts

In [530]:
# System prompt for the summarization chain: it tells the model which sections to
# produce and asks for the answer as a dictionary with one string value per section.
# The doubled braces around the example dictionary are presumably there so the
# prompt template treats them as literal braces rather than a variable — confirm.
system_message_summarize_detailed = """

You are a medical assistant tasked with reviewing a transcript of a conversation between a patient and their doctor. You will be provided a transcript. The doctor has asked you to write up a summary of the transcript in the format outlined below. Return your summary as a Python dictionary as follows: {{"Patient Overview": "", "Current Symptoms": "", "Vital Signs": "", "Current Medications": "", "Summary": ""}}. Ensure the output is in proper dictionary format. The value for each key is a string which contains the text of the summary, including new line characters where appropriate. Add context to symptoms where appropriate, but be brief. List specific medications by name under the appropriate medication category. Do not add any information that is not present in the transcript.

Patient Overview:
    Write a one sentence summary like "[Patient Name] is experiencing [primary symptom or chief complaint]"

Current Symptoms (Note: Separate each symptom with a new line. Determine if the patient is experiencing any of the following: Dyspnea, Paroxysmal Nocturnal Dyspnea (PND), Orthopnea, Edema, Nocturnal Cough, Chest Pain, Fatigue, Sudden Change in Mental Status.):
     List the symptoms the patient is currently experiencing

Vital Signs (Note: Separate each vital sign with a new line. Put N/A if not reported in transcript):
     Temperature: 
     Heart Rate: 
     Respiratory Rate:
     Oxygen Saturation: 
     Blood Pressure: 
     Weight: 

Current Medications (Note: separate each medication with a new line):
     List the medications the patient is taking.

Summary:
     At a high level, summarize a few key points from the transcript. Include the symptoms that the patient confirms, and the symptoms that the patient denies. Do not list vital sign details in this section.

"""

# Load Transcripts Data

In [531]:
## Load patient data

# Path to the patients-with-transcripts JSON file for the configured version
json_file_path = f"../data/patients/patients_{transcripts_version}_with_transcripts.json"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    patients = json.load(json_file)

In [532]:
# Pick one patient id at random to drive the example cells below
# (no seed is set, so the choice differs from run to run)
random_key = random.choice(list(patients))

In [533]:
# Pull the selected patient's transcript and echo it one utterance per line
selected_patient = patients[random_key]
patient_transcript = selected_patient['chat_transcript']

for utterance in patient_transcript:
    print(utterance)

Doctor: Hello Gregory Reynolds, I'm here to check on how you're feeling today. Let's go over how you've been doing since your discharge.
Patient: Um... I've been feeling a bit short of breath, especially when I try to do things around the house. It's been a bit tough to keep up with everything.
Doctor: I'm sorry to hear that you've been feeling short of breath, Gregory. Can you tell me if this shortness of breath occurs at rest, when you're walking, or when you're climbing stairs?
Patient: It mainly happens when I'm walking around the house or trying to do chores. Stairs are especially hard for me lately.
Doctor: Based on your responses, it seems like you're experiencing shortness of breath mainly when you're walking around or climbing stairs. Have you had sudden shortness of breath that wakes you up at night, known as paroxysmal nocturnal dyspnea?
Patient: No, I haven't had that sudden shortness of breath at night. It's mostly during the day when I'm up and about.
Doctor: Thank you fo

In [534]:
# Concatenate every utterance into a single string (no separator is inserted
# between utterances)
patient_transcript_string = "".join(patient_transcript)

# Generate Example Summary

In [535]:
# Final stage of the example chain below; the chain's result is later handled as
# text and parsed with json.loads
parser = StrOutputParser()

In [536]:
# Build the chat prompt: the summarization instructions as the system message and
# the transcript as the user message.
# NOTE(review): patient_transcript is the list of utterances loaded above (not the
# joined string), and it is embedded in the template itself. Presumably a literal
# "{" or "}" inside an utterance would be read as a template variable — confirm, and
# consider a "{text}" placeholder that is filled at invoke time instead.
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_message_summarize_detailed), ("user", patient_transcript)]
)

In [537]:
# Compose the pipeline: prompt -> chat model -> string output parser
chain = prompt_template | model | parser

In [538]:
# Run the chain once for the selected patient.
# NOTE(review): the prompt built above declares no "{system}" or "{text}" placeholder,
# so these inputs look unused (the transcript is already part of the template) — confirm.
result = chain.invoke({"system": system_message_summarize_detailed, "text": patient_transcript})

In [539]:
# Show the raw text returned by the chain
print(result)

{"Patient Overview": "Gregory Reynolds is experiencing shortness of breath and fatigue", "Current Symptoms": "Shortness of breath\nFatigue\nTrouble sleeping\nLightheadedness when standing up", "Vital Signs": "Temperature: 98.4\nHeart Rate: 106\nRespiratory Rate: 18 breaths per minute\nOxygen Saturation: 99.0\nBlood Pressure: 114/57\nWeight: N/A", "Current Medications": "Carvedilol\nNitroglycerin\nAspirin\nMetolazone\nDronedarone", "Summary": "Gregory Reynolds is currently experiencing shortness of breath mainly when walking or climbing stairs. He denies paroxysmal nocturnal dyspnea, orthopnea, edema, nocturnal cough, and chest pain. He confirms feeling fatigued, having trouble sleeping, and feeling lightheaded when standing up. His vital signs show a slightly elevated heart rate and lower blood pressure. Gregory is taking Carvedilol, Nitroglycerin, Aspirin, Metolazone, and Dronedarone to manage his heart condition. He mentions feeling forgetful at times and having a hard time keeping u

In [540]:
# Parse the model's reply as JSON; json.loads raises if the reply is not valid JSON
# (for example, if the model answers with a single-quoted Python-style dictionary)
result_dict = json.loads(str(result))

In [541]:
# Show the parsed summary dictionary
print(result_dict)

{'Patient Overview': 'Gregory Reynolds is experiencing shortness of breath and fatigue', 'Current Symptoms': 'Shortness of breath\nFatigue\nTrouble sleeping\nLightheadedness when standing up', 'Vital Signs': 'Temperature: 98.4\nHeart Rate: 106\nRespiratory Rate: 18 breaths per minute\nOxygen Saturation: 99.0\nBlood Pressure: 114/57\nWeight: N/A', 'Current Medications': 'Carvedilol\nNitroglycerin\nAspirin\nMetolazone\nDronedarone', 'Summary': "Gregory Reynolds is currently experiencing shortness of breath mainly when walking or climbing stairs. He denies paroxysmal nocturnal dyspnea, orthopnea, edema, nocturnal cough, and chest pain. He confirms feeling fatigued, having trouble sleeping, and feeling lightheaded when standing up. His vital signs show a slightly elevated heart rate and lower blood pressure. Gregory is taking Carvedilol, Nitroglycerin, Aspirin, Metolazone, and Dronedarone to manage his heart condition. He mentions feeling forgetful at times and having a hard time keeping u

## Generate Example PDF

In [542]:
# Logo image drawn at the top of each report (plain literal: nothing to interpolate)
logo_path = "../docs/reco_logo.jpeg"

In [543]:
def _section_text(value):
    """Return a summary value as one string; list/tuple items become separate lines."""
    if isinstance(value, str):
        return value
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return "\n".join(str(item) for item in value)
    return str(value)


def _paragraph_markup(text, add_period=False):
    """Convert plain text into markup that is safe to pass to a reportlab Paragraph."""
    # Function-scope import keeps this cell self-contained; escape() is standard library.
    from xml.sax.saxutils import escape

    # Paragraph parses its text as XML-like markup, so raw '<', '>' and '&' are
    # escaped first; only then are newlines turned into <br/> tags.
    markup = escape(text).replace('\n', '<br/>')
    if add_period and markup and not markup.endswith('.'):
        markup += '.'
    return markup


def create_patient_report(p_id, data, output_filename, transcript=None, logo_file=None):
    """Render a patient's summary sections and chat transcript into a PDF.

    Args:
        p_id: Patient id; used to look up the transcript in the module-level
            ``patients`` mapping when ``transcript`` is not given.
        data: Mapping of section title -> section text (the parsed LLM summary).
            The 'Summary' and 'Patient Overview' sections, and any section with a
            line of 99+ characters, are drawn as wrapped paragraphs; other
            sections are drawn line by line.
        output_filename: Path of the PDF to write. Its parent directory is
            created if it does not exist.
        transcript: Optional iterable of utterance strings. Defaults to
            ``patients[p_id]['chat_transcript']``.
        logo_file: Optional path to the logo image. Defaults to the module-level
            ``logo_path``.

    Returns:
        None. The PDF is written to ``output_filename``.
    """
    if transcript is None:
        transcript = patients[p_id]['chat_transcript']
    if logo_file is None:
        logo_file = logo_path

    # Make sure the destination directory exists before the canvas is saved
    if isinstance(output_filename, (str, os.PathLike)):
        output_dir = os.path.dirname(os.fspath(output_filename))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Create a canvas
    c = canvas.Canvas(output_filename, pagesize=letter)

    # Read the logo dimensions; the context manager closes the image file afterwards
    with PILImage.open(logo_file) as logo_image:
        img_width, img_height = logo_image.size

    # Font names
    title_style = "Helvetica-Bold"
    section_title_style = "Helvetica-Bold"
    section_content_style = "Helvetica"

    # Draw logo and title
    title = "RECO Patient Report"
    c.setFont(title_style, 18)
    c.drawImage(logo_file, 50, 720, width=img_width/4, height=img_height/4)
    c.drawString(88, 727, title)

    # Vertical layout: first-page start, top of a fresh page, lowest allowed baseline
    y_position = 685
    page_top = letter[1] - 50
    bottom_margin = 50

    # Define paragraph styles
    styles = getSampleStyleSheet()
    body_style = styles["Normal"]
    body_style.fontName = "Helvetica"
    body_style.fontSize = 11
    body_style.leading = 14

    # Iterate through the dictionary and draw each section
    for key, value in data.items():
        text = _section_text(value)
        content_lines = text.split('\n')
        is_prose = key == 'Summary' or key == 'Patient Overview'
        use_paragraph = is_prose or max(len(line) for line in content_lines) >= 99

        # Measure the content BEFORE drawing the title so that, if a page break is
        # needed, the title moves to the new page together with its content.
        if use_paragraph:
            paragraph = Paragraph(_paragraph_markup(text, add_period=True), body_style)
            _, height = paragraph.wrap(500, 800)
            # Long list-style sections keep their larger 100-unit margin; the prose
            # sections previously had no overflow check at all.
            margin = bottom_margin if is_prose else 100
            needs_new_page = y_position - 40 < margin + height
        else:
            needs_new_page = y_position - 40 < bottom_margin

        # Skip the break when already at the top of a fresh page (avoids blank pages)
        if needs_new_page and y_position < page_top:
            c.showPage()
            y_position = page_top

        # Section title
        c.setFont(section_title_style, 12)
        y_position -= 20
        c.drawString(50, y_position, key.upper())

        # Section content
        c.setFont(section_content_style, 11)
        y_position -= 20

        if use_paragraph:
            paragraph.drawOn(c, 50, y_position - height + 10)
            # Prose sections get a little more space after them than list sections
            y_position -= height + (20 if is_prose else 10)
        else:
            for line in content_lines:
                # Drop lines that merely repeat the section name
                if key.lower() in line:
                    continue

                if y_position < bottom_margin:
                    c.showPage()
                    y_position = page_top
                    # showPage resets the canvas state, so select the font again
                    c.setFont(section_content_style, 11)

                c.drawString(50, y_position, line)
                y_position -= 14  # Move down 14 units for each line

            y_position -= 10  # Add some space between sections

    # Add the transcript on its own page at the end of the file
    c.showPage()
    y_position = page_top

    section_title = "Transcript"
    c.setFont(section_title_style, 12)
    y_position -= 20
    c.drawString(50, y_position, section_title.upper())

    c.setFont(section_content_style, 11)
    y_position -= 20

    for line in transcript:
        markup = _paragraph_markup(_section_text(line))

        # Upper-case the speaker label at the start of the utterance
        if markup.startswith("Doctor"):
            markup = "DOCTOR" + markup[6:]
        elif markup.startswith("Patient"):
            markup = "PATIENT" + markup[7:]

        paragraph = Paragraph(markup, body_style)
        _, height = paragraph.wrap(500, 800)

        # Start a new page when the utterance does not fit above the bottom margin
        if y_position < bottom_margin + height:
            c.showPage()
            y_position = page_top

        paragraph.drawOn(c, 50, y_position - height + 10)
        y_position -= height + 10  # Add extra space after the paragraph

    # Save the PDF file
    c.save()

In [544]:
# Specify the path to your PDF file
# (named after the selected patient id and the transcripts version)
pdf_filename = f"../data/patients/pdfs/{random_key}_pdf_{transcripts_version}.pdf"

In [545]:
# Call the function to create the PDF for the randomly selected patient, using the
# summary dictionary parsed from the example run above
create_patient_report(random_key, result_dict, pdf_filename)
print(f"PDF created: {pdf_filename}")

PDF created: ../data/patients/pdfs/14807966_pdf_1.0.pdf


## Generate Summaries & PDFs for All Patients in Transcripts File

In [546]:
# Specify the path to your new summaries JSON file
# NOTE(review): this name is not referenced again in the cells shown; the final cell
# rebuilds the same path as summaries_json_file_name.
summary_json_file_path = f"../data/patients/patients_{transcripts_version}_summaries.json"

In [547]:
def get_llm_summary(transcript):
    """Ask the chat model for a structured summary of one transcript.

    Args:
        transcript: The conversation, either as a single string or as an
            iterable of utterance strings (joined with newlines).

    Returns:
        The model's reply as text, as produced by ``StrOutputParser``.
    """
    if isinstance(transcript, str):
        transcript_text = transcript
    else:
        transcript_text = "\n".join(str(line) for line in transcript)

    # The transcript is supplied through the {text} placeholder instead of being
    # embedded in the template itself, so a literal "{" or "}" in an utterance is
    # sent as plain text rather than being parsed as a template variable.
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", system_message_summarize_detailed), ("user", "{text}")]
    )

    chain = prompt_template | model | StrOutputParser()

    return chain.invoke({"text": transcript_text})

In [548]:
# Upper bound on LLM calls per patient before giving up
MAX_SUMMARY_ATTEMPTS = 10

summaries = {}

for p_id in patients:
    p_transcript = patients[p_id]['chat_transcript']

    print(f"Generating summary for patient {p_id}")

    result_dict = None
    for tries in range(1, MAX_SUMMARY_ATTEMPTS + 1):
        llm_summary_result = get_llm_summary(p_transcript)

        # Only a JSON decoding problem counts as a failed attempt; anything else
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        try:
            parsed = json.loads(str(llm_summary_result))
        except json.JSONDecodeError:
            parsed = None

        # Valid JSON that is not an object (e.g. a bare string or list) cannot be
        # rendered as report sections, so it is treated as a failure as well.
        if isinstance(parsed, dict):
            result_dict = parsed
            break

        print(f"Attempt {tries} failed")
    else:
        # Reached only when no attempt produced a usable dictionary
        raise RuntimeError(
            f"LLM did not produce parseable summary for patient {p_id} "
            f"in first {MAX_SUMMARY_ATTEMPTS} attempts."
        )

    summaries[str(p_id)] = {'id': p_id, 'summary': result_dict}

    print("Success")

Generating summary for patient 12305811
Success
Generating summary for patient 14185111
Success
Generating summary for patient 10339317
Success
Generating summary for patient 14807966
Success
Generating summary for patient 13912736
Success
Generating summary for patient 15338322
Success
Generating summary for patient 13166275
Success
Generating summary for patient 18136989
Success
Generating summary for patient 15345003
Success
Generating summary for patient 17707918
Attempt 1 failed
Success
Generating summary for patient 17566649
Success
Generating summary for patient 19516114
Success
Generating summary for patient 19557627
Success
Generating summary for patient 11242742
Attempt 1 failed
Success
Generating summary for patient 12390274
Success
Generating summary for patient 11080025
Success
Generating summary for patient 14540393
Success
Generating summary for patient 12807868
Success
Generating summary for patient 13727153
Success
Generating summary for patient 13727871
Success


In [549]:
# Generate PDFs

for key, entry in summaries.items():
    print(f"Generating PDF for patient {key}")
    # Destination PDF for this patient
    pdf_filename = f"../data/patients/pdfs/{key}_pdf_{transcripts_version}.pdf"

    # Render the report from the stored summary dictionary
    create_patient_report(key, entry["summary"], pdf_filename)
    print(f"PDF created: {pdf_filename}")

Generating PDF for patient 12305811
PDF created: ../data/patients/pdfs/12305811_pdf_1.0.pdf
Generating PDF for patient 14185111
PDF created: ../data/patients/pdfs/14185111_pdf_1.0.pdf
Generating PDF for patient 10339317
PDF created: ../data/patients/pdfs/10339317_pdf_1.0.pdf
Generating PDF for patient 14807966
PDF created: ../data/patients/pdfs/14807966_pdf_1.0.pdf
Generating PDF for patient 13912736
PDF created: ../data/patients/pdfs/13912736_pdf_1.0.pdf
Generating PDF for patient 15338322
PDF created: ../data/patients/pdfs/15338322_pdf_1.0.pdf
Generating PDF for patient 13166275
PDF created: ../data/patients/pdfs/13166275_pdf_1.0.pdf
Generating PDF for patient 18136989
PDF created: ../data/patients/pdfs/18136989_pdf_1.0.pdf
Generating PDF for patient 15345003
PDF created: ../data/patients/pdfs/15345003_pdf_1.0.pdf
Generating PDF for patient 17707918
PDF created: ../data/patients/pdfs/17707918_pdf_1.0.pdf
Generating PDF for patient 17566649
PDF created: ../data/patients/pdfs/17566649_

In [550]:
# Write summaries to JSON

summaries_json_file_name = f"../data/patients/patients_{transcripts_version}_summaries.json"

# Serialize the whole mapping to text and write it in a single call
with open(summaries_json_file_name, 'w') as summaries_file:
    summaries_file.write(json.dumps(summaries))