This notebook generates LLM-streamlined reports using the Structure prompting approach.

Load necessary libraries

In [1]:
import pandas as pd
import openpyxl
from openpyxl import load_workbook
import seaborn as sns
import matplotlib.pyplot as plt
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain.output_parsers import ResponseSchema, StructuredOutputParser, OutputFixingParser

Formatting Instructions

In [2]:
# One ResponseSchema per field the LLM is asked to fill in. Each schema's
# `name` is the key expected in the parsed output and its `description` is the
# instruction text for that field.
# NOTE: the backslash line continuations sit inside the string literals, so the
# leading whitespace of each continued line is part of the description text.

# --- Chest fields ---
Devices = ResponseSchema(name="Devices",
        description="Medical lines, tubes, or devices present in a patient's body. \
        If no lines, tubes, or devices are mentioned, write None. ")
Thyroid = ResponseSchema(name="Thyroid",
        description="Findings of the lower neck, thoracic inlet, and thyroid. \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Mediastinum = ResponseSchema(name="Mediastinum",
        description="Findings of the mediastinum (heart, heart vessels, esophagus, and large airways). \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Lungs = ResponseSchema(name="Lungs",
        description="Findings of the lungs and pleural spaces. \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Other_chest_findings = ResponseSchema(name="Other_chest_findings",
        description="Findings of other organs and areas in the chest \
        (e.g., axilla, breast, chest wall, diaphragm, retroareola). \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
# --- Abdomen and pelvis fields ---
Hepatobiliary = ResponseSchema(name="Hepatobiliary",
        description="Findings of the hepatobiliary system (liver, biliary system, and gallbladder). \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
Spleen = ResponseSchema(name="Spleen",
        description="Findings of the spleen. \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
Pancreas = ResponseSchema(name="Pancreas",
        description="Findings of the pancreas.\
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
Adrenals = ResponseSchema(name="Adrenals",
        description="Findings of the adrenals.\
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
Kidneys = ResponseSchema(name="Kidneys",
        description="Findings of the kidneys and ureters. \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
GI_tract = ResponseSchema(name="GI_tract",
        description="Findings of the GI tract (stomach, bowel, rectum, colon).\
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")
Peritoneum_mesentery_retroperitoneum_vasculature = ResponseSchema(name="Peritoneum_mesentery_retroperitoneum_vasculature",
        description="Findings of the peritoneum, mesentery, retroperitoneum, vasculature, and pelvic lymph nodes.\
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Reproductive_organs = ResponseSchema(name="Reproductive_organs",
        description="Findings of the reproductive organs (e.g., uterus, ovaries, vagina, prostate, testicles, penis). \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Bladder = ResponseSchema(name="Bladder",
        description="Findings of the bladder.\
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
Other_pelvis_and_abdomen_findings = ResponseSchema(name="Other_pelvis_and_abdomen_findings",
        description="Findings of other organs and areas in the pelvis and abdomen \
        (e.g., Bartholin's gland, groin, inguinal region, pelvic floor, proximal thigh, spermatic cord, vulva). \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None.")
# --- Bones and soft tissues ---
Bones = ResponseSchema(name="Bones",
        description="Findings of the bones and soft tissues. \
        If findings are normal, write Unremarkable. If no findings are mentioned, write None. ")

# --- Radiologist's impressions (requested as a list rather than free text) ---
IMPRESSIONS = ResponseSchema(name="IMPRESSIONS",
        description="Radiologist's impressions from IMPRESSIONS section organized in a list. Do not create any new impressions and do not include clinically insignificant impressions.")

# Ordered list of every schema defined above; this is what gets handed to the
# structured output parser.
response_schemas = [Devices, Thyroid, Mediastinum, Lungs, Other_chest_findings, Hepatobiliary, Spleen, Pancreas, Adrenals, Kidneys, GI_tract, Peritoneum_mesentery_retroperitoneum_vasculature, Reproductive_organs, Bladder, Other_pelvis_and_abdomen_findings, Bones, IMPRESSIONS]

Generate LLM-streamlined reports (repeat for each radiologist)

In [4]:
#Specify the input file
input_file = 'Radiologist-1-reports.xlsx'
df = pd.read_excel(input_file)

#Choose the LLM
model = ChatOllama(model="mixtral:8x7b-instruct-v0.1-q6_K", temperature=0)

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
# Fallback parser that asks the same LLM to repair output which failed to parse.
# It does not depend on the individual report, so it is built once up front.
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)

template = """

Given a radiology report, your task is to extract findings corresponding to organs and areas listed below from FINDINGS section, and radiologist's impressions from IMPRESSIONS section.  

Report: {text}

{format_instructions}
"""

prompt = ChatPromptTemplate.from_template(template=template)

# Schema key -> heading used in the generated report, in output order.
chest_sections = {
    "Devices": "Lines/tubes/devices",
    "Thyroid": "Lower neck/Thyroid",
    "Mediastinum": "Mediastinum",
    "Lungs": "Lungs/Pleural spaces",
    "Other_chest_findings": "Other",
}

abdomen_sections = {
    "Hepatobiliary": "Hepatobiliary system",
    "Spleen": "Spleen",
    "Pancreas": "Pancreas",
    "Adrenals": "Adrenals",
    "Kidneys": "Kidneys/Ureters",
    # GI_tract is part of the response schema; without this entry the
    # extracted GI findings would never appear in the generated report.
    "GI_tract": "GI tract",
    "Peritoneum_mesentery_retroperitoneum_vasculature": "Peritoneum/Mesentery, Retroperitoneum, Vasculature, and nodes",
    "Reproductive_organs": "Reproductive organs",
    "Bladder": "Bladder",
    "Other_pelvis_and_abdomen_findings": "Other",
    "Bones": "BONES AND SOFT TISSUES"
}


def _render_sections(output_dict, sections):
    """Render the given sections of the parsed LLM output as report text.

    Args:
        output_dict: Parsed LLM output, keyed by schema name.
        sections: Mapping of schema key to the heading printed for it.

    Returns:
        A string with one "<heading>: <findings>" paragraph per section.
        Dict values are rendered as "- key: value" bullets, list values as
        numbered bullets, and anything else (including a missing key, which
        yields None) is written inline after the heading.
    """
    rendered = ""
    for section, title in sections.items():
        rendered += f"\n\n{title}: "
        findings = output_dict.get(section)
        if isinstance(findings, dict):
            for key, value in findings.items():
                rendered += f"\n- {key}: {value}"
        elif isinstance(findings, list):
            for j, item in enumerate(findings, start=1):
                rendered += f"\n- {j}. {item}"
        else:
            rendered += f"{findings}"
    return rendered


def _impression_items(impressions):
    """Normalise the IMPRESSIONS field of the LLM output to a list.

    Args:
        impressions: Value of the 'IMPRESSIONS' key (list, string, or None).

    Returns:
        A list of impressions. A single string is wrapped in a one-item list
        so that it is not enumerated character by character.

    Raises:
        ValueError: If the field is missing, so the caller can skip the report
            with a meaningful message.
    """
    if impressions is None:
        raise ValueError("LLM output does not contain an IMPRESSIONS field")
    if isinstance(impressions, str):
        return [impressions]
    return list(impressions)


# Loop through each cell in the column
# The column where reports are saved is assumed to have a title 'Report Text'.
# Iterating with .items() yields the actual index label used by df.loc below.
for i, cell_content in df['Report Text'].items():
    print(f'Report {i}')
    try:
        if pd.isna(cell_content):  # Skip empty cells
            continue

        messages = prompt.format_messages(text=cell_content, format_instructions=format_instructions)
        # Use the Runnable interface (.invoke) consistently; calling the model
        # object directly is deprecated in LangChain.
        response = model.invoke(messages)
        try:
            output_dict = output_parser.invoke(response.content)
        #If formatting error occurs, use the LLM-generated output and ask LLM to fix it
        except Exception as e:
            print(f"Parsing error occurred: {e}")
            output_dict = fixing_parser.invoke(response.content)

        output_str = "FINDINGS:"
        output_str += "\n\n CHEST:"
        output_str += _render_sections(output_dict, chest_sections)
        output_str += "\n\n ABDOMEN AND PELVIS:"
        output_str += _render_sections(output_dict, abdomen_sections)

        output_str += "\n\nIMPRESSIONS:\n"
        for j, impression in enumerate(_impression_items(output_dict.get('IMPRESSIONS')), start=1):
            output_str += f"{j}. {impression}\n"

        # Update DataFrame
        df.loc[i, 'LLM-generated report'] = output_str
    except Exception as e:
        # Best effort: a failing report is logged and left without output so
        # that the remaining reports are still processed.
        print(f"Error occurred: {e}")

# The results are written back into the input workbook (new column added).
df.to_excel(input_file, index=False)

Report 0


Remove reports for which the LLM was not able to generate output

In [3]:
# Radiologists whose workbooks should be cleaned
radiologists = ['Radiologist-1']

for radiologist in radiologists:
    # Each radiologist has a dedicated workbook that is cleaned in place
    workbook_path = f"{radiologist}-reports.xlsx"
    df = pd.read_excel(workbook_path)

    # Keep only the rows for which the LLM produced a report
    has_llm_report = df['LLM-generated report'].notna()
    df_cleaned = df[has_llm_report]

    # Overwrite the workbook with the surviving rows
    df_cleaned.to_excel(workbook_path, index=False)

    # Report how many rows are left after the drop
    print(f"Number of rows for {radiologist} after cleaning: {len(df_cleaned)}")

Compute Conciseness Percentage (CP) score and create a box plot

In [None]:
#Define the function that counts the words separated by whitespace and '/' delimiter
def count_words(text):
    """Count the words in ``text``, treating whitespace and '/' as delimiters.

    Args:
        text: The string to analyse.

    Returns:
        The number of non-empty words. Empty fragments produced by a leading,
        trailing, repeated, or standalone '/' are not counted, so "a / b" is
        2 words and "w/" is 1 word.
    """
    # Split by whitespace first, then split each token by '/'.
    # Replace `token.split('/')` with `[token]` if you only want to count the
    # words that are separated by whitespace.
    return sum(
        1
        for token in text.split()
        for part in token.split('/')
        if part  # skip empty fragments such as the ones around a bare "/"
    )

# List of radiologists
radiologists = ['Radiologist-1']

# Columns holding the original report and its LLM-streamlined counterpart.
# They are addressed by name so that the score does not depend on where the
# columns happen to sit in the worksheet.
report_column = 'Report Text'
llm_report_column = 'LLM-generated report'

#List of CP scores of all radiologists
all_CP_values = []

for radiologist in radiologists:
    # Read the file into a DataFrame
    input_file = f"{radiologist}-reports.xlsx"
    df = pd.read_excel(input_file)

    # Word counts of the original and the LLM-generated report, row by row.
    # Values are converted to str first so that non-text cells do not break
    # count_words.
    original_word_counts = df[report_column].astype(str).map(count_words)
    llm_word_counts = df[llm_report_column].astype(str).map(count_words)

    # CP = (words in LLM report / words in original report) * 100.
    # A zero-word original is masked to NaN so the division yields NaN
    # instead of raising or producing inf.
    df['CP'] = llm_word_counts / original_word_counts.where(original_word_counts > 0) * 100
    df.to_excel(input_file, index=False)
    # Append CP scores to the list of all CP scores of all radiologists
    all_CP_values.append(df['CP'])

# Create a DataFrame with all CP values for each radiologist
df_combined = pd.concat(all_CP_values, axis=1, keys=radiologists)

# Create labels for the x-axis, derived from the radiologist list so the two
# cannot get out of sync ("Radiologist-1" -> "Radiologist 1")
labels = [name.replace('-', ' ') for name in radiologists]

# Plot boxplots
plt.figure(figsize=(10, 6))
sns.boxplot(data=df_combined, palette="Set3", order=radiologists)
# Reference line: CP = 100% means the LLM report is as long as the original
plt.axhline(y=100, color='lightgrey', linestyle='--')
plt.ylabel('CP (%)')
plt.xticks(ticks=range(len(radiologists)), labels=labels, rotation=45)
plt.tight_layout()
plt.show()

Print indices of the reports that have CP > 100% and their total number

In [None]:
# Radiologists to summarise
radiologists = ['Radiologist-1']

# Per-radiologist summary: how many reports exceed 100% CP and which rows they are
CP_gt_100_info = {}

for radiologist in radiologists:
    # Load this radiologist's workbook
    input_file = f"{radiologist}-reports.xlsx"
    df = pd.read_excel(input_file)

    # Boolean mask of the reports whose LLM version is longer than the original
    exceeds_original = df['CP'].gt(100)
    CP_gt_100_indexes = df.index[exceeds_original].tolist()
    CP_gt_100_count = len(CP_gt_100_indexes)

    # Record the summary for this radiologist
    CP_gt_100_info[radiologist] = dict(count=CP_gt_100_count, indexes=CP_gt_100_indexes)

# Print the summary for every radiologist
for radiologist, info in CP_gt_100_info.items():
    print(f"Radiologist {radiologist}: {info['count']} reports with CP > 100%")
    print(f"Indexes of reports with CP > 100%: {info['indexes']}")