In [10]:
import os
import numpy as np
import pandas as pd
import pydicom
from PIL import Image
from google.cloud import storage
from torchvision import transforms
from transformers import BertTokenizer, BertModel
import torch
from torch import nn
from torch.utils.data import DataLoader
from io import BytesIO
import torch
from transformers import BertTokenizer, BertModel
from torchvision.models import resnet50
class ALBEFModel(nn.Module):
    def __init__(self, image_model, text_model, embed_dim):
        super(ALBEFModel, self).__init__()
        self.image_model = image_model
        self.text_model = text_model
        self.image_proj = nn.Linear(image_model.fc.in_features, embed_dim)
        self.text_proj = nn.Linear(text_model.config.hidden_size, embed_dim)

    def forward(self, image, text_input_ids, text_attention_mask):
        image_features = self.image_model(image)
        image_embed = self.image_proj(image_features)

        text_output = self.text_model(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_output.last_hidden_state[:, 0, :]
        text_embed = self.text_proj(text_features)

        return image_embed, text_embed
image_model = resnet50(pretrained=True)
num_features = image_model.fc.in_features
image_model.fc = nn.Linear(num_features, num_features) 

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text_model = BertModel.from_pretrained('bert-base-uncased')

embed_dim = 512
model = ALBEFModel(image_model, text_model, embed_dim)


model.load_state_dict(torch.load('albef_model_server.pth'))
model.eval()


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)



ALBEFModel(
  (image_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

ALBEFModel(
  (image_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [12]:
# Define the BERT tokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def generate_text_embeddings(text):
    tokenized_text = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    input_ids = tokenized_text['input_ids'].to(device)
    attention_mask = tokenized_text['attention_mask'].to(device)
    with torch.no_grad():
        _, text_embed = model(torch.zeros((1, 3, 224, 224)).to(device), input_ids, attention_mask)
    return text_embed.cpu().numpy().flatten()
import pandas as pd

triplets_path = 'triplets_server.csv'
triplets_df = pd.read_csv(triplets_path)

print(triplets_df.head())

triplets = list(triplets_df.itertuples(index=False, name=None))


def generate_triplet_embeddings(triplet):
    triplet_text = f"{triplet[0]} {triplet[1]} {triplet[2]}"
    return generate_text_embeddings(triplet_text)

triplet_embeddings = [generate_triplet_embeddings(triplet) for triplet in triplets]

                                             Entity1      Relation  \
0                                      Hair artifact   has symptom   
1                                      Hair artifact  described by   
2         Large left upper lobe necrotic lung cancer   has symptom   
3         Large left upper lobe necrotic lung cancer  described by   
4  Congenital lobar overinflation - left lung upp...   has symptom   

                                             Entity2  
0                               Shortness of breath.  
1  Heart size normal. Lungs clear. Pronounced ela...  
2                   Progressive shortness of breath.  
3  An 8 cm mass in the left upper lobe extends to...  
4                              Respiratory distress.  


  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [13]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

image_embeddings = np.load('image_embeddings_server.npy', allow_pickle=True).item()
text_embeddings = np.load('text_embeddings_server.npy', allow_pickle=True).item()

# Function to retrieve top-k texts and triplets based on cosine similarity
def retrieve_top_k_texts_and_triplets(image_embeddings, text_embeddings, triplet_embeddings, triplets, k=5):
    top_k_texts_and_triplets = {}

    image_embeds = np.array([image_embeddings[key] for key in image_embeddings])
    text_embeds = np.array([text_embeddings[key] for key in text_embeddings])

        # Compute cosine similarity matrices
    text_similarity_matrix = cosine_similarity(image_embeds, text_embeds)
    triplet_similarity_matrix = cosine_similarity(image_embeds, triplet_embeddings)

    for i, image_id in enumerate(image_embeddings):
        top_k_text_indices = text_similarity_matrix[i].argsort()[-k:][::-1]
        top_k_triplet_indices = triplet_similarity_matrix[i].argsort()[-k:][::-1]

        top_k_text_ids = [list(text_embeddings.keys())[index] for index in top_k_text_indices]
        top_k_triplets = [triplets[index] for index in top_k_triplet_indices]

        top_k_texts_and_triplets[image_id] = {
            'text_ids': top_k_text_ids,
            'triplets': top_k_triplets
        }

    return top_k_texts_and_triplets
# Generate triplet embeddings if not already loaded
top_k_texts_and_triplets = retrieve_top_k_texts_and_triplets(image_embeddings, text_embeddings, triplet_embeddings, triplets, k=5)



In [14]:
selected_image_id=['450', '173', '23', '210', '17', '153', '149', '215', '242', '163']

In [15]:
top_k_texts_and_triplets_selected = {}
for numeric_id in selected_image_id:
    matching_image_id = next((img_id for img_id in top_k_texts_and_triplets if img_id.startswith(f"{numeric_id}_")), None)

    if matching_image_id:
        top_k_texts_and_triplets_selected[matching_image_id] = top_k_texts_and_triplets[matching_image_id]
    else:
        print(f"Warning: No matching image ID found for numeric ID {numeric_id}")

for image_id, data in top_k_texts_and_triplets_selected.items():
    print(f"Image ID: {image_id}")
    print(f"Top-{len(data['text_ids'])} Text IDs: {data['text_ids']}")
    print(f"Top-{len(data['triplets'])} Triplets: {data['triplets']}\n")

Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Top-5 Text IDs: ['31402_s51718048', '18549_s50696726', '23163_s53192951', '19294_s59515023', '184_s54234360']
Top-5 Triplets: [('Hiatus hernia', 'described by', 'Air-fluid level behind the heart on the lateral radiograph.'), ('Sickle cell disease with acute chest syndrome', 'described by', 'Cardiac silhouette is mildly enlarged. There is bilateral lower zone atelectasis and hazy airspace opacities. Bones are diffusely sclerotic with multi-level H-shaped vertebral bodies. Splenic shadow is difficult to visualize.'), ('Giant cell interstitial pneumonia', 'described by', 'heart is enlarged with a cardiothoracic ratio of 0.59 ill-defined "shaggy" left heart border suggests interstitial thickening in the lingula increased interstitial markings in a bibasal distribution no consolidation, pulmonary nodules, or pleural effusion degenerative change of the shoulders and thoracic spine'), ('Usual interstitial pneumonia (UIP)', 'described 

In [16]:
pathology_words = ["atelectasis", "opacities", "consolidation", "effusion", "pneumothorax"]
positional_words = ["right", "left", "bilateral", "upper", "lower", "middle"]
severity_words = ["mild", "moderate", "severe"]
size_words = ["small", "large"]

def construct_detailed_prompt(image_id, context_texts, triplets):
    context = " ".join(context_texts)
    triplets_text = " ".join([f"{triplet[0]} {triplet[1]} {triplet[2]}" for triplet in triplets])

    prompt = f"""
    You are an assistant designed to write impression summaries for the radiology report.
    Users will send a context text and you will respond with an impression summary using that context.
    Instructions:
    • Impression should be based on the information that the user will send in the context.
    • The impression should not mention anything about follow-up actions.
    • Impression should not contain any mentions of prior or previous studies.
    • Use the following words for attributes where applicable:
        - Pathology: {', '.join(pathology_words)}
        - Positional: {', '.join(positional_words)}
        - Severity: {', '.join(severity_words)}
        - Size: {', '.join(size_words)}
    CONTEXT: {context} {triplets_text}
    IMPRESSION:
    """
    return prompt

In [17]:
all_reports_df=pd.read_csv("all_reports_df_images.csv")

In [18]:
def construct_detailed_prompt(image_id, context_texts, triplets):
    prompt = f"Image ID: {image_id}\n"
    prompt += "Context Texts:\n"
    for context in context_texts:
        prompt += f"- {context}\n"
    prompt += "Triplets:\n"
    for triplet in triplets:
        prompt += f"- {triplet[0]} {triplet[1]} {triplet[2]}\n"
    return prompt

detailed_prompts = {}

# Process each image and generate detailed prompts

for image_id, data in top_k_texts_and_triplets_selected.items():
    if any(image_id.startswith(selected_id) for selected_id in selected_image_id):
        context_text_ids = data['text_ids']
        context_texts = []

        print(f"Processing Image ID: {image_id}")
        print(f"Top-5 Text IDs: {context_text_ids}")

        for text_id in context_text_ids:
            report_row = all_reports_df[all_reports_df['report_id'] == text_id]
            if not report_row.empty:
                context_texts.append(report_row['cleaned_report'].values[0])
            else:
                print(f"Warning: Report ID {text_id} not found in all_reports_df.")
        # Skip if no context texts found
        if not context_texts:
            print(f"Warning: No context texts found for image ID {image_id}.")
            continue
            
        triplets = data['triplets']

        detailed_prompts[image_id] = construct_detailed_prompt(image_id, context_texts, triplets)

for image_id, prompt in detailed_prompts.items():
    print(f"Prompt for Image ID: {image_id}\n{prompt}\n")


Processing Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Top-5 Text IDs: ['31402_s51718048', '18549_s50696726', '23163_s53192951', '19294_s59515023', '184_s54234360']
Processing Image ID: 173_eb2fabb7-4bbc8aab-d7371282-08e5bcb5-de2e430a
Top-5 Text IDs: ['9335_s56406916', '22008_s50002557', '11261_s58493244', '22683_s54917419', '194_s53950795']
Processing Image ID: 23_28fad2ac-d6001216-b4f72c5b-2d4d452e-17b6c9a5
Top-5 Text IDs: ['19653_s56073986', '20585_s56207921', '11908_s59124602', '24181_s59459488', '12707_s53019840']
Processing Image ID: 210_5053834b-b1bea04f-680aec42-45abe415-c2d097ba
Top-5 Text IDs: ['6870_s56090555', '7286_s53507895', '1758_s55579609', '5815_s56477239', '7183_s55768555']
Processing Image ID: 17_6ad819bb-bae74eb9-7b663e90-b8deabd7-57f8054a
Top-5 Text IDs: ['21587_s55564934', '19292_s58015625', '10631_s54279389', '5377_s51448682', '31901_s58288699']
Processing Image ID: 153_2c814e99-aa096010-bd722059-5a35bd69-a902e44e
Top-5 Text IDs: ['19535_s57131561

In [19]:
detailed_prompts = {}

for image_id, data in top_k_texts_and_triplets_selected.items():
    if any(image_id.startswith(selected_id) for selected_id in selected_image_id):
        context_text_ids = data['text_ids']
        context_texts = []

        print(f"Processing Image ID: {image_id}")
        print(f"Top-5 Text IDs: {context_text_ids}")

        for text_id in context_text_ids:
            report_row = all_reports_df[all_reports_df['report_id'] == text_id]
            if not report_row.empty:
                context_texts.append(report_row['cleaned_report'].values[0])
            else:
                print(f"Warning: Report ID {text_id} not found in all_reports_df.")

        if not context_texts:
            print(f"Warning: No context texts found for image ID {image_id}.")
            continue

        triplets = data['triplets']
        detailed_prompts[image_id] = construct_detailed_prompt(image_id, context_texts, triplets)

for image_id, prompt in detailed_prompts.items():
    print(f"Prompt for Image ID: {image_id}\n{prompt}\n")

Processing Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Top-5 Text IDs: ['31402_s51718048', '18549_s50696726', '23163_s53192951', '19294_s59515023', '184_s54234360']
Processing Image ID: 173_eb2fabb7-4bbc8aab-d7371282-08e5bcb5-de2e430a
Top-5 Text IDs: ['9335_s56406916', '22008_s50002557', '11261_s58493244', '22683_s54917419', '194_s53950795']
Processing Image ID: 23_28fad2ac-d6001216-b4f72c5b-2d4d452e-17b6c9a5
Top-5 Text IDs: ['19653_s56073986', '20585_s56207921', '11908_s59124602', '24181_s59459488', '12707_s53019840']
Processing Image ID: 210_5053834b-b1bea04f-680aec42-45abe415-c2d097ba
Top-5 Text IDs: ['6870_s56090555', '7286_s53507895', '1758_s55579609', '5815_s56477239', '7183_s55768555']
Processing Image ID: 17_6ad819bb-bae74eb9-7b663e90-b8deabd7-57f8054a
Top-5 Text IDs: ['21587_s55564934', '19292_s58015625', '10631_s54279389', '5377_s51448682', '31901_s58288699']
Processing Image ID: 153_2c814e99-aa096010-bd722059-5a35bd69-a902e44e
Top-5 Text IDs: ['19535_s57131561

In [None]:
#Below key has been deleted
from openai import OpenAI
client = OpenAI(
    api_key='XXX',
)

In [22]:
def generate_report(prompt):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a medical AI specialized in generating radiology reports."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=300 
    )
    return(response)
    

generated_reports = {}
for image_id, prompt in detailed_prompts.items():
    generated_reports[image_id] = generate_report(prompt)
    print(f"Generated report for image {image_id}:\n{generated_reports[image_id]}\n")

with open('generated_reports.txt', 'w') as file:
    for image_id, report in generated_reports.items():
        file.write(f"Image ID: {image_id}\n")
        file.write(f"{report}\n")
        file.write("\n" + "-"*80 + "\n")

print("Report generation completed.")


Generated report for image 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f:
ChatCompletion(id='chatcmpl-9p8MRA0VOXPtsRFrp2aXvLdwmOMW9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Multiple findings are present including bilateral perihilar opacities, possible effusions indicated by blunting of the costophrenic angles, and potential atelectasis or consolidation in the region of the minor fissure. Atherosclerotic and mitral annular calcifications are noted, alongside an enlarged cardiac silhouette. There are also multiple compression deformities in the thoracic and lumbar spine. \n\nAdditional findings include moderate enlargement of the heart, central vascular congestion with mild interstitial edema and right lower and middle lobe densities which may indicate consolidations. No pneumothorax is seen.\n\nThe presence of multifocal regions of consolidation are compatible with metastatic lesions in the lungs. However, no definitive a

In [23]:
generated_reports

{'450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f': ChatCompletion(id='chatcmpl-9p8MRA0VOXPtsRFrp2aXvLdwmOMW9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Multiple findings are present including bilateral perihilar opacities, possible effusions indicated by blunting of the costophrenic angles, and potential atelectasis or consolidation in the region of the minor fissure. Atherosclerotic and mitral annular calcifications are noted, alongside an enlarged cardiac silhouette. There are also multiple compression deformities in the thoracic and lumbar spine. \n\nAdditional findings include moderate enlargement of the heart, central vascular congestion with mild interstitial edema and right lower and middle lobe densities which may indicate consolidations. No pneumothorax is seen.\n\nThe presence of multifocal regions of consolidation are compatible with metastatic lesions in the lungs. However, no definitive acute cardiopulmonary pro

In [24]:
import re

def clean_report_text(text):
    text = re.sub(r'\n', ' ', text)  
    text = re.sub(r'\s+', ' ', text) 
    text = text.strip()
    return text


In [25]:
import re

# Function to extract report content from a file and clean it
def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    
    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

               
                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports


file_path = 'generated_reports.txt'

cleaned_generated_reports = extract_report_content(file_path)


for image_id, report in cleaned_generated_reports.items():
    print(f"Image ID: {image_id}")
    print(f"Report: {report}\n")



Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Report: Multiple findings are present including bilateral perihilar opacities, possible effusions indicated by blunting of the costophrenic angles, and potential atelectasis or consolidation in the region of the minor fissure. Atherosclerotic and mitral annular calcifications are noted, alongside an enlarged cardiac silhouette. There are also multiple compression deformities in the thoracic and lumbar spine.   Additional findings include moderate enlargement of the heart, central vascular congestion with mild interstitial edema and right lower and middle lobe densities which may indicate consolidations. No pneumothorax is seen.  The presence of multifocal regions of consolidation are compatible with metastatic lesions in the lungs. However, no definitive acute cardiopulmonary process is identified.  Chronic pulmonary fibrotic changes are noted extensively, especially in the peripheral basal regions. Cardiomegaly is moderate and

In [26]:
cleaned_generated_reports

{'450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f': 'Multiple findings are present including bilateral perihilar opacities, possible effusions indicated by blunting of the costophrenic angles, and potential atelectasis or consolidation in the region of the minor fissure. Atherosclerotic and mitral annular calcifications are noted, alongside an enlarged cardiac silhouette. There are also multiple compression deformities in the thoracic and lumbar spine.   Additional findings include moderate enlargement of the heart, central vascular congestion with mild interstitial edema and right lower and middle lobe densities which may indicate consolidations. No pneumothorax is seen.  The presence of multifocal regions of consolidation are compatible with metastatic lesions in the lungs. However, no definitive acute cardiopulmonary process is identified.  Chronic pulmonary fibrotic changes are noted extensively, especially in the peripheral basal regions. Cardiomegaly is moderate and stable. Ther

In [29]:
import re

# Function to extract and clean report content from a file
def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports

def save_reports_as_txt(cleaned_reports, output_file_path):
    with open(output_file_path, 'w') as file:
        for image_id, report in cleaned_reports.items():
            file.write(f"Image ID: {image_id}\n")
            file.write(f"Report: {report}\n")
            file.write("--------------------------------------------------------------------------------\n")

file_path = 'generated_reports.txt'
output_file_path = 'cleaned_generated_reports.txt'

cleaned_generated_reports = extract_report_content(file_path)
save_reports_as_txt(cleaned_generated_reports, output_file_path)

print(f"Cleaned reports have been saved to {output_file_path}")


Cleaned reports have been saved to cleaned_generated_reports.txt


In [30]:
import pandas as pd


all_reports_df = pd.read_csv('all_reports_df_images.csv')

data = []

with open('cleaned_generated_reports.txt', 'r') as file:
    content = file.read().strip().split('--------------------------------------------------------------------------------')

for block in content:
    lines = block.strip().split('\n')
    if len(lines) > 1:
        try:
            image_id = lines[0].split(': ')[1]
            cleaned_report = ' '.join(lines[1:]).strip()
            data.append([image_id, cleaned_report])
        except IndexError:
            print(f"Skipping malformed block: {block}")

cleaned_reports_df = pd.DataFrame(data, columns=['image_id', 'AI_generated_report'])

filtered_df = all_reports_df[all_reports_df['image_id'].isin(cleaned_reports_df['image_id'])]

filtered_df = filtered_df.merge(cleaned_reports_df, on='image_id', how='left')

filtered_df.to_csv('filtered_reports_with_ai.csv', index=False)



In [32]:
def generate_report(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a medical AI specialized in generating radiology reports."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=300  
    )
    return(response)
    

generated_reports_4o = {}
for image_id, prompt in detailed_prompts.items():
    generated_reports[image_id] = generate_report(prompt)
    print(f"Generated report for image {image_id}:\n{generated_reports[image_id]}\n")

with open('generated_reports_4o.txt', 'w') as file:
    for image_id, report in generated_reports.items():
        file.write(f"Image ID: {image_id}\n")
        file.write(f"{report}\n")
        file.write("\n" + "-"*80 + "\n")

print("Report generation completed.")


Generated report for image 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f:
ChatCompletion(id='chatcmpl-9p8W0dbfFFo2SW0gzh50XEbO7X0DO', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Impression Summary:\nBilateral perihilar opacities and blunting of posterior costophrenic angles suggestive of effusions. Linear opacity in the minor fissure, potentially indicative of fluid or atelectasis. Cardiac silhouette appears enlarged, potentially due to poor inspiratory effort. Dense mitral annular and aortic calcifications are present. Age indeterminate multifocal compression deformities within the thoracic and likely lumbar spine. Findings are suggestive of congestive heart failure with a possibility of a superimposed infection.', role='assistant', function_call=None, tool_calls=None))], created=1721974488, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_400f27fa1f', usage=CompletionUsage(comple

In [33]:
import re

def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports

def save_reports_as_txt(cleaned_reports, output_file_path):
    with open(output_file_path, 'w') as file:
        for image_id, report in cleaned_reports.items():
            file.write(f"Image ID: {image_id}\n")
            file.write(f"Report: {report}\n")
            file.write("--------------------------------------------------------------------------------\n")

file_path = 'generated_reports_4o.txt'
output_file_path = 'cleaned_generated_reports_4o.txt'

cleaned_generated_reports = extract_report_content(file_path)
save_reports_as_txt(cleaned_generated_reports, output_file_path)


print(f"Cleaned reports have been saved to {output_file_path}")


Cleaned reports have been saved to cleaned_generated_reports_4o.txt


In [34]:
import pandas as pd


all_reports_df = pd.read_csv('all_reports_df_images.csv')

data = []

with open('cleaned_generated_reports_4o.txt', 'r') as file:
    content = file.read().strip().split('--------------------------------------------------------------------------------')

for block in content:
    lines = block.strip().split('\n')
    if len(lines) > 1:
        try:
            image_id = lines[0].split(': ')[1]
            cleaned_report = ' '.join(lines[1:]).strip()
            data.append([image_id, cleaned_report])
        except IndexError:
            print(f"Skipping malformed block: {block}")

cleaned_reports_df_4o = pd.DataFrame(data, columns=['image_id', 'AI_generated_report_4o'])

filtered_df_4o = all_reports_df[all_reports_df['image_id'].isin(cleaned_reports_df_4o['image_id'])]

filtered_df_4o = filtered_df_4o.merge(cleaned_reports_df_4o, on='image_id', how='left')

filtered_df_4o.to_csv('filtered_reports_with_ai_4o.csv', index=False)



In [37]:
import pandas as pd
from bert_score import score
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


comparison_df = pd.read_csv('filtered_reports_with_ai.csv')

def calculate_scores(cleaned_report, ai_generated_report):
    P, R, F1 = score([ai_generated_report], [cleaned_report], lang="en", verbose=True)
    bert_score_precision = P.mean().item()
    bert_score_recall = R.mean().item()
    bert_score_f1 = F1.mean().item()
    
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([cleaned_report, ai_generated_report])
    tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    

    return (bert_score_precision, bert_score_recall, bert_score_f1, tfidf_score)

scores_4 = comparison_df.apply(lambda row: calculate_scores(row['cleaned_report'], row['AI_generated_report_GPT-4']), axis=1)
scores_4o = comparison_df.apply(lambda row: calculate_scores(row['cleaned_report'], row['AI_generated_report_GPT-4o']), axis=1)

comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4', 'tfidf_score_4']] = pd.DataFrame(scores_4.tolist(), index=comparison_df.index)
comparison_df[['BERTScore_Precision_4o', 'BERTScore_Recall_4o', 'BERTScore_F1_4o', 'tfidf_score_4o']] = pd.DataFrame(scores_4o.tolist(), index=comparison_df.index)

average_scores_4 = comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4',  'tfidf_score_4']].mean()
average_scores_4o = comparison_df[['BERTScore_Precision_4o', 'BERTScore_Recall_4o', 'BERTScore_F1_4o', 'tfidf_score_4o']].mean()

print("Average scores for GPT-4 generated reports:")
print(average_scores_4)

print("\nAverage scores for GPT-4o generated reports:")
print(average_scores_4o)

comparison_df.to_csv('comparison_scores.csv', index=False)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  2.42it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00,  8.39it/s]


done in 0.69 seconds, 1.44 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  3.28it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 27.26it/s]


done in 0.37 seconds, 2.70 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  2.70it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 29.03it/s]


done in 0.46 seconds, 2.16 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  5.65it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 269.99it/s]

done in 0.18 seconds, 5.41 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  7.09it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 100.02it/s]

done in 0.15 seconds, 6.64 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  4.62it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 17.74it/s]


done in 0.27 seconds, 3.64 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  6.33it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.17 seconds, 5.94 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  5.58it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]


done in 0.19 seconds, 5.28 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.00it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 170.49it/s]

done in 0.13 seconds, 7.80 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  3.30it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 30.34it/s]


done in 0.35 seconds, 2.88 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.32it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 95.76it/s]

done in 0.13 seconds, 7.61 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  4.97it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 110.68it/s]


done in 0.21 seconds, 4.75 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  5.14it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 99.71it/s]


done in 0.21 seconds, 4.81 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.38it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.13 seconds, 7.64 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.66it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 318.06it/s]

done in 0.11 seconds, 9.20 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 11.15it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.10 seconds, 10.24 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.52it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.11 seconds, 8.98 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 13.95it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 99.35it/s]


done in 0.08 seconds, 11.88 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  6.82it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 232.56it/s]

done in 0.15 seconds, 6.86 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 11.59it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.09 seconds, 11.29 sentences/sec
Average scores for GPT-4 generated reports:
BERTScore_Precision_4    0.831345
BERTScore_Recall_4       0.827029
BERTScore_F1_4           0.829072
tfidf_score_4            0.276860
dtype: float64

Average scores for GPT-4o generated reports:
BERTScore_Precision_4o    0.841910
BERTScore_Recall_4o       0.821181
BERTScore_F1_4o           0.831197
tfidf_score_4o            0.218646
dtype: float64





Random Radiology Image

In [5]:
from transformers import BertTokenizer, BertModel
from torchvision.models import resnet50
from torch import nn
import torch
class ALBEFModel(nn.Module):
    def __init__(self, image_model, text_model, embed_dim):
        super(ALBEFModel, self).__init__()
        self.image_model = image_model
        self.text_model = text_model
        self.image_proj = nn.Linear(image_model.fc.in_features, embed_dim)
        self.text_proj = nn.Linear(text_model.config.hidden_size, embed_dim)

    def forward(self, image, text_input_ids, text_attention_mask):
        image_features = self.image_model(image)
        image_embed = self.image_proj(image_features)

        text_output = self.text_model(input_ids=text_input_ids, attention_mask=text_attention_mask)
        text_features = text_output.last_hidden_state[:, 0, :]
        text_embed = self.text_proj(text_features)

        return image_embed, text_embed
image_model = resnet50(pretrained=True)
num_features = image_model.fc.in_features
image_model.fc = nn.Linear(num_features, num_features) 

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text_model = BertModel.from_pretrained('bert-base-uncased')
embed_dim = 512
model = ALBEFModel(image_model, text_model, embed_dim)


model.load_state_dict(torch.load('albef_model_server.pth'))
model.eval()


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)



ALBEFModel(
  (image_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [50]:

from torchvision import transforms
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def generate_image_embeddings(dicom_file_path):
    with open(dicom_file_path, 'rb') as f:
        dicom_data = f.read()
    dicom_file = pydicom.dcmread(BytesIO(dicom_data))
    image_array = dicom_file.pixel_array
    image = Image.fromarray((image_array / np.max(image_array) * 255).astype(np.uint8)).convert('RGB')
    image = image_transforms(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_embed, _ = model(image, torch.zeros((1, 512), dtype=torch.int64).to(device), torch.zeros((1, 512), dtype=torch.int64).to(device))
    return image_embed.cpu().numpy().flatten()

dicomfile = '8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm'
image_embeddings_new = generate_image_embeddings(dicomfile)


In [103]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def generate_text_embeddings(text):
    tokenized_text = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    input_ids = tokenized_text['input_ids'].to(device)
    attention_mask = tokenized_text['attention_mask'].to(device)
    with torch.no_grad():
        _, text_embed = model(torch.zeros((1, 3, 224, 224)).to(device), input_ids, attention_mask)
    return text_embed.cpu().numpy().flatten()
import pandas as pd

triplets_path = 'triplets_server.csv'
triplets_df = pd.read_csv(triplets_path)

print(triplets_df.head())

triplets = list(triplets_df.itertuples(index=False, name=None))


def generate_triplet_embeddings(triplet):
    triplet_text = f"{triplet[0]} {triplet[1]} {triplet[2]}"
    return generate_text_embeddings(triplet_text)

triplet_embeddings = [generate_triplet_embeddings(triplet) for triplet in triplets]

                                             Entity1      Relation  \
0                                      Hair artifact   has symptom   
1                                      Hair artifact  described by   
2         Large left upper lobe necrotic lung cancer   has symptom   
3         Large left upper lobe necrotic lung cancer  described by   
4  Congenital lobar overinflation - left lung upp...   has symptom   

                                             Entity2  
0                               Shortness of breath.  
1  Heart size normal. Lungs clear. Pronounced ela...  
2                   Progressive shortness of breath.  
3  An 8 cm mass in the left upper lobe extends to...  
4                              Respiratory distress.  


In [102]:
len(triplets)

5

In [104]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

text_embeddings = np.load('text_embeddings_server.npy', allow_pickle=True).item()

def retrieve_top_k_texts_and_triplets(image_id, image_embedding, text_embeddings, triplet_embeddings, triplets, k=5):
    image_embeds = np.array([image_embedding])

    text_embeds = np.array([text_embeddings[key] for key in text_embeddings.keys()])
    triplet_embeds = np.array(triplet_embeddings)

    text_similarity_matrix = cosine_similarity(image_embeds, text_embeds)
    triplet_similarity_matrix = cosine_similarity(image_embeds, triplet_embeds)

    top_k_text_indices = text_similarity_matrix[0].argsort()[-k:][::-1]
    top_k_triplet_indices = triplet_similarity_matrix[0].argsort()[-k:][::-1]

    top_k_text_ids = [list(text_embeddings.keys())[index] for index in top_k_text_indices]
    top_k_triplets = [triplets[index] for index in top_k_triplet_indices]

    result = {
        'text_ids': top_k_text_ids,
        'triplets': top_k_triplets
    }
    
    return result

image_id = '8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm'

result = {image_id: retrieve_top_k_texts_and_triplets(image_id, image_embeddings_new, text_embeddings, triplet_embeddings, triplets, k=5)}


In [46]:
result

{'8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm': {'text_ids': ['30239_s50948352',
   '7560_s56450273',
   '5506_s56860376',
   '8448_s56350597',
   '20517_s50650436'],
  'triplets': [('Azygos fissure',
    'described by',
    'Incidental azygos fissure with mild opacity in the inferior aspect of the fissure.'),
   ('Inferior pulmonary ligament',
    'described by',
    'Lungs are well expanded and clear. Normal cardiomediastinal contour. Left hemidiaphragm retrocardiac irregularity represents the inferior pulmonary ligament'),
   ('Inferior pulmonary ligament',
    'described by',
    'Lungs are well expanded and clear. Normal cardiomediastinal contour. Left hemidiaphragm retrocardiac irregularity represents the inferior pulmonary ligament'),
   ('Rhomboid fossa of the clavicle',
    'described by',
    'The lungs and pleural spaces are clear. Cardiomediastinal contours and hilar regions within normal limits.\xa0Incidental prominant left rhomboid fossa of the clavicle on the l

In [25]:
pathology_words = ["atelectasis", "opacities", "consolidation", "effusion", "pneumothorax"]
positional_words = ["right", "left", "bilateral", "upper", "lower", "middle"]
severity_words = ["mild", "moderate", "severe"]
size_words = ["small", "large"]

def construct_detailed_prompt(image_id, context_texts, triplets):
    context = " ".join(context_texts)
    triplets_text = " ".join([f"{triplet[0]} {triplet[1]} {triplet[2]}" for triplet in triplets])

    prompt = f"""
    You are an assistant designed to write impression summaries for the radiology report.
    Users will send a context text and you will respond with an impression summary using that context.
    Instructions:
    • Impression should be based on the information that the user will send in the context.
    • The impression should not mention anything about follow-up actions.
    • Impression should not contain any mentions of prior or previous studies.
    • Use the following words for attributes where applicable:
        - Pathology: {', '.join(pathology_words)}
        - Positional: {', '.join(positional_words)}
        - Severity: {', '.join(severity_words)}
        - Size: {', '.join(size_words)}
    CONTEXT: {context} {triplets_text}
    IMPRESSION:
    """
    return prompt

In [26]:
all_reports_df=pd.read_csv("all_reports_df_images.csv")

In [32]:
selected_image_id = ['8000']

In [52]:
result

{'8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm': {'text_ids': ['30239_s50948352',
   '7560_s56450273',
   '5506_s56860376',
   '8448_s56350597',
   '20517_s50650436'],
  'triplets': [('Azygos fissure',
    'described by',
    'Incidental azygos fissure with mild opacity in the inferior aspect of the fissure.'),
   ('Inferior pulmonary ligament',
    'described by',
    'Lungs are well expanded and clear. Normal cardiomediastinal contour. Left hemidiaphragm retrocardiac irregularity represents the inferior pulmonary ligament'),
   ('Inferior pulmonary ligament',
    'described by',
    'Lungs are well expanded and clear. Normal cardiomediastinal contour. Left hemidiaphragm retrocardiac irregularity represents the inferior pulmonary ligament'),
   ('Rhomboid fossa of the clavicle',
    'described by',
    'The lungs and pleural spaces are clear. Cardiomediastinal contours and hilar regions within normal limits.\xa0Incidental prominant left rhomboid fossa of the clavicle on the l

In [47]:
detailed_prompts = {}

for image_id, data in result.items():
    if any(image_id.startswith(selected_id) for selected_id in selected_image_id):
        context_text_ids = data['text_ids']
        context_texts = []

        print(f"Processing Image ID: {image_id}")
        print(f"Top-5 Text IDs: {context_text_ids}")

        for text_id in context_text_ids:
            report_row = all_reports_df[all_reports_df['report_id'] == text_id]
            if not report_row.empty:
                context_texts.append(report_row['cleaned_report'].values[0])
            else:
                print(f"Warning: Report ID {text_id} not found in all_reports_df.")

        if not context_texts:
            print(f"Warning: No context texts found for image ID {image_id}.")
            continue

        triplets = data['triplets']
        detailed_prompts[image_id] = construct_detailed_prompt(image_id, context_texts, triplets)

for image_id, prompt in detailed_prompts.items():
    print(f"Prompt for Image ID: {image_id}\n{prompt}\n")

Processing Image ID: 8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm
Top-5 Text IDs: ['30239_s50948352', '7560_s56450273', '5506_s56860376', '8448_s56350597', '20517_s50650436']
Prompt for Image ID: 8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm

    You are an assistant designed to write impression summaries for the radiology report.
    Users will send a context text and you will respond with an impression summary using that context.
    Instructions:
    • Impression should be based on the information that the user will send in the context.
    • The impression should not mention anything about follow-up actions.
    • Impression should not contain any mentions of prior or previous studies.
    • Use the following words for attributes where applicable:
        - Pathology: atelectasis, opacities, consolidation, effusion, pneumothorax
        - Positional: right, left, bilateral, upper, lower, middle
        - Severity: mild, moderate, severe
        - Size: small, large

In [None]:
from openai import OpenAI
client = OpenAI(
    api_key='XXX',
)

In [54]:
def generate_report(prompt):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a medical AI specialized in generating radiology reports."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=300 
    )
    return(response)
    

generated_reports = {}
for image_id, prompt in detailed_prompts.items():
    generated_reports[image_id] = generate_report(prompt)
    print(f"Generated report for image {image_id}:\n{generated_reports[image_id]}\n")

with open('generated_reports.txt', 'w') as file:
    for image_id, report in generated_reports.items():
        file.write(f"Image ID: {image_id}\n")
        file.write(f"{report}\n")
        file.write("\n" + "-"*80 + "\n")

print("Report generation completed.")


Generated report for image 8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm:
ChatCompletion(id='chatcmpl-9pef5FgtYVxrakYA2776c8lLl7moC', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The lungs and cardiomediastinal silhouette are clear and within normal limits across all reports, with no signs of acute cardiopulmonary processes. There is no evidence of focal consolidation, pneumothorax, effusion or osseous abnormalities. An incidental finding of mild opacity is noted in the inferior aspect of the azygos fissure. Normal variants, including the prominent left rhomboid fossa of the clavicle and an irregularity of the left hemidiaphragm retrocardiac representing the inferior pulmonary ligament, were also detected. Overall, the examination reveals no acute cardiopulmonary abnormalities or pathology.', role='assistant', function_call=None, tool_calls=None))], created=1722098059, model='gpt-4-0613', object='chat.completion', service_

In [55]:
import re

def clean_report_text(text):
    text = re.sub(r'\n', ' ', text)  
    text = re.sub(r'\s+', ' ', text) 
    text = text.strip()
    return text


In [56]:
import re

def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports

def save_reports_as_txt(cleaned_reports, output_file_path):
    with open(output_file_path, 'w') as file:
        for image_id, report in cleaned_reports.items():
            file.write(f"Image ID: {image_id}\n")
            file.write(f"Report: {report}\n")
            file.write("--------------------------------------------------------------------------------\n")

file_path = 'generated_reports.txt'
output_file_path = 'cleaned_generated_reports_sample.txt'

cleaned_generated_reports = extract_report_content(file_path)
save_reports_as_txt(cleaned_generated_reports, output_file_path)

print(f"Cleaned reports have been saved to {output_file_path}")


Cleaned reports have been saved to cleaned_generated_reports_sample.txt


In [58]:
cleaned_generated_reports

{'8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7078.dcm': 'The lungs and cardiomediastinal silhouette are clear and within normal limits across all reports, with no signs of acute cardiopulmonary processes. There is no evidence of focal consolidation, pneumothorax, effusion or osseous abnormalities. An incidental finding of mild opacity is noted in the inferior aspect of the azygos fissure. Normal variants, including the prominent left rhomboid fossa of the clavicle and an irregularity of the left hemidiaphragm retrocardiac representing the inferior pulmonary ligament, were also detected. Overall, the examination reveals no acute cardiopulmonary abnormalities or pathology.'}

In [88]:
import pandas as pd


all_reports_df = pd.read_csv('sample_report_groundTruth.csv')

data = []

with open('cleaned_generated_reports_sample.txt', 'r') as file:
    content = file.read().strip().split('--------------------------------------------------------------------------------')

for block in content:
    lines = block.strip().split('\n')
    if len(lines) > 1:
        try:
            image_id = lines[0].split(': ')[1]
            cleaned_report = ' '.join(lines[1:]).strip()
            data.append([image_id, cleaned_report])
        except IndexError:
            print(f"Skipping malformed block: {block}")

cleaned_reports_df = pd.DataFrame(data, columns=['image_id', 'AI_generated_report'])
cleaned_reports_df.to_csv('clean_sample.csv')
clean_sample=pd.read_csv('clean_sample.csv')

filtered_df = all_reports_df[all_reports_df['image_id'].isin(cleaned_reports_df['image_id'])]

filtered_df = filtered_df.merge(cleaned_reports_df, on='image_id', how='left')

filtered_df.to_csv('filtered_reports_with_ai_sample.csv', index=False)



In [89]:
filtered_df

Unnamed: 0,image_id,report_id,cleaned_report,AI_generated_report
0,8000_0e82057a-5ba4dacb-c5333adf-de03650c-483b7...,8000_s55037150,Acute liver injury and abdominal pain. Concer...,Report: The lungs and cardiomediastinal silhou...


In [92]:
import pandas as pd
from bert_score import score
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


comparison_df = pd.read_csv('filtered_reports_with_ai_sample.csv')

def calculate_scores(cleaned_report, ai_generated_report):
    P, R, F1 = score([ai_generated_report], [cleaned_report], lang="en", verbose=True)
    bert_score_precision = P.mean().item()
    bert_score_recall = R.mean().item()
    bert_score_f1 = F1.mean().item()
    
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([cleaned_report, ai_generated_report])
    tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    

    return (bert_score_precision, bert_score_recall, bert_score_f1, tfidf_score)

scores_4 = comparison_df.apply(lambda row: calculate_scores(row['cleaned_report'], row['AI_generated_report']), axis=1)

comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4', 'tfidf_score_4']] = pd.DataFrame(scores_4.tolist(), index=comparison_df.index)

average_scores_4 = comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4',  'tfidf_score_4']].mean()

print("Average scores for GPT-4 generated reports:")
print(average_scores_4)





Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  1.60it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00,  6.64it/s]


done in 0.90 seconds, 1.11 sentences/sec
Average scores for GPT-4 generated reports:
BERTScore_Precision_4    0.833115
BERTScore_Recall_4       0.858473
BERTScore_F1_4           0.845604
tfidf_score_4            0.298376
dtype: float64


Few shot prompt example

In [21]:
pathology_words = ["atelectasis", "opacities", "consolidation", "effusion", "pneumothorax"]
positional_words = ["right", "left", "bilateral", "upper", "lower", "middle"]
severity_words = ["mild", "moderate", "severe"]
size_words = ["small", "large"]

def construct_detailed_prompt_with_few_shot(image_id, context_texts, triplets, few_shot_examples):
    context = " ".join(context_texts)
    triplets_text = " ".join([f"{triplet[0]} {triplet[1]} {triplet[2]}" for triplet in triplets])

    few_shot_prompt = "\n".join(few_shot_examples)

    prompt = f"""
    You are an assistant designed to write impression summaries for the radiology report.
    Users will send a context text and you will respond with an impression summary using that context.
    Instructions:
    • Impression should be based on the information that the user will send in the context.
    • The impression should not mention anything about follow-up actions.
    • Impression should not contain any mentions of prior or previous studies.
    • Use the following words for attributes where applicable:
        - Pathology: {', '.join(pathology_words)}
        - Positional: {', '.join(positional_words)}
        - Severity: {', '.join(severity_words)}
        - Size: {', '.join(size_words)}
    Examples:
    {few_shot_prompt}
    Your Turn:
    CONTEXT: {context} {triplets_text}
    IMPRESSION:
    """
    return prompt


few_shot_examples = [
    """
    Example 1:
    CONTEXT: Frontal chest radiographs were obtained with the patient in the upright position. COMPARISON: Radiographs from ___, ___ and ___. FINDINGS: The lungs are clear of focal consolidation, pleural effusion or pneumothorax. The heart size is normal. The mediastinal contours are normal. Multiple surgical clips project over the left breast, and old left rib fractures are noted. 
    IMPRESSION: No acute cardiopulmonary process.
    """,
    """
    Example 2:
    CONTEXT: Portable chest radiograph demonstrates unchanged mediastinal, hilar, and cardiac contours. There has been interval development of bibasilar opacities likely reflecting atelectasis, though cannot exclude developing infectious process. Additionally, there has been interval increase in small right-sided pleural effusion.
    IMPRESSION: Bibasilar opacities, likely atelectasis. Increased small right pleural effusion.
    """,
    """
    Example 3:
    CONTEXT:A tracheostomy is in place. Bullet fracture fragments are again noted bilaterally. Bilateral chest tubes are unchanged in positions. A right-sided pneumothorax has increased and is now small to moderate in size. There is no definite pleural effusion on the left. Vague retrocardiac opacity is similar and suggests atelectasis, but improved substantially. 
    IMPRESSION: Increase in right-sided pneumothorax, now small to moderate, with mild recurrent leftward shift.
    """
]



prompt = construct_detailed_prompt_with_few_shot(image_id, context_texts, triplets, few_shot_examples)
print(prompt)



    You are an assistant designed to write impression summaries for the radiology report.
    Users will send a context text and you will respond with an impression summary using that context.
    Instructions:
    • Impression should be based on the information that the user will send in the context.
    • The impression should not mention anything about follow-up actions.
    • Impression should not contain any mentions of prior or previous studies.
    • Use the following words for attributes where applicable:
        - Pathology: atelectasis, opacities, consolidation, effusion, pneumothorax
        - Positional: right, left, bilateral, upper, lower, middle
        - Severity: mild, moderate, severe
        - Size: small, large
    Examples:
    
    Example 1:
    CONTEXT: Frontal chest radiographs were obtained with the patient in the upright position. COMPARISON: Radiographs from ___, ___ and ___. FINDINGS: The lungs are clear of focal consolidation, pleural effusion or pneumoth

In [23]:
detailed_prompts = {}

for image_id, data in top_k_texts_and_triplets_selected.items():
    if any(image_id.startswith(selected_id) for selected_id in selected_image_id):
        context_text_ids = data['text_ids']
        context_texts = []

        print(f"Processing Image ID: {image_id}")
        print(f"Top-5 Text IDs: {context_text_ids}")

        for text_id in context_text_ids:
            report_row = all_reports_df[all_reports_df['report_id'] == text_id]
            if not report_row.empty:
                context_texts.append(report_row['cleaned_report'].values[0])
            else:
                print(f"Warning: Report ID {text_id} not found in all_reports_df.")

        if not context_texts:
            print(f"Warning: No context texts found for image ID {image_id}.")
            continue

        triplets = data['triplets']
        detailed_prompts[image_id] = construct_detailed_prompt_with_few_shot(image_id, context_texts, triplets,few_shot_examples)

for image_id, prompt in detailed_prompts.items():
    print(f"Prompt for Image ID: {image_id}\n{prompt}\n")

Processing Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Top-5 Text IDs: ['31402_s51718048', '18549_s50696726', '23163_s53192951', '19294_s59515023', '184_s54234360']
Processing Image ID: 173_eb2fabb7-4bbc8aab-d7371282-08e5bcb5-de2e430a
Top-5 Text IDs: ['9335_s56406916', '22008_s50002557', '11261_s58493244', '22683_s54917419', '194_s53950795']
Processing Image ID: 23_28fad2ac-d6001216-b4f72c5b-2d4d452e-17b6c9a5
Top-5 Text IDs: ['19653_s56073986', '20585_s56207921', '11908_s59124602', '24181_s59459488', '12707_s53019840']
Processing Image ID: 210_5053834b-b1bea04f-680aec42-45abe415-c2d097ba
Top-5 Text IDs: ['6870_s56090555', '7286_s53507895', '1758_s55579609', '5815_s56477239', '7183_s55768555']
Processing Image ID: 17_6ad819bb-bae74eb9-7b663e90-b8deabd7-57f8054a
Top-5 Text IDs: ['21587_s55564934', '19292_s58015625', '10631_s54279389', '5377_s51448682', '31901_s58288699']
Processing Image ID: 153_2c814e99-aa096010-bd722059-5a35bd69-a902e44e
Top-5 Text IDs: ['19535_s57131561

In [25]:
def generate_report(prompt):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a medical AI specialized in generating radiology reports."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=300 
    )
    return(response)
    

generated_reports = {}
for image_id, prompt in detailed_prompts.items():
    generated_reports[image_id] = generate_report(prompt)
    print(f"Generated report for image {image_id}:\n{generated_reports[image_id]}\n")

with open('generated_reports.txt', 'w') as file:
    for image_id, report in generated_reports.items():
        file.write(f"Image ID: {image_id}\n")
        file.write(f"{report}\n")
        file.write("\n" + "-"*80 + "\n")

print("Report generation completed.")


Generated report for image 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f:
ChatCompletion(id='chatcmpl-9qvmLEkp8BOnBdn6rim0feD2EwUy9', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='1. Bilateral perihilar opacities and posterior costophrenic angle blunting suggestive of effusions. Linear opacity in minor fissure region, potential atelectasis or other consolidation. Cardiac silhouette enlargement and aortic atherosclerotic calcifications noted.\n2. Increased right lower and middle lobe densities, indicative of possible consolidations. Moderate cardiomegaly with central vascular congestion and mild interstitial edema.\n3. Multifocal bilateral pulmonary metastases with no acute cardiopulmonary process detected. Chronic pulmonary fibrotic changes noted.\n4. Extensive chronic interstitial fibrotic changes, possible edema or infection, moderate cardiomegaly stable.\n5. Stable cardiomediastinal contours, worsening bibasilar opacities s

In [26]:
import re

def clean_report_text(text):
    text = re.sub(r'\n', ' ', text)  
    text = re.sub(r'\s+', ' ', text) 
    text = text.strip()
    return text


In [28]:
import re


def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    
    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

               
                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports


file_path = 'generated_reports.txt'

cleaned_generated_reports = extract_report_content(file_path)


for image_id, report in cleaned_generated_reports.items():
    print(f"Image ID: {image_id}")
    print(f"Report: {report}\n")



Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Report: 1. Bilateral perihilar opacities and posterior costophrenic angle blunting suggestive of effusions. Linear opacity in minor fissure region, potential atelectasis or other consolidation. Cardiac silhouette enlargement and aortic atherosclerotic calcifications noted. 2. Increased right lower and middle lobe densities, indicative of possible consolidations. Moderate cardiomegaly with central vascular congestion and mild interstitial edema. 3. Multifocal bilateral pulmonary metastases with no acute cardiopulmonary process detected. Chronic pulmonary fibrotic changes noted. 4. Extensive chronic interstitial fibrotic changes, possible edema or infection, moderate cardiomegaly stable. 5. Stable cardiomediastinal contours, worsening bibasilar opacities suggesting atelectasis, probable small bilateral pleural effusions. 6. Bilateral lower zone atelectasis and hazy airspace opacities in the context of sickle cell disease with acu

In [29]:
import re

def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports

def save_reports_as_txt(cleaned_reports, output_file_path):
    with open(output_file_path, 'w') as file:
        for image_id, report in cleaned_reports.items():
            file.write(f"Image ID: {image_id}\n")
            file.write(f"Report: {report}\n")
            file.write("--------------------------------------------------------------------------------\n")

file_path = 'generated_reports.txt'
output_file_path = 'cleaned_generated_reports_fewshot_GPT_4.txt'

cleaned_generated_reports = extract_report_content(file_path)
save_reports_as_txt(cleaned_generated_reports, output_file_path)

print(f"Cleaned reports have been saved to {output_file_path}")


Cleaned reports have been saved to cleaned_generated_reports_fewshot_GPT_4.txt


In [30]:
import pandas as pd


all_reports_df = pd.read_csv('all_reports_df_images.csv')

data = []

with open('cleaned_generated_reports_fewshot_GPT_4.txt', 'r') as file:
    content = file.read().strip().split('--------------------------------------------------------------------------------')

for block in content:
    lines = block.strip().split('\n')
    if len(lines) > 1:
        try:
            image_id = lines[0].split(': ')[1]
            cleaned_report = ' '.join(lines[1:]).strip()
            data.append([image_id, cleaned_report])
        except IndexError:
            print(f"Skipping malformed block: {block}")

cleaned_reports_df = pd.DataFrame(data, columns=['image_id', 'AI_generated_report'])

filtered_df = all_reports_df[all_reports_df['image_id'].isin(cleaned_reports_df['image_id'])]

filtered_df = filtered_df.merge(cleaned_reports_df, on='image_id', how='left')

filtered_df.to_csv('filtered_reports_with_ai_few_shot_GPT_4.csv', index=False)



In [31]:
def generate_report(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a medical AI specialized in generating radiology reports."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=300  
    )
    return(response)
    

generated_reports_4o = {}
for image_id, prompt in detailed_prompts.items():
    generated_reports[image_id] = generate_report(prompt)
    print(f"Generated report for image {image_id}:\n{generated_reports[image_id]}\n")

with open('generated_reports_4o.txt', 'w') as file:
    for image_id, report in generated_reports.items():
        file.write(f"Image ID: {image_id}\n")
        file.write(f"{report}\n")
        file.write("\n" + "-"*80 + "\n")

print("Report generation completed.")


Generated report for image 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f:
ChatCompletion(id='chatcmpl-9qvtRzs3D6AGuK2Gt1Q1mnLyPlyyo', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='1. Extensive chronic interstitial fibrotic changes with peripheral basilar predominance.\n2. Moderate cardiomegaly is stable.\n3. No pleural effusion or pneumothorax.', role='assistant', function_call=None, tool_calls=None))], created=1722402625, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_4e2b2da518', usage=CompletionUsage(completion_tokens=40, prompt_tokens=1560, total_tokens=1600))

Generated report for image 173_eb2fabb7-4bbc8aab-d7371282-08e5bcb5-de2e430a:
ChatCompletion(id='chatcmpl-9qvtSYMSr8fu06kGTnajYQVMiCTwn', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Moderate right pleural effusion with associated right lower lobe volume loss. Small 

In [35]:
import re


def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    
    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

               
                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports


file_path = 'generated_reports_4o.txt'

cleaned_generated_reports = extract_report_content(file_path)


for image_id, report in cleaned_generated_reports.items():
    print(f"Image ID: {image_id}")
    print(f"Report: {report}\n")



Image ID: 450_4c8f8ea6-47e8a94c-d0102870-54af5bd2-c538db5f
Report: 1. Extensive chronic interstitial fibrotic changes with peripheral basilar predominance. 2. Moderate cardiomegaly is stable. 3. No pleural effusion or pneumothorax.

Image ID: 173_eb2fabb7-4bbc8aab-d7371282-08e5bcb5-de2e430a
Report: Moderate right pleural effusion with associated right lower lobe volume loss. Small left pleural effusion with left lower lobe volume loss. Pulmonary vascular redistribution and moderate cardiomegaly. No pneumothorax. Patchy opacities in lung bases suggestive of atelectasis. Aortic calcifications and moderate multilevel degenerative changes in the thoracic spine noted.

Image ID: 23_28fad2ac-d6001216-b4f72c5b-2d4d452e-17b6c9a5
Report: No acute intrathoracic process. Normal cardiomediastinal contour. Incidental prominent left rhomboid fossa of the clavicle noted, a normal variant.

Image ID: 210_5053834b-b1bea04f-680aec42-45abe415-c2d097ba
Report: No pneumothorax. Right pleural effusion is sm

In [36]:
import re

def extract_report_content(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    reports = content.split("--------------------------------------------------------------------------------")
    cleaned_reports = {}

    for report in reports:
        image_id_match = re.search(r'Image ID: (\S+)', report)
        if image_id_match:
            image_id = image_id_match.group(1)
            
            report_content_match = re.search(r'content=\'(.*?)\'', report, re.DOTALL)
            if report_content_match:
                report_content = report_content_match.group(1).replace('\\n', ' ').replace('\\', '')

                cleaned_reports[image_id] = report_content.strip()
    
    return cleaned_reports

def save_reports_as_txt(cleaned_reports, output_file_path):
    with open(output_file_path, 'w') as file:
        for image_id, report in cleaned_reports.items():
            file.write(f"Image ID: {image_id}\n")
            file.write(f"Report: {report}\n")
            file.write("--------------------------------------------------------------------------------\n")

file_path = 'generated_reports_4o.txt'
output_file_path = 'cleaned_generated_reports_4o_few shot.txt'

cleaned_generated_reports = extract_report_content(file_path)
save_reports_as_txt(cleaned_generated_reports, output_file_path)


print(f"Cleaned reports have been saved to {output_file_path}")


Cleaned reports have been saved to cleaned_generated_reports_4o_few shot.txt


In [37]:
import pandas as pd


all_reports_df = pd.read_csv('all_reports_df_images.csv')

data = []

with open('cleaned_generated_reports_4o_few shot.txt', 'r') as file:
    content = file.read().strip().split('--------------------------------------------------------------------------------')

for block in content:
    lines = block.strip().split('\n')
    if len(lines) > 1:
        try:
            image_id = lines[0].split(': ')[1]
            cleaned_report = ' '.join(lines[1:]).strip()
            data.append([image_id, cleaned_report])
        except IndexError:
            print(f"Skipping malformed block: {block}")

cleaned_reports_df_4o = pd.DataFrame(data, columns=['image_id', 'AI_generated_report_4o'])

filtered_df_4o = all_reports_df[all_reports_df['image_id'].isin(cleaned_reports_df_4o['image_id'])]

filtered_df_4o = filtered_df_4o.merge(cleaned_reports_df_4o, on='image_id', how='left')

filtered_df_4o.to_csv('filtered_reports_with_ai_4o_fewshot.csv', index=False)



In [41]:
import pandas as pd

df_gpt4 = pd.read_csv('filtered_reports_with_ai_few_shot_GPT_4.csv')
df_4o = pd.read_csv('filtered_reports_with_ai_4o_fewshot.csv')


merged_df = pd.merge(df_gpt4,df_4o , on=['image_id', 'report_id', 'cleaned_report'], suffixes=('_GPT_4','_4o'))

merged_df.to_csv('merged_reports.csv', index=False)



In [43]:
import pandas as pd
from bert_score import score
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


comparison_df = pd.read_csv('merged_reports.csv')

def calculate_scores(cleaned_report, ai_generated_report):
    P, R, F1 = score([ai_generated_report], [cleaned_report], lang="en", verbose=True)
    bert_score_precision = P.mean().item()
    bert_score_recall = R.mean().item()
    bert_score_f1 = F1.mean().item()
    
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([cleaned_report, ai_generated_report])
    tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    

    return (bert_score_precision, bert_score_recall, bert_score_f1, tfidf_score)

scores_4 = comparison_df.apply(lambda row: calculate_scores(row['cleaned_report'], row['AI_generated_report']), axis=1)
scores_4o = comparison_df.apply(lambda row: calculate_scores(row['cleaned_report'], row['AI_generated_report_4o']), axis=1)

comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4', 'tfidf_score_4']] = pd.DataFrame(scores_4.tolist(), index=comparison_df.index)
comparison_df[['BERTScore_Precision_4o', 'BERTScore_Recall_4o', 'BERTScore_F1_4o', 'tfidf_score_4o']] = pd.DataFrame(scores_4o.tolist(), index=comparison_df.index)

average_scores_4 = comparison_df[['BERTScore_Precision_4', 'BERTScore_Recall_4', 'BERTScore_F1_4',  'tfidf_score_4']].mean()
average_scores_4o = comparison_df[['BERTScore_Precision_4o', 'BERTScore_Recall_4o', 'BERTScore_F1_4o', 'tfidf_score_4o']].mean()

print("Average scores for GPT-4 generated reports:")
print(average_scores_4)

print("\nAverage scores for GPT-4o generated reports:")
print(average_scores_4o)

comparison_df.to_csv('comparison_scores_fewshot.csv', index=False)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  6.71it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.15 seconds, 6.54 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.53it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.12 seconds, 8.15 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  5.21it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]


done in 0.21 seconds, 4.79 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  6.04it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 99.06it/s]

done in 0.18 seconds, 5.51 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 10.01it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 99.88it/s]

done in 0.12 seconds, 8.56 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  7.99it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 140.23it/s]

done in 0.13 seconds, 7.47 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.02it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 305.44it/s]

done in 0.13 seconds, 7.45 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 12.60it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.09 seconds, 10.94 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.53it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 161.26it/s]

done in 0.12 seconds, 8.61 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  4.87it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 136.67it/s]


done in 0.21 seconds, 4.68 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.18it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 72.97it/s]

done in 0.14 seconds, 6.90 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.65it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 124.63it/s]

done in 0.12 seconds, 8.67 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  5.35it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]


done in 0.19 seconds, 5.21 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.11it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 206.92it/s]

done in 0.13 seconds, 7.50 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.65it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 221.82it/s]

done in 0.12 seconds, 8.30 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 11.73it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 174.81it/s]

done in 0.10 seconds, 10.26 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  7.82it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.14 seconds, 7.12 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 11.56it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 177.03it/s]

done in 0.10 seconds, 10.24 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.36it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.12 seconds, 8.37 sentences/sec



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 12.01it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<?, ?it/s]

done in 0.09 seconds, 10.98 sentences/sec
Average scores for GPT-4 generated reports:
BERTScore_Precision_4    0.836496
BERTScore_Recall_4       0.818821
BERTScore_F1_4           0.827462
tfidf_score_4            0.190207
dtype: float64

Average scores for GPT-4o generated reports:
BERTScore_Precision_4o    0.857200
BERTScore_Recall_4o       0.818081
BERTScore_F1_4o           0.837124
tfidf_score_4o            0.183484
dtype: float64



