In [3]:
import numpy as np
from scipy.spatial.distance import directed_hausdorff
import os
import matplotlib.pyplot as plt
import warnings
import re
import numpy as np

# Suppress SyntaxWarnings
warnings.filterwarnings("ignore", category=SyntaxWarning)

import re
import numpy as np
import os
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 8})
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

def parse_and_evaluate_lines_from_file(filename):
    point_inside_box_list = []
    euclidean_distance_list = []
    relative_error_list = []
    shortest_distance_list = []
    ground_truth_centers = []
    model_outputs = []
    bbox_diags = []

    with open(filename, 'r') as file:
        lines = file.readlines()

    for line in lines:
        # Extract ground truth center
        ground_truth_center = re.search(r"Ground truth centre: \[\((\d+), (\d+)\)]", line)
        ground_truth_center = (int(ground_truth_center.group(1)), int(ground_truth_center.group(2))) if ground_truth_center else None

        # Extract ground truth bounding box
        ground_truth_bbox = re.search(r"Ground truth bbox: \[\((\d+), (\d+)\), \((\d+), (\d+)\), \((\d+), (\d+)\), \((\d+), (\d+)\)]", line)
        ground_truth_bbox = [(int(ground_truth_bbox.group(i)), int(ground_truth_bbox.group(i+1))) for i in range(1, 9, 2)] if ground_truth_bbox else None

        # Extract model output center
        model_output = re.search(r"Model output: \((\d+), (\d+)\)", line)
        model_output = (int(model_output.group(1)), int(model_output.group(2))) if model_output else None

        # Skip if any value is missing
        if not ground_truth_center or not ground_truth_bbox or not model_output:
            continue

        # Evaluate the center prediction
        point_inside_box, euclidean_distance, relative_error, shortest_distance, bbox_diag = evaluate_center_prediction(ground_truth_bbox, model_output)

        # Store results
        point_inside_box_list.append(point_inside_box)
        euclidean_distance_list.append(euclidean_distance)
        shortest_distance_list.append(shortest_distance)
        ground_truth_centers.append(ground_truth_center)
        model_outputs.append(model_output)
        bbox_diags.append(bbox_diag)

        # Print bounding box if relative error is abnormally large
        if relative_error < 1000:  # Adjust threshold if needed
            relative_error_list.append(relative_error)
            # print(f"High relative error detected: {relative_error}, Bounding Box: {ground_truth_bbox}, Model Output: {model_output}")

    # Calculate average ground truth and model prediction centers
    avg_ground_truth_center = tuple(np.mean(ground_truth_centers, axis=0)) if ground_truth_centers else (None, None)
    avg_model_output = tuple(np.mean(model_outputs, axis=0)) if model_outputs else (None, None)

    decimals = 2
    return (
        round(np.mean(point_inside_box_list), decimals),
        round(np.mean(euclidean_distance_list), decimals),
        round(np.mean(relative_error_list), decimals),
        round(np.mean(shortest_distance_list), decimals),
        len(point_inside_box_list),
        ground_truth_centers,
        model_outputs,
        tuple(map(lambda x: round(x), avg_ground_truth_center)),
        round(np.mean(bbox_diags), decimals)
        # tuple(map(lambda x: round(x), avg_model_output))
    )

def evaluate_center_prediction(bbox, model_output):
    x, y = model_output
    x_min, y_min = bbox[0]
    x_max, y_max = bbox[2]

    # Check if point is inside bounding box
    point_inside_box = int(x_min <= x <= x_max and y_min <= y <= y_max)

    # Euclidean distance to ground truth center
    center_x, center_y = (x_min + x_max) / 2, (y_min + y_max) / 2
    euclidean_distance = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)

    # Relative error
    bbox_diag = np.sqrt((x_max - x_min) ** 2 + (y_max - y_min) ** 2)
    relative_error = euclidean_distance / bbox_diag if bbox_diag > 0 else float('inf')

    # Shortest distance to box (minimum distance to any edge)
    left = abs(x - x_min)
    right = abs(x - x_max)
    top = abs(y - y_min)
    bottom = abs(y - y_max)
    shortest_distance = min(left, right, top, bottom)

    return point_inside_box, euclidean_distance, relative_error, shortest_distance, bbox_diag


def list_sorted_txt_files(folder_path):
    try:
        # Get all .txt filenames in the folder
        txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

        # Sort the files based on the numeric value after the underscore
        sorted_files = sorted(txt_files, key=lambda x: int(x.split('_')[-1].split('.')[0]))

        # Lists to store data
        patient_nums = []
        # avg_gt_x, avg_gt_y = [], []
        # avg_mp_x, avg_mp_y = [], []
        percent_in_bb = []
        shortest_dist_bb = []
        euclidean_dist = []
        relative_error = []
        patient_data = {}

        # Print the sorted filenames and process the files
        for file in sorted_files:
            num = int(file.split('_')[-1].split('.')[0])

            # run parse and evaluate
            results = parse_and_evaluate_lines_from_file(os.path.join(folder_path, file))

            # Store values
            patient_nums.append(num)
            # avg_gt_x.append(results[5][0])
            # avg_gt_y.append(results[5][1])
            # avg_mp_x.append(results[6][0])
            # avg_mp_y.append(results[6][1])
            patient_data[num] = {
                "gt_points": results[5],  # List of (x, y) GT points
                "mp_points": results[6],  # List of (x, y) MP points
                "avg_gt_center": results[7],
                "bbox_diag": results[8]
            }
            percent_in_bb.append(results[0])
            shortest_dist_bb.append(results[3])
            euclidean_dist.append(results[1])
            relative_error.append(results[2])

            # Print formatted results
            # print(f"Patient: {num} || TotalSlices: {results[4]}, AvgGT: {results[5]}, AvgMP: {results[6]} %point_in_bb: {results[0]}, shortest_dist_bb: {results[3]}, euclid_dist: {results[1]}, rel_err: {results[2]}")
            # print(f"Patient: {num} || TotalSlices: {results[4]}, %point_in_bb: {results[0]}, shortest_dist_bb: {results[3]}, euclid_dist: {results[1]}, rel_err: {results[2]}")
            # print(f"Patient: {num} || TotalSlices: {results[4]}, GT Points: {len(results[5])}, MP Points: {len(results[6])}")


        # Define number of plots per row
        plots_per_row = 5
        num_patients = len(patient_data)
        num_rows = (num_patients + plots_per_row - 1) // plots_per_row  # Calculate required rows

        fig, axes = plt.subplots(num_rows, plots_per_row, figsize=(10, 2 * num_rows))  # Adjust figure size

        # Flatten axes for easy indexing (handles cases where num_patients < plots_per_row)
        axes = axes.flatten() if num_rows > 1 else [axes]

        # Generate scatter plots
        for i, (patient_num, data) in enumerate(patient_data.items()):
            gt_x, gt_y = zip(*data["gt_points"]) if data["gt_points"] else ([], [])
            mp_x, mp_y = zip(*data["mp_points"]) if data["mp_points"] else ([], [])

            # Extract bounding box properties
            center_x, center_y = data["avg_gt_center"]
            diag = data["bbox_diag"]

            # Compute half side length of square bbox (assuming square for simplicity)
            half_side = (diag / np.sqrt(2)) / 2

            # Compute bbox corners
            x_min, x_max = center_x - half_side, center_x + half_side
            y_min, y_max = center_y - half_side, center_y + half_side

            # Draw bounding box
            rect = plt.Rectangle((x_min, y_min), 2 * half_side, 2 * half_side,
                                linewidth=1, edgecolor='green', facecolor='none')
            axes[i].add_patch(rect)

            axes[i].scatter(gt_x, gt_y, color='blue', label='GT', alpha=0.6, s=5)
            axes[i].scatter(mp_x, mp_y, color='red', label='MP', alpha=0.6, s=5)
            axes[i].set_xlim(0, 128)
            axes[i].set_ylim(0, 128)
            axes[i].set_title(f"Patient {patient_num}", fontsize=8)
            axes[i].tick_params(axis='both', which='both', length=1)  # Hide ticks
            axes[i].set_xticks([])
            axes[i].set_yticks([])

        # Hide any unused subplots (in case num_patients is not a multiple of 5)
        for j in range(i + 1, len(axes)):
            fig.delaxes(axes[j])

        # Adjust layout
        plt.tight_layout()
        plt.show()

        # Create subplots
        fig, axs = plt.subplots(2, 2, figsize=(10, 10))

        # Scatter plot: % points in bounding box
        axs[0, 0].scatter(patient_nums, percent_in_bb, color='green', marker='o')
        axs[0, 0].set_title("% Points in Bounding Box")
        axs[0, 0].set_xlabel("Patient Number")
        axs[0, 0].set_ylabel("% in BB")

        # Scatter plot: Shortest Distance to Bounding Box
        axs[1, 0].scatter(patient_nums, shortest_dist_bb, color='purple', marker='o')
        axs[1, 0].set_title("Shortest Distance to Bounding Box")
        axs[1, 0].set_xlabel("Patient Number")
        axs[1, 0].set_ylabel("Distance")

        # Scatter plot: Euclidean Distance
        axs[1, 1].scatter(patient_nums, euclidean_dist, color='orange', marker='o')
        axs[1, 1].set_title("Euclidean Distance")
        axs[1, 1].set_xlabel("Patient Number")
        axs[1, 1].set_ylabel("Distance")

        # Scatter plot: Relative Error
        axs[0, 1].scatter(patient_nums, relative_error, color='brown', marker='o')
        axs[0, 1].set_title("Relative Error")
        axs[0, 1].set_xlabel("Patient Number")
        axs[0, 1].set_ylabel("Error")

        # Adjust layout and display
        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"An error occurred: {e}")

In [None]:
import numpy as np
from scipy.spatial.distance import directed_hausdorff
import os
import matplotlib.pyplot as plt
import warnings
import re

# Suppress SyntaxWarnings
warnings.filterwarnings("ignore", category=SyntaxWarning)

plt.rcParams.update({'font.size': 8})
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

def parse_and_evaluate_lines_from_file(filename):
    point_inside_box_list = []
    euclidean_distance_list = []
    shortest_distance_list = []
    ground_truth_centers = []
    model_outputs = []
    bbox_diags = []

    with open(filename, 'r') as file:
        lines = file.readlines()

    for line in lines:
        # Extract ground truth center
        ground_truth_center = re.search(r"Ground truth centre: \[\((\d+), (\d+)\)]", line)
        ground_truth_center = (int(ground_truth_center.group(1)), int(ground_truth_center.group(2))) if ground_truth_center else None

        # Extract ground truth bounding box
        ground_truth_bbox = re.search(r"Ground truth bbox: \[\((\d+), (\d+)\), \((\d+), (\d+)\), \((\d+), (\d+)\), \((\d+), (\d+)\)]", line)
        ground_truth_bbox = [(int(ground_truth_bbox.group(i)), int(ground_truth_bbox.group(i+1))) for i in range(1, 9, 2)] if ground_truth_bbox else None

        # Extract model output center
        model_output = re.search(r"Model output: \((\d+), (\d+)\)", line)
        model_output = (int(model_output.group(1)), int(model_output.group(2))) if model_output else None

        # Skip if any value is missing
        if not ground_truth_center or not ground_truth_bbox or not model_output:
            continue

        # Evaluate the center prediction
        point_inside_box, euclidean_distance, shortest_distance, bbox_diag = evaluate_center_prediction(ground_truth_bbox, model_output)

        # Store results
        point_inside_box_list.append(point_inside_box)
        euclidean_distance_list.append(euclidean_distance)
        shortest_distance_list.append(shortest_distance)
        ground_truth_centers.append(ground_truth_center)
        model_outputs.append(model_output)
        bbox_diags.append(bbox_diag)

    # Calculate the 95th percentile Euclidean distance
    euclidean_distance_95th = np.percentile(euclidean_distance_list, 95) if euclidean_distance_list else None
    shortest_distance_95th = np.percentile(shortest_distance_list, 95) if shortest_distance_list else None

    # Average ground truth and model prediction centers
    avg_ground_truth_center = tuple(np.mean(ground_truth_centers, axis=0)) if ground_truth_centers else (None, None)
    avg_model_output = tuple(np.mean(model_outputs, axis=0)) if model_outputs else (None, None)

    decimals = 4
    return (
        round(np.mean(point_inside_box_list), decimals),
        round(euclidean_distance_95th, decimals),
        round(shortest_distance_95th, decimals),
        len(point_inside_box_list),
        ground_truth_centers,
        model_outputs,
        tuple(map(lambda x: round(x), avg_ground_truth_center)),
        round(np.mean(bbox_diags), decimals)
    )

def evaluate_center_prediction(bbox, model_output):
    x, y = model_output
    x_min, y_min = bbox[0]
    x_max, y_max = bbox[2]

    # Check if point is inside bounding box
    point_inside_box = int(x_min <= x <= x_max and y_min <= y <= y_max)

    # Euclidean distance to ground truth center
    center_x, center_y = (x_min + x_max) / 2, (y_min + y_max) / 2
    euclidean_distance = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)

    # Shortest distance to box (minimum distance to any edge)
    left = abs(x - x_min)
    right = abs(x - x_max)
    top = abs(y - y_min)
    bottom = abs(y - y_max)
    shortest_distance = min(left, right, top, bottom)

    return point_inside_box, euclidean_distance, shortest_distance, np.sqrt((x_max - x_min) ** 2 + (y_max - y_min) ** 2)

def list_sorted_txt_files(folder_path, output_folder, title):

    # Get all .txt filenames in the folder
    txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

    # Sort the files based on the numeric value after the underscore
    sorted_files = sorted(txt_files, key=lambda x: int(x.split('_')[-1].split('.')[0]))

    # Lists to store data
    patient_nums = []
    percent_in_bb = []
    shortest_dist_bb = []
    euclidean_dist_95th = []
    patient_data = {}

    # Print the sorted filenames and process the files
    for file in sorted_files:
        num = int(file.split('_')[-1].split('.')[0])

        # run parse and evaluate
        results = parse_and_evaluate_lines_from_file(os.path.join(folder_path, file))


        # Store values
        patient_nums.append(num)
        patient_data[num] = {
            "gt_points": results[4],  # List of (x, y) GT points
            "mp_points": results[5],  # List of (x, y) MP points
            "avg_gt_center": results[6],
            "bbox_diag": results[7]
        }
        percent_in_bb.append(results[0])
        shortest_dist_bb.append(results[2])
        euclidean_dist_95th.append(results[1])

    # # Box and Whiskers Plot
    plot_box_and_whiskers_custom(percent_in_bb, shortest_dist_bb, euclidean_dist_95th, output_folder, title)

    return patient_data

import numpy as np
import matplotlib.pyplot as plt

def plot_box_and_whiskers_custom(percent_in_bb, shortest_dist_bb, euclidean_dist_95th, output_folder, title):
    """Generate a box-and-whisker plot with combined categories and overlaid scatter points."""
    
    shortest_dist_bb_normalized = np.array(shortest_dist_bb) / 128
    euclidean_dist_95th_normalized = np.array(euclidean_dist_95th) / 128


    # Prepare the data for box plot
    data = {
        "% Points in Bounding Box": percent_in_bb,
        "95th Percentile Shortest Distance to Bounding Box": shortest_dist_bb_normalized,
        "95th Percentile Euclidean Distance": euclidean_dist_95th_normalized
    }

    # print dataset avaerages
    print(f"Average % Points in Bounding Box: {np.mean(percent_in_bb):.4f}")
    print(f"Average 95th Percentile Shortest Distance to Bounding Box: {np.mean(np.array(shortest_dist_bb)):.4f}")
    print(f"Average 95th Percentile Euclidean Distance: {np.mean(np.array(euclidean_dist_95th)):.4f}")

    # Convert data dictionary to list for boxplot
    box_data = [data[key] for key in data]

    # Boxplot settings
    plt.figure(figsize=(10, 5))
    # box = plt.boxplot(box_data, patch_artist=True, showfliers=False, vert=False, 
    #                   widths=0.6, positions=np.arange(1, len(data) + 1))
    box = plt.boxplot(box_data, patch_artist=True, showfliers=False, tick_labels=data.keys())  

    # Set custom colors with transparency
    colors = ['#1E90FF', '#32CD32', '#FF6347']
    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.5)  # Make boxplot bars translucent

    # Scatter plot overlay
    for i, (key, color) in enumerate(zip(data.keys(), colors)):  
        y_values = data[key]
        x_values = np.random.normal(i + 1, 0.05, size=len(y_values))  # Jitter for better visualization
        plt.scatter(x_values, y_values, alpha=0.8, color=color, s=10)  # Use matching color

    # Set the x-axis labels to metric names (categories)
    plt.xticks(np.arange(1, len(data) + 1), list(data.keys()))

    # Labels and aesthetics
    plt.xlabel("Evaluation Metric Catagories")
    plt.ylabel("Evaluation Metric Values")
    plt.title(title)
    plt.ylim(-0.05, 1.05)
    plt.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
    plt.axhline(y=1, color='black', linestyle='--', linewidth=0.8)

    # Grid and layout
    plt.grid(True)
    plt.tight_layout()  # Adjust layout to prevent overlaps
    # save
    output_path = os.path.join(output_folder, f"{title.replace(' ', '_')}.png")
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    print(f"Saved plot to {output_path}")
    plt.close()



import os
import matplotlib.pyplot as plt
import numpy as np

def generate_patient_plots(patient_data, output_folder):
    """Generate and save scatter plots for each patient into the output folder."""
    
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)
    
    # Generate scatter plots for each patient
    for patient_num, data in patient_data.items():
        fig, ax = plt.subplots(figsize=(5, 5))  # Create a new figure for each patient
        
        gt_x, gt_y = zip(*data["gt_points"]) if data["gt_points"] else ([], [])
        mp_x, mp_y = zip(*data["mp_points"]) if data["mp_points"] else ([], [])

        # Extract bounding box properties
        center_x, center_y = data["avg_gt_center"]
        diag = data["bbox_diag"]

        # Compute half side length of square bbox (assuming square for simplicity)
        half_side = (diag / np.sqrt(2)) / 2

        # Compute bbox corners
        x_min, x_max = center_x - half_side, center_x + half_side
        y_min, y_max = center_y - half_side, center_y + half_side

        # Draw bounding box
        rect = plt.Rectangle((x_min, y_min), 2 * half_side, 2 * half_side,
                             linewidth=1, edgecolor='green', facecolor='none')
        ax.add_patch(rect)

        # Scatter plots for ground truth (GT) and model predictions (MP)
        ax.scatter(gt_x, gt_y, color=(30/255, 0/255, 230/255), label='GT', alpha=0.6, s=5)
        ax.scatter(mp_x, mp_y, color=(128/255, 0/255, 0/255), label='MP', alpha=0.6, s=5)
        ax.set_xlim(0, 128)
        ax.set_ylim(0, 128)
        ax.set_title(f"Test Patient ID {patient_num+1} - GT(blue) MP(red)", fontsize=8)
        ax.tick_params(axis='both', which='both', length=1)  # Hide ticks
        ax.set_xticks([])
        ax.set_yticks([])

        # Save the plot for the patient
        save_path = os.path.join(output_folder, f"patient_{patient_num}_plot.png")
        fig.savefig(save_path, dpi=300, bbox_inches='tight')  # Save with high resolution
        plt.close(fig)  # Close the figure to free memory

        print(f"Plot saved for Patient {patient_num} at: {save_path}")

    print(f"All patient plots have been saved to {output_folder}")



In [59]:
# OG RESULTS
main_results_folder = "/Users/felicialiu/Desktop/ESC499/Code2/segcen/segcen_gen_results"
output_folder = "/Users/felicialiu/Desktop/ESC499/Code2/segcen/images"
title = "General LLM Center Point Segmentation Results on Axial Slices"
patient_data = list_sorted_txt_files(main_results_folder, output_folder, title)
generate_patient_plots(patient_data, "/Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images")


Average % Points in Bounding Box: 0.0805
Average 95th Percentile Shortest Distance to Bounding Box: 22.8566
Average 95th Percentile Euclidean Distance: 57.4112
Saved plot to /Users/felicialiu/Desktop/ESC499/Code2/segcen/images/General_LLM_Center_Point_Segmentation_Results_on_Axial_Slices.png
Plot saved for Patient 0 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_0_plot.png
Plot saved for Patient 1 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_1_plot.png
Plot saved for Patient 2 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_2_plot.png
Plot saved for Patient 3 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_3_plot.png
Plot saved for Patient 4 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_4_plot.png
Plot saved for Patient 5 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/gen_images/patient_5_plot.png
Plot saved for Patient 6 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/ge

In [60]:
# 200 STEPS RESULTS
main_results_folder = "/Users/felicialiu/Desktop/ESC499/Code2/segcen/segcen_200steps_results"
output_folder = "/Users/felicialiu/Desktop/ESC499/Code2/segcen/images"
title = "Fine-Tuned LLM Center Point Segmentation Results on Axial Slices"
patient_data = list_sorted_txt_files(main_results_folder, output_folder, title)
generate_patient_plots(patient_data, "/Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images")

Average % Points in Bounding Box: 0.0800
Average 95th Percentile Shortest Distance to Bounding Box: 26.2855
Average 95th Percentile Euclidean Distance: 68.4812
Saved plot to /Users/felicialiu/Desktop/ESC499/Code2/segcen/images/Fine-Tuned_LLM_Center_Point_Segmentation_Results_on_Axial_Slices.png
Plot saved for Patient 0 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_0_plot.png
Plot saved for Patient 1 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_1_plot.png
Plot saved for Patient 2 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_2_plot.png
Plot saved for Patient 3 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_3_plot.png
Plot saved for Patient 4 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_4_plot.png
Plot saved for Patient 5 at: /Users/felicialiu/Desktop/ESC499/Code2/segcen/finetune_images/patient_5_plot.png
Plot saved for Patient 6 at: /Users/felicial