In [129]:
import numpy as np
import os
import matplotlib.pyplot as plt
import glob
import pandas as pd

# Updated plot settings for consistency
plt.rcParams.update({'font.size': 8})
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

# Collect metrics in the process function and later use them for plotting
def process_bounding_box(box):
    """Process the bounding box to convert into a usable format"""
    if box is None or np.array_equal(box, [(-1, -1)] * 4):
        return None
    if isinstance(box, (list, np.ndarray)) and len(box) == 4 and all(isinstance(x, int) for x in box):
        row_min, col_min, row_max, col_max = box
        return np.array([(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)])
    try:
        box = np.array(box, dtype=int)
        return box if box.shape == (4, 2) else None
    except:
        return None

def hausdorff_95(gt_mask, mo_mask):
    """Calculate the 95th percentile of Hausdorff Distance"""
    if not np.any(gt_mask) or not np.any(mo_mask):
        return 128
    gt_boundary = np.argwhere(np.gradient(gt_mask.astype(float))[0] != 0)
    mo_boundary = np.argwhere(np.gradient(mo_mask.astype(float))[0] != 0)
    if len(gt_boundary) == 0 or len(mo_boundary) == 0:
        return 128
    dists = [directed_hausdorff(gt_boundary, mo_boundary)[0], directed_hausdorff(mo_boundary, gt_boundary)[0]]
    return np.percentile(dists, 95)

def calculate_metrics(gt, mo):
    """Calculate evaluation metrics: Dice, HD95, Precision, Recall, Specificity"""
    gt_mask = np.zeros((128, 128), dtype=np.uint8)
    mo_mask = np.zeros((128, 128), dtype=np.uint8)
    gt_rows, gt_cols = zip(*gt)
    mo_rows, mo_cols = zip(*mo)
    gt_mask[min(gt_rows):max(gt_rows)+1, min(gt_cols):max(gt_cols)+1] = 1
    mo_mask[min(mo_rows):max(mo_rows)+1, min(mo_cols):max(mo_cols)+1] = 1
    
    intersection = np.logical_and(gt_mask, mo_mask).sum()
    union = np.logical_or(gt_mask, mo_mask).sum()
    dice = 2 * intersection / (gt_mask.sum() + mo_mask.sum())
    hd = hausdorff_95(gt_mask, mo_mask)
    tp, fp, fn, tn = (np.logical_and(gt_mask, mo_mask).sum(),
                       np.logical_and(1 - gt_mask, mo_mask).sum(),
                       np.logical_and(gt_mask, 1 - mo_mask).sum(),
                       np.logical_and(1 - gt_mask, 1 - mo_mask).sum())
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    specificity = tn / (tn + fp) if tn + fp > 0 else 0
    return dice, hd, precision, recall, specificity

def process_file(filepath, last_gt_box=None):
    """Process the bounding box file and calculate metrics"""
    if not os.path.exists(filepath) or os.stat(filepath).st_size == 0:
        return (None,)*5, last_gt_box

    with open(filepath, 'r') as file:
        lines = file.readlines()
    
    metrics = []
    for line in lines:
        if "Ground truth" in line and "Model output" in line:
            gt_box, mo_box = map(str.strip, line.split("||"))
            gt = process_bounding_box(parse_bounding_box(gt_box.split(":")[1]))
            mo = process_bounding_box(parse_bounding_box(mo_box.split(":")[1]))
            
            if gt is None:
                gt = last_gt_box  # Use the last valid GT if the current one is None
            
            if gt is not None and mo is not None:
                metrics.append(calculate_metrics(gt, mo))
                last_gt_box = gt  # Update last valid GT box for the next iteration

    return np.mean(metrics, axis=0) if metrics else (None,)*5, last_gt_box

def process_folder(folder_path):
    """Process all files in the folder and collect metrics for plotting"""
    files = sorted([f for f in os.listdir(folder_path) if f.endswith('.txt')],
                   key=lambda x: int(x.split('_')[-1].split('.')[0]))

    all_metrics = []
    last_gt_box = None
    for file in files:
        num = int(file.split('_')[-1].split('.')[0])
        results, last_gt_box = process_file(os.path.join(folder_path, file), last_gt_box)
        if None in results: 
            pass
            # print(f"Patient {num}: N/A")
        else:
            all_metrics.append(list(results))
            # print(f"Patient {num}: {results}")

    all_metrics = np.array(all_metrics)
    # get means for all catagories
    means = np.mean(all_metrics, axis=0)
    print(f"Mean: {means}")
    return all_metrics



def plot_metrics_distribution_box(all_metrics, output_folder, title):
    all_metrics = np.where(all_metrics == None, np.nan, all_metrics)  # Replace None with NaN
    df = pd.DataFrame(all_metrics, columns=['Dice Coefficient', '95% Hausdorff Distance', 'Precision', 'Recall', 'Specificity'])
    df = df.apply(pd.to_numeric, errors='coerce')

    # Scale Hausdorff Distance by 128 to normalize it
    df['95% Hausdorff Distance'] = df['95% Hausdorff Distance'] / 128

    # Prepare the data for box plot
    data = {
        "Dice Coefficient": df['Dice Coefficient'].dropna(),
        "Normalised 95% Hausdorff Distance": df['95% Hausdorff Distance'].dropna(),
        "Precision": df['Precision'].dropna(),
        "Recall": df['Recall'].dropna(),
        "Specificity": df['Specificity'].dropna(),
    }

    # Convert data dictionary to list for boxplot
    box_data = [data[key] for key in data]
    
    # Boxplot settings
    plt.figure(figsize=(10, 5))
    box = plt.boxplot(box_data, patch_artist=True, showfliers=False, tick_labels=data.keys())  

    # Set custom colors with transparency
    colors = ['#90EE90', '#800080', '#1E90FF', '#4682B4', '#5F9EA0']  # Updated colors
    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.5)  # Make boxplot bars translucent

    # Scatter plot overlay
    for i, (key, color) in enumerate(zip(data.keys(), colors)):  
        y_values = data[key]
        x_values = np.random.normal(i + 1, 0.05, size=len(y_values))  # Jitter for better visualization
        plt.scatter(x_values, y_values, alpha=0.8, color=color, s=10)  # Use matching color

    # Labels and aesthetics
    plt.xlabel("Evaluation Metric Categories")
    plt.ylabel("Evaluation Metric Values")
    plt.title(title)
    plt.ylim(-0.05, 1.05)
    plt.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
    plt.axhline(y=1, color='black', linestyle='--', linewidth=0.8)

    plt.grid(True)
    plt.tight_layout()  # Adjust layout to prevent overlaps
    save_path = f"{output_folder}/{title.replace(' ', '_')}.png"  
    plt.savefig(save_path, dpi=300, bbox_inches='tight')  

    # plt.show()
    plt.close()



In [130]:
folder_path = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/seg_general_results'
output_folder = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/images'
all_metrics = process_folder(folder_path)
plot_metrics_distribution_box(all_metrics, output_folder, "Processed General LLM Segmentation Results on Axial Slices")


folder_path = "/Users/felicialiu/Desktop/ESC499/Code2/segbbox/seg_200steps_results/"
output_folder = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/images'
all_metrics = process_folder(folder_path)
plot_metrics_distribution_box(all_metrics, output_folder, "Processed Fine-Tuned LLM Segmentation Results (200 Steps) on Axial Slices")

Mean: [ 0.12194283 65.91030124  0.09887831  0.29412731  0.76225415]
Mean: [ 0.10852946 67.38121118  0.08821867  0.24133112  0.8136739 ]


In [145]:
import numpy as np
import os
import matplotlib.pyplot as plt
import glob
import pandas as pd

# Updated plot settings for consistency
plt.rcParams.update({'font.size': 8})
plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = ['Times New Roman']

# Collect metrics in the process function and later use them for plotting
def process_bounding_box(box):
    """Process the bounding box to convert into a usable format"""
    if box is None or np.array_equal(box, [(-1, -1)] * 4):
        return None
    if isinstance(box, (list, np.ndarray)) and len(box) == 4 and all(isinstance(x, int) for x in box):
        row_min, col_min, row_max, col_max = box
        return np.array([(row_min, col_min), (row_min, col_max), (row_max, col_max), (row_max, col_min)])
    try:
        box = np.array(box, dtype=int)
        return box if box.shape == (4, 2) else None
    except:
        return None


def process_file(filepath, last_gt_box=None):
    """Process the bounding box file and calculate metrics, return coordinates for 3D plot"""
    if not os.path.exists(filepath) or os.stat(filepath).st_size == 0:
        return last_gt_box, None, None

    with open(filepath, 'r') as file:
        lines = file.readlines()

    gt_coords, mo_coords = [], []
    for line in lines:
        if "Ground truth" in line and "Model output" in line:
            gt_box, mo_box = map(str.strip, line.split("||"))
            gt = process_bounding_box(parse_bounding_box(gt_box.split(":")[1]))
            mo = process_bounding_box(parse_bounding_box(mo_box.split(":")[1]))

            if gt is None:
                gt = last_gt_box  # Use the last valid GT if the current one is None

            if gt is None:
                gt = np.array([(-1, -1)] * 4)
                
            if mo is None:
                mo = np.array([(-1, -1)] * 4)
                
            last_gt_box = gt  # Update last valid GT box for the next iteration
            # print(gt.shape, mo.shape)
            gt_coords.append(gt)
            mo_coords.append(mo)

    return last_gt_box, np.array(gt_coords), np.array(mo_coords)


import numpy as np
import os

def pad_slices(slices, target_size=95):
    """
    Pads a list of (W, 4, 2) slices to shape (95, 4, 2), centering the data.
    """
    W = len(slices)
    padded = np.full((target_size, 4, 2), -1)  # Default padding with (-1, -1)

    if W > 0:
        start_idx = (target_size - W) // 2  # Compute centering offset
        padded[start_idx:start_idx + W] = slices  # Center the original slices
    
    return padded

def process_folder(folder_path):
    """Process all files in the folder, ensuring shape (N, 95, 4, 2)"""
    files = sorted([f for f in os.listdir(folder_path) if f.endswith('.txt')],
                   key=lambda x: int(x.split('_')[-1].split('.')[0]))

    all_patients_coords = []
    last_gt_box = None

    for file in files:
        last_gt_box, gt_coords, mo_coords = process_file(os.path.join(folder_path, file), last_gt_box)

        # Ensure gt_coords and mo_coords are at least (W, 4, 2)
        if gt_coords is None or len(gt_coords) == 0:
            gt_coords = np.full((1, 4, 2), -1)  # Single (-1, -1) placeholder if no data
        if mo_coords is None or len(mo_coords) == 0:
            mo_coords = np.full((1, 4, 2), -1)

        # print(gt_coords.shape, mo_coords.shape)  # Debugging size before padding

        # Pad to (95, 4, 2)
        gt_padded = pad_slices(gt_coords, 95)
        mo_padded = pad_slices(mo_coords, 95)

        all_patients_coords.append((gt_padded, mo_padded))

    # Convert to numpy array of shape (N, 95, 4, 2)
    all_gt_coords = np.array([x[0] for x in all_patients_coords])
    all_mo_coords = np.array([x[1] for x in all_patients_coords])

    # print(all_gt_coords.shape, all_mo_coords.shape)  # Should be (N, 95, 4, 2)
    return all_gt_coords, all_mo_coords


import os
import random
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def make_comparison_images(gt_boxes, mo_boxes, patient_num, folder_path, view=(30, 30)):
    patient_num -=1

    fig = plt.figure(figsize=(10, 5.4))

    # Ground Truth Bounding Box Plot
    ax1 = fig.add_subplot(1, 2, 1, projection='3d')
    for z, box in enumerate(gt_boxes):
        if np.all(box == -1):  # Ignore empty slices
            continue
        xs, ys = zip(*box)
        zs = [z] * 4
        # ax1.plot(xs + (xs[0],), ys + (ys[0],), zs + (zs[0],), color='blue', linewidth=1.5, alpha=0.8)
        ax1.plot(list(xs) + [xs[0]], list(ys) + [ys[0]], list(zs) + [zs[0]], color=(30/255, 0/255, 230/255), linewidth=1, alpha=0.8)


    # ax1.set_ylabel('Saggital Slices')
    ax1.set_zlabel('Axial Slices')
    # ax1.set_zlabel('Coronal Slices')
    ax1.set_title(f'Test Patient ID {patient_num} - Ground Truth')
    ax1.set_xlim([0, 128])
    ax1.set_ylim([0, 128])
    ax1.set_zlim([0, 95])
    ax1.view_init(elev=view[0], azim=view[1])

    # Model Prediction Bounding Box Plot
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')
    for z, box in enumerate(mo_boxes):
        if np.all(box == -1):  # Ignore empty slices
            continue
        xs, ys = zip(*box)
        zs = [z] * 4
        # ax2.plot(xs + (xs[0],), ys + (ys[0],), zs + (zs[0],), color='red', linewidth=1.5, alpha=0.8)
        ax2.plot(list(xs) + [xs[0]], list(ys) + [ys[0]], list(zs) + [zs[0]], color=(128/255, 0/255, 0/255), linewidth=1, alpha=0.8)


    # ax2.set_ylabel('Saggital Slices')
    ax2.set_zlabel('Axial Slices')
    # ax2.set_zlabel('Axial Slices')
    ax2.set_title(f'Test Patient ID {patient_num} - LLM Model Prediction')
    ax2.set_xlim([0, 128])
    ax2.set_ylim([0, 128])
    ax2.set_zlim([0, 95])
    ax2.view_init(elev=view[0], azim=view[1])

    plt.tight_layout()
    save_path = os.path.join(folder_path, f"patient_{patient_num}_comparison.png")
    plt.savefig(save_path)
    plt.close()
    # plt.show()



def plot_all_patients_3d(all_gt_coords, all_mo_coords, folder_path):
    """Plot 3D bounding boxes for all patients"""
    for idx, (gt_coords, mo_coords) in enumerate(zip(all_gt_coords, all_mo_coords)):
        print(f"Processing Patient {idx+1}...")
        make_comparison_images(gt_coords, mo_coords, patient_num=idx+1, folder_path=folder_path)




In [146]:
# Example usage
folder_path = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/seg_general_results'
output_folder = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/gen_images'
all_gt_coords, all_mo_coords = process_folder(folder_path)
plot_all_patients_3d(all_gt_coords, all_mo_coords, output_folder)

Processing Patient 1...
Processing Patient 2...
Processing Patient 3...
Processing Patient 4...
Processing Patient 5...
Processing Patient 6...
Processing Patient 7...
Processing Patient 8...
Processing Patient 9...
Processing Patient 10...
Processing Patient 11...
Processing Patient 12...
Processing Patient 13...
Processing Patient 14...
Processing Patient 15...
Processing Patient 16...
Processing Patient 17...
Processing Patient 18...
Processing Patient 19...
Processing Patient 20...
Processing Patient 21...
Processing Patient 22...
Processing Patient 23...
Processing Patient 24...
Processing Patient 25...
Processing Patient 26...
Processing Patient 27...
Processing Patient 28...
Processing Patient 29...
Processing Patient 30...
Processing Patient 31...
Processing Patient 32...
Processing Patient 33...
Processing Patient 34...
Processing Patient 35...
Processing Patient 36...
Processing Patient 37...
Processing Patient 38...
Processing Patient 39...
Processing Patient 40...
Processin

In [147]:
# Example usage
folder_path = "/Users/felicialiu/Desktop/ESC499/Code2/segbbox/seg_200steps_results/"
output_folder = '/Users/felicialiu/Desktop/ESC499/Code2/segbbox/finetune_images'
all_gt_coords, all_mo_coords = process_folder(folder_path)
plot_all_patients_3d(all_gt_coords, all_mo_coords, output_folder)

Processing Patient 1...
Processing Patient 2...
Processing Patient 3...
Processing Patient 4...
Processing Patient 5...
Processing Patient 6...
Processing Patient 7...
Processing Patient 8...
Processing Patient 9...
Processing Patient 10...
Processing Patient 11...
Processing Patient 12...
Processing Patient 13...
Processing Patient 14...
Processing Patient 15...
Processing Patient 16...
Processing Patient 17...
Processing Patient 18...
Processing Patient 19...
Processing Patient 20...
Processing Patient 21...
Processing Patient 22...
Processing Patient 23...
Processing Patient 24...
Processing Patient 25...
Processing Patient 26...
Processing Patient 27...
Processing Patient 28...
Processing Patient 29...
Processing Patient 30...
Processing Patient 31...
Processing Patient 32...
Processing Patient 33...
Processing Patient 34...
Processing Patient 35...
Processing Patient 36...
Processing Patient 37...
Processing Patient 38...
Processing Patient 39...
Processing Patient 40...
Processin