In [1]:
import numpy as np
import pandas as pd
from ultralytics import YOLO
import matplotlib.pyplot as plt
import csv
import os
import cv2
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm
import multiprocessing
from functools import partial
from sklearn.metrics import mean_squared_error, cohen_kappa_score, accuracy_score


# Inter-Observer Variability

In [7]:
# Stacked bars of the Observer_1 / Common / Observer_2 columns per tile, each
# rescaled to a percentage of the row total and ordered by the Observer_1 share.
file_path = "CSV_Preds/InterVar.csv"
InterVar = pd.read_csv(file_path)
InterVar.columns = ["name", "Observer_1", "Observer_2", "Common"]

# The row total is taken before any column is rescaled.
InterVar["Total"] = InterVar["Observer_1"] + InterVar["Observer_2"] + InterVar["Common"]

share_columns = ["Observer_1", "Observer_2", "Common"]
InterVar = (
    InterVar
    .assign(**{column: InterVar[column] / InterVar["Total"] * 100 for column in share_columns})
    .sort_values(by="Observer_1", ascending=False)
)
# Positional x coordinate, so bars follow the sorted order rather than the CSV order.
InterVar["index"] = range(len(InterVar))

# (column, legend label, colour) in stacking order, bottom to top.
bar_specs = [
    ("Observer_1", "<b>Observer-1</b>", "#2D6A4F"),
    ("Common", "<b>Common</b>", "#F4D35E"),
    ("Observer_2", "<b>Observer-2</b>", "#84C5A1"),
]

fig = go.Figure(data=[
    go.Bar(x=InterVar["index"], y=InterVar[column], name=legend_label, marker_color=bar_color)
    for column, legend_label, bar_color in bar_specs
])

fig.update_layout(
    barmode="stack",
    title="<b>Inter-Observer Variability of Neutrophil Counts in Tiles</b>",
    xaxis_title="<b>Tile Index</b>",
    yaxis_title="<b>Percentage</b>",
    xaxis={"tickmode": "linear", "tick0": 0, "dtick": 1},
    legend_title="<b>Categories</b>",
    yaxis={"range": [0, 100]},
)

fig.show()

In [212]:
# Stacked bars of the raw Observer_1 / Common / Observer_2 values per tile
# (no normalisation), with tiles ordered by the Observer_1 column.
# NOTE: relies on `file_path` having been defined by an earlier cell.
InterVar = pd.read_csv(file_path)
InterVar.columns = ["name", "Observer_1", "Observer_2", "Common"]

InterVar = InterVar.sort_values(by="Observer_1", ascending=False)

# Positional x coordinate, so bars follow the sorted order rather than the CSV order.
InterVar['index'] = range(len(InterVar))

fig = go.Figure()

fig.add_trace(go.Bar(
    x=InterVar['index'],
    y=InterVar["Observer_1"],
    name="<b>Observer-1</b>",
    marker_color='#2D6A4F'
))

fig.add_trace(go.Bar(
    x=InterVar['index'],
    y=InterVar["Common"],
    name="<b>Common</b>",
    marker_color='#F4D35E'
))

fig.add_trace(go.Bar(
    x=InterVar['index'],
    y=InterVar["Observer_2"],
    # Closing tag fixed: the label previously ended with a second opening <b>.
    name="<b>Observer-2</b>",
    marker_color='#84C5A1'
))

fig.update_layout(
    barmode='stack',
    title="<b>Inter-Observer Variability of Neutrophil Counts in Tiles</b>",
    xaxis_title="<b>Tile Index</b>",
    # The values plotted here are not rescaled to percentages, so the axis
    # label no longer carries a "(%)" suffix.
    yaxis_title="<b>Neutrophil Count</b>",
    xaxis=dict(
        tickmode='linear',
        tick0=0,
        dtick=1
    ),
    legend_title="<b>Categories</b>"
)

fig.show()

In [213]:
# Percentage-stacked bars built from the CSV's own column names, with tiles kept
# in file order: reset_index() supplies the 'index' column used as x positions.
# NOTE: relies on `file_path` having been defined by an earlier cell.
InterVar = pd.read_csv(file_path).reset_index()
total = InterVar['Observer – 1'] + InterVar['Common'] + InterVar['Observer – 2']

# Series name -> bar colour, in stacking order (bottom to top).
series_colors = {
    'Observer – 1': '#2D6A4F',
    'Common': '#F4D35E',
    'Observer – 2': '#84C5A1',
}
axis_title_font = dict(size=18, family='Arial, bold')

fig = go.Figure(data=[
    go.Bar(
        name=series,
        x=InterVar['index'],
        y=InterVar[series] / total * 100,
        marker_color=shade,
    )
    for series, shade in series_colors.items()
])

fig.update_layout(
    barmode='stack',
    title=dict(
        text='<b>Inter-Observer Variability of Neutrophil Counts in Tiles</b>',
        font=dict(size=24, family='Arial, bold'),
    ),
    xaxis_title=dict(text='<b>Tile Index</b>', font=axis_title_font),
    yaxis_title=dict(text='<b>Neutrophil Counts (%)</b>', font=axis_title_font),
    yaxis=dict(
        range=[0, 100],
        ticksuffix='%',
        title_font=dict(size=18),
        tickfont=dict(size=14),
    ),
    xaxis=dict(tickfont=dict(size=14)),
    legend_title='Categories',
    template='plotly_white',
)

fig.show()

Sankey plot

In [2]:
# Per-tile results table; the label_groundtruth / label_prediction columns are
# what the metric and Sankey cells below read.
res = pd.read_csv(filepath_or_buffer="CSV_Preds/Res.csv")
res.head(n=5)

Unnamed: 0,name,ground truth neutrophils,predicted neutrophils,missed,false_positives,true_positives,label_groundtruth,label_prediction
0,tile_24555_140484.png,37,59,2,24,35,2,2
1,tile_24555_142520.png,17,30,4,17,13,2,2
2,tile_24555_144556.png,17,24,5,12,12,2,2
3,tile_26192_80422.png,40,89,2,51,38,2,2
4,tile_26192_81440.png,33,77,4,48,29,2,2


In [3]:

# Agreement between the ground-truth and predicted tile labels.
y_true, y_pred = (
    res[label_column].to_numpy()
    for label_column in ("label_groundtruth", "label_prediction")
)

# Compute all three scores first; they are reused by the Sankey title later.
mse = mean_squared_error(y_true, y_pred)
weighted_kappa = cohen_kappa_score(y_true, y_pred, weights="quadratic")
accuracy = accuracy_score(y_true, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Weighted Cohen's Kappa: {weighted_kappa:.4f}")
print(f"Accuracy: {accuracy:.4%}")


Mean Squared Error (MSE): 0.0656
Weighted Cohen's Kappa: 0.9133
Accuracy: 93.4426%


In [22]:



# Sankey diagram of ground-truth label -> predicted label.
# Nodes 0-2 are the ground-truth labels and nodes 3-5 the predicted labels, so
# every palette repeats its three colours once per side.
safe_colors = ["#00A087", "#4DBBD5", "#E64B35"] * 2

# Alternative palettes kept alongside; only `safe_colors` is used below.
default_nejm = ["#029E73", "#0072B5", "#BC3C29"] * 2
default_jco = ["#74C476", "#4575B4", "#D73027"] * 2
lanonc_lancet = ["#4DAF4A", "#377EB8", "#E41A1C"] * 2

# One row per (ground truth, prediction) pair with the number of tiles in it.
sankey_data = (
    res
    .groupby(['label_groundtruth', 'label_prediction'])
    .size()
    .reset_index(name='count')
)

# Colour of each flow taken from its source node; computed but not applied to
# the links below, which keep the default link colour.
link_colors = [safe_colors[source_label] for source_label in sankey_data['label_groundtruth']]

node_style = dict(
    pad=5,
    thickness=85,
    line=dict(color="black", width=1),
    color=safe_colors,
)
flows = dict(
    source=sankey_data['label_groundtruth'],
    # Shift predictions onto the right-hand set of nodes.
    target=sankey_data['label_prediction'] + 3,
    value=sankey_data['count'],
)

fig = go.Figure(data=[go.Sankey(node=node_style, link=flows)])

# Column captions placed just outside the left and right edges of the plot.
column_captions = [
    dict(x=x_position, y=1.08, text=caption, showarrow=False, font=dict(size=30))
    for x_position, caption in (
        (-0.05, "<b>Observed Score</b>"),
        (1.05, "<b>Predicted Score</b>"),
    )
]

fig.update_layout(
    title_x=0.5,
    title_y=0.05,
    font_size=40,
    title_text=f"MSE = {mse:.2f}, Weighted Kappa = {weighted_kappa:.2f}, Accuracy = {accuracy:.0%}",
    title_font=dict(family="Arial Black", size=30, color="black"),
    plot_bgcolor=safe_colors[2],
    height=800,
    width=800,
    annotations=column_captions,
)

fig.show()

In [9]:
def calculate_iou(box1, box2):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence ``(x1, y1, x2, y2)`` of corner coordinates.
        box2: Sequence ``(x1, y1, x2, y2)`` of corner coordinates.

    Returns:
        Intersection area divided by union area, or ``0`` when the union
        area is not positive.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Extent of the overlap rectangle; clamped to 0 when the boxes are disjoint.
    overlap_width = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_height = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection_area = overlap_width * overlap_height

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union_area = area_a + area_b - intersection_area

    if union_area > 0:
        return intersection_area / union_area
    return 0

In [10]:
def get_label(count):
    """Map a count to a label: 0 for zero, 1 for fewer than five, otherwise 2."""
    if count == 0:
        return 0
    return 1 if count < 5 else 2

In [None]:
def process_image(image_path, label_path, result, model_output_dir, iou_threshold=0.1):
    """Score one image's predictions against its label file and save an overlay.

    Each prediction is matched to the not-yet-matched ground-truth box with the
    highest IoU above ``iou_threshold``; predictions without such a box count
    as false positives, and ground-truth boxes left unmatched count as missed.

    Args:
        image_path: Path of the image to read with OpenCV.
        label_path: Path of the label file. Each non-blank line holds
            ``class_id x_center y_center width height`` with coordinates
            expressed as fractions of the image size. A missing file is
            treated as having no ground-truth boxes.
        result: Prediction result whose ``boxes`` yield objects exposing
            ``xyxy[0]`` as ``(x1, y1, x2, y2)``.
        model_output_dir: Directory in which ``output_<image name>`` is saved.
        iou_threshold: IoU a match must exceed (default 0.1).

    Returns:
        Tuple ``(missed, false_positives, true_positives,
        ground_truth_count, predicted_count)``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_height, image_width = image.shape[:2]

    ground_truth_boxes = []
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                _, x_center, y_center, width, height = map(float, fields)
                # Convert centre/size fractions to pixel corner coordinates.
                x1 = (x_center - width / 2) * image_width
                y1 = (y_center - height / 2) * image_height
                x2 = (x_center + width / 2) * image_width
                y2 = (y_center + height / 2) * image_height
                ground_truth_boxes.append([x1, y1, x2, y2])

    predicted_boxes = []
    for box in result.boxes:
        x1, y1, x2, y2 = box.xyxy[0]
        predicted_boxes.append([x1, y1, x2, y2])

    false_positives = 0
    true_positives = 0
    matched_gt_boxes = [False] * len(ground_truth_boxes)

    for pred_box in predicted_boxes:
        best_iou = iou_threshold
        best_match_idx = -1

        for i, gt_box in enumerate(ground_truth_boxes):
            if matched_gt_boxes[i]:
                continue
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_match_idx = i

        if best_match_idx != -1:
            true_positives += 1
            matched_gt_boxes[best_match_idx] = True
        else:
            false_positives += 1

    missed = matched_gt_boxes.count(False)

    # Overlay: ground truth as rectangles, predictions as circles.
    for gt_box in ground_truth_boxes:
        cv2.rectangle(image, (int(gt_box[0]), int(gt_box[1])), (int(gt_box[2]), int(gt_box[3])), (0, 255, 0), 4)

    for pred_box in predicted_boxes:
        center_x = int((pred_box[0] + pred_box[2]) / 2)
        center_y = int((pred_box[1] + pred_box[3]) / 2)
        radius = int(min(pred_box[2] - pred_box[0], pred_box[3] - pred_box[1]) / 2)
        cv2.circle(image, (center_x, center_y), radius, (0, 0, 255), 4)

    text = f"Ground Truth: {len(ground_truth_boxes)}, Predicted: {len(predicted_boxes)}"
    cv2.putText(image, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

    output_image_path = os.path.join(model_output_dir, f"output_{os.path.basename(image_path)}")
    # OpenCV images are BGR while matplotlib expects RGB; convert before saving
    # so the saved overlay keeps the original colours.
    plt.imsave(output_image_path, cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    return missed, false_positives, true_positives, len(ground_truth_boxes), len(predicted_boxes)

def _sum_detection_counts(csv_path):
    """Sum the missed / false_positives / true_positives columns of a metrics CSV."""
    totals = {"total_missed": 0, "total_false_positives": 0, "total_true_positives": 0}
    with open(csv_path, mode="r", newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            totals["total_missed"] += int(row["missed"])
            totals["total_false_positives"] += int(row["false_positives"])
            totals["total_true_positives"] += int(row["true_positives"])
    return totals


def evaluate_models_for_label_dir(model_names, test_images_dir, test_labels_dir):
    """Evaluate every model against one label directory, with on-disk caching.

    For each model a per-image metrics CSV and a directory of overlay images
    are produced. When both already exist, the totals are read back from the
    CSV instead of running the model again.

    Args:
        model_names: Weight file names located under ``models/``.
        test_images_dir: Directory of images passed to ``model.predict``.
        test_labels_dir: Directory of label files; images without a matching
            ``.txt`` file are skipped.

    Returns:
        Dict mapping each model name to a dict with ``total_missed``,
        ``total_false_positives`` and ``total_true_positives``.
    """
    model_performance = {}
    label_set = os.path.basename(test_labels_dir.rstrip('/'))

    for model_name in model_names:
        model_stem = model_name.replace('.pt', '')
        model_output_dir = f"out/{model_stem}_{label_set}"
        model_csv_path = f"output_metrics_{label_set}_{model_stem}.csv"

        if os.path.exists(model_output_dir) and os.path.exists(model_csv_path):
            print(f"Loading results for model: {model_name} and label dir: {test_labels_dir} from existing CSV file")
            model_performance[model_name] = _sum_detection_counts(model_csv_path)
            continue

        os.makedirs(model_output_dir, exist_ok=True)
        print(f"Evaluating model: {model_name} for label dir: {test_labels_dir}")
        model = YOLO(f"models/{model_name}")
        results = model.predict(source=test_images_dir, show_labels=False, show_conf=False, conf=0.4, verbose=False)

        totals = {"total_missed": 0, "total_false_positives": 0, "total_true_positives": 0}

        # Write to a temporary file and rename it only after every image has
        # been processed: an interrupted run must not leave a partial CSV that
        # the cache check above would later load as a complete result.
        partial_csv_path = f"{model_csv_path}.tmp"
        with open(partial_csv_path, mode="w", newline="") as csv_file:
            fieldnames = [
                "name", "ground truth neutrophils", "predicted neutrophils", "missed",
                "false_positives", "true_positives", "label_groundtruth", "label_prediction"
            ]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()

            for result in results:
                image_name = os.path.basename(result.path)
                image_path = os.path.join(test_images_dir, image_name)
                label_path = os.path.join(test_labels_dir, image_name.replace(".png", ".txt"))

                if not os.path.exists(label_path):
                    continue

                missed, false_positives, true_positives, gt_count, pred_count = process_image(
                    image_path, label_path, result, model_output_dir
                )

                totals["total_missed"] += missed
                totals["total_false_positives"] += false_positives
                totals["total_true_positives"] += true_positives

                writer.writerow({
                    "name": image_name,
                    "ground truth neutrophils": gt_count,
                    "predicted neutrophils": pred_count,
                    "missed": missed,
                    "false_positives": false_positives,
                    "true_positives": true_positives,
                    "label_groundtruth": get_label(gt_count),
                    "label_prediction": get_label(pred_count)
                })

        os.replace(partial_csv_path, model_csv_path)
        model_performance[model_name] = totals

    return model_performance

In [65]:
# Evaluate every model under models/ against each label set and print the
# summed missed / false-positive / true-positive counts per model.
test_images_dir = "images/"
test_labels_dirs = ["Union_Labels/", "Intersection_Labels/", "labels_O1/", "labels_O2/"]
# os.listdir returns entries in arbitrary order and includes any stray file or
# sub-directory; keep only weight files and sort them for a stable run order.
model_names = sorted(name for name in os.listdir("models") if name.endswith(".pt"))

for test_labels_dir in test_labels_dirs:
    print(f"Evaluating models for label directory: {test_labels_dir}")
    model_performance = evaluate_models_for_label_dir(model_names, test_images_dir, test_labels_dir)

    print(f"Performance for label directory {test_labels_dir}:")
    for model_name, metrics in model_performance.items():
        print(f"Model: {model_name}, Missed: {metrics['total_missed']}, False Positives: {metrics['total_false_positives']}, True Positives: {metrics['total_true_positives']}")

Evaluating models for label directory: Union_Labels/
Evaluating model: best_11x_datanew_NoAug_b16.pt for label dir: Union_Labels/
Evaluating model: best_11s_dataold_Noaug_b16.pt for label dir: Union_Labels/
Evaluating model: best_8s_dataold_Noaug_b16.pt for label dir: Union_Labels/
Evaluating model: best_11s_datanew_NoAug_b16.pt for label dir: Union_Labels/
Evaluating model: best_11s_1.pt for label dir: Union_Labels/
Evaluating model: best_11m_dataold_NoAug_b16.pt for label dir: Union_Labels/
Performance for label directory Union_Labels/:
Model: best_11x_datanew_NoAug_b16.pt, Missed: 276, False Positives: 295, True Positives: 605
Model: best_11s_dataold_Noaug_b16.pt, Missed: 131, False Positives: 929, True Positives: 750
Model: best_8s_dataold_Noaug_b16.pt, Missed: 221, False Positives: 538, True Positives: 660
Model: best_11s_datanew_NoAug_b16.pt, Missed: 249, False Positives: 406, True Positives: 632
Model: best_11s_1.pt, Missed: 174, False Positives: 657, True Positives: 707
Model: 

In [None]:
def process_image(image_path, label_path, result, conf_threshold):
    """Count true and false positives for one image at a confidence threshold.

    NOTE: this redefines ``process_image`` from an earlier cell with a
    different fourth parameter and a 3-tuple return value.
    ``evaluate_models_for_label_dir`` unpacks five values from
    ``process_image`` and will fail if it is called after this cell has run.

    Predictions with confidence below ``conf_threshold`` are dropped. Each
    remaining prediction is matched to the not-yet-matched ground-truth box
    with the highest IoU above 0.1; a prediction with no such box (including
    one that only overlaps boxes already claimed by other predictions) counts
    as a false positive.

    Args:
        image_path: Path of the image; it is read only to obtain its size.
        label_path: Path of the label file. Each non-blank line holds
            ``class_id x_center y_center width height`` with coordinates
            expressed as fractions of the image size. A missing file is
            treated as having no ground-truth boxes.
        result: Prediction result whose ``boxes`` yield objects exposing
            ``conf[0]`` and ``xyxy[0]``.
        conf_threshold: Minimum confidence for a prediction to be counted.

    Returns:
        Tuple ``(true_positives, false_positives, ground_truth_count)``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_height, image_width = image.shape[:2]

    ground_truth_boxes = []
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                _, x_center, y_center, width, height = map(float, fields)
                # Convert centre/size fractions to pixel corner coordinates.
                x1 = (x_center - width / 2) * image_width
                y1 = (y_center - height / 2) * image_height
                x2 = (x_center + width / 2) * image_width
                y2 = (y_center + height / 2) * image_height
                ground_truth_boxes.append([x1, y1, x2, y2])

    predicted_boxes = []
    for box in result.boxes:
        if box.conf[0] >= conf_threshold:
            x1, y1, x2, y2 = box.xyxy[0]
            predicted_boxes.append([x1, y1, x2, y2])

    matched_gt_boxes = [False] * len(ground_truth_boxes)
    true_positives = 0
    false_positives = 0

    for pred_box in predicted_boxes:
        best_iou = 0.1
        best_match_idx = -1

        # Search every unmatched ground-truth box. Stopping at the first
        # overlapping box would silently discard predictions whose first
        # overlap is already claimed, counting them as neither TP nor FP.
        for i, gt_box in enumerate(ground_truth_boxes):
            if matched_gt_boxes[i]:
                continue
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_match_idx = i

        if best_match_idx != -1:
            true_positives += 1
            matched_gt_boxes[best_match_idx] = True
        else:
            false_positives += 1

    return true_positives, false_positives, len(ground_truth_boxes)

def evaluate_threshold(conf_threshold, model, test_images_dir, test_labels_dir):
    """Run the model at one confidence threshold and aggregate detection scores.

    Images without a matching ``.txt`` label file in ``test_labels_dir`` are
    skipped.

    Args:
        conf_threshold: Confidence passed to ``model.predict`` and to
            ``process_image``.
        model: Object exposing ``predict(...)`` that returns per-image results
            with a ``path`` attribute.
        test_images_dir: Directory of images to predict on.
        test_labels_dir: Directory holding the label files.

    Returns:
        Tuple ``(conf_threshold, precision, recall, accuracy)``. Each score is
        ``0`` when its denominator is zero.
    """
    predictions = model.predict(
        source=test_images_dir,
        show_labels=False,
        show_conf=False,
        conf=conf_threshold,
        verbose=False,
    )

    tp_sum = fp_sum = gt_sum = 0
    for prediction in predictions:
        file_name = os.path.basename(prediction.path)
        label_path = os.path.join(test_labels_dir, file_name.replace(".png", ".txt"))
        if not os.path.exists(label_path):
            continue

        image_path = os.path.join(test_images_dir, file_name)
        tp, fp, gt = process_image(image_path, label_path, prediction, conf_threshold)
        tp_sum += tp
        fp_sum += fp
        gt_sum += gt

    detections = tp_sum + fp_sum
    precision = tp_sum / detections if detections > 0 else 0
    recall = tp_sum / gt_sum if gt_sum > 0 else 0
    # Same formula as recall: true positives over ground-truth count.
    accuracy = tp_sum / gt_sum if gt_sum > 0 else 0

    return conf_threshold, precision, recall, accuracy

def evaluate_all_models(model_names, test_images_dir, test_labels_dirs):
    """Plot a precision-recall curve for every model / label-directory pair.

    Each pair is evaluated with ``evaluate_threshold`` at 100 evenly spaced
    confidence thresholds in [0, 1], and one Plotly figure is shown per pair.

    Args:
        model_names: Weight file names located under ``models/``.
        test_images_dir: Directory of images to predict on.
        test_labels_dirs: Label directories to evaluate against, one figure each.

    Returns:
        None. Figures are displayed with ``fig.show()``.
    """
    confidence_thresholds = np.linspace(0.0, 1.0, 100)

    for model_name in model_names:
        print(f"Evaluating model: {model_name}")
        model = YOLO(f"models/{model_name}")

        for test_labels_dir in test_labels_dirs:
            print(f"Processing label directory: {test_labels_dir}")

            # One (threshold, precision, recall, accuracy) tuple per threshold.
            results = []
            for conf_threshold in tqdm(confidence_thresholds, desc="Processing thresholds"):
                result = evaluate_threshold(conf_threshold, model, test_images_dir, test_labels_dir)
                results.append(result)

            conf_thresholds_tracked, precisions, recalls, accuracies = zip(*results)

            f1_scores = [2 * (p * r) / (p + r) if (p + r) > 0 else 0 for p, r in zip(precisions, recalls)]

            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=recalls,
                y=precisions,
                mode='lines+markers',
                name='PR Curve',
                line=dict(color='rgba(0, 120, 220, 0.8)', width=3, dash='solid'),
                marker=dict(size=8, color='rgba(255, 80, 80, 0.9)', symbol='circle', line=dict(width=1, color='darkred')),
                hovertemplate=(
                    'Recall: %{x:.2f}<br>'
                    'Precision: %{y:.2f}<br>'
                    'Confidence: %{text:.2f}<br>'
                    'F1 Score: %{customdata[0]:.2f}<br>'
                    'Accuracy: %{customdata[1]:.2f}<extra></extra>'
                ),
                text=conf_thresholds_tracked,
                customdata=list(zip(f1_scores, accuracies))
            ))

            # Second trace over the same points, used only to shade down to y=0.
            fig.add_trace(go.Scatter(
                x=recalls,
                y=precisions,
                fill='tozeroy',
                mode='none',
                name='Area Under Curve',
                fillcolor='rgba(0, 120, 220, 0.2)'
            ))

            # Hard-coded reference points drawn on every figure; their
            # provenance is not recorded in this notebook.
            specified_points = [
                {'precision': 0.84, 'recall': 0.58},
                {'precision': 0.58, 'recall': 0.84}
            ]

            for point in specified_points:
                fig.add_trace(go.Scatter(
                    x=[point['recall']],
                    y=[point['precision']],
                    mode='markers+text',
                    name=f"Precision={point['precision']}, Recall={point['recall']}",
                    marker=dict(size=10, color='green', symbol='star'),
                    text=[f"P={point['precision']}, R={point['recall']}"],
                    textposition='top center'
                ))

            fig.update_layout(
                # Include the label directory so the figures produced for one
                # model can be told apart (the title used to end in a stray ")").
                title=f"Precision-Recall Curve for {model_name} ({test_labels_dir})",
                xaxis_title="Recall",
                yaxis_title="Precision",
                showlegend=True
            )
            fig.show()

In [None]:
# Precision-recall sweep of one model against all four label sets.
test_images_dir = "images/"
# A missing comma previously fused "labels_O2/" and "Intersection_Labels/" into
# a single non-existent path and two entries were listed twice; the list now
# names each label directory exactly once.
test_labels_dirs = ["Union_Labels/", "Intersection_Labels/", "labels_O1/", "labels_O2/"]
model_names = ["best_11s_1024_dataO2_NoAug_b16.pt"]

evaluate_all_models(model_names, test_images_dir, test_labels_dirs)

Evaluating model: best_11s_1024_dataO2_NoAug_b16.pt
Processing label directory: Union_Labels/


Processing thresholds: 100%|██████████| 100/100 [26:07<00:00, 15.68s/it]


Processing label directory: Intersection_Labels/


Processing thresholds: 100%|██████████| 100/100 [18:49<00:00, 11.29s/it]


Processing label directory: labels_O1/


Processing thresholds: 100%|██████████| 100/100 [20:40<00:00, 12.41s/it]


Processing label directory: labels_O2/


Processing thresholds: 100%|██████████| 100/100 [18:51<00:00, 11.31s/it]
