In [None]:
from pathlib import Path

# Folder scanned for the `*_orig.png` input images; the matching handwritten
# masks (`*_gt.png`) are looked up in this same folder.
data_dir = 'data/test'

In [None]:
import numpy as np
import cv2
from sklearn.metrics import jaccard_score, precision_score, recall_score, f1_score
from tqdm import tqdm

In [None]:
def load_and_convert_image(image_path, reverse = False):
    """Read an image file as a single-channel grayscale array.

    Parameters
    ----------
    image_path : str or pathlib.Path
        Location of the image file. It is converted with ``str()`` before
        being handed to OpenCV, so ``Path`` objects are accepted as well.
    reverse : bool, default False
        When True, the result is inverted by computing ``255 - pixel``.

    Returns
    -------
    The array produced by ``cv2.imread`` in grayscale mode, inverted when
    ``reverse`` is set.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing path or unreadable image).
    """
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface much later as an unrelated-looking error.
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Image is missing or unreadable: {image_path}")

    if reverse:
        image = cv2.subtract(255, image)
    return image

def prepare_binary_mask(mask, threshold=127):
    """Binarise a grayscale image and rescale it to the values 0 and 1.

    The image is thresholded with OpenCV's ``THRESH_BINARY`` mode using 255
    as the "on" value, and the thresholded image is then divided by 255.

    Parameters
    ----------
    mask : array
        Grayscale image to binarise.
    threshold : int, default 127
        Threshold value forwarded to ``cv2.threshold``.

    Returns
    -------
    The thresholded image divided by 255.
    """
    # cv2.threshold returns a (used_threshold, image) pair; only the image matters here.
    thresholded = cv2.threshold(mask, threshold, 255, cv2.THRESH_BINARY)[1]
    return thresholded / 255


def calculate_segmentation_metrics(true_mask, predicted_mask):
    """Compute pixel-wise precision, recall, F1 and IoU between two masks.

    Both masks are flattened and scored with scikit-learn's
    ``precision_score``, ``recall_score``, ``f1_score`` and ``jaccard_score``
    using their default settings.

    Parameters
    ----------
    true_mask : array
        Reference mask.
    predicted_mask : array
        Mask to evaluate; must have the same shape as ``true_mask``.

    Returns
    -------
    tuple
        ``(precision, recall, f1, iou)``.

    Raises
    ------
    ValueError
        If the two masks do not have the same shape.
    """
    # Flattening hides layout differences: masks of different shape but equal
    # pixel count would otherwise be compared pixel-by-pixel and give
    # meaningless scores, so reject them explicitly.
    if true_mask.shape != predicted_mask.shape:
        raise ValueError(
            f"Mask shapes differ: true_mask {true_mask.shape} vs "
            f"predicted_mask {predicted_mask.shape}"
        )

    # ravel() avoids copying when the array is already contiguous; the
    # metric functions only read their inputs.
    true_mask_flat = true_mask.ravel()
    predicted_mask_flat = predicted_mask.ravel()

    # metrics
    precision = precision_score(true_mask_flat, predicted_mask_flat)
    recall = recall_score(true_mask_flat, predicted_mask_flat)
    f1 = f1_score(true_mask_flat, predicted_mask_flat)
    iou = jaccard_score(true_mask_flat, predicted_mask_flat)
    return precision, recall, f1, iou



In [None]:
# image paths

# Sorted so that the image order (and therefore which image is the "first"
# sample used below) is the same on every run.
original_image_path = sorted(Path(data_dir).glob("*_orig.png"))
if not original_image_path:
    raise FileNotFoundError(f"No '*_orig.png' images found in {data_dir!r}")

# str.rstrip('_orig') removes any trailing run of the characters '_', 'o',
# 'r', 'i', 'g' rather than the literal suffix, so a stem such as
# "photo_orig" would be cut down to "phot". Every globbed stem ends with the
# fixed-length "_orig" suffix, so slice exactly that many characters off.
image_stems = [i.stem[:-len("_orig")] for i in original_image_path]

handwritten_mask_path = [Path(data_dir) / (stem + "_gt.png") for stem in image_stems]

model_output_path = [Path('inference_test/') / (stem + "_tr_CLEANED_TO.png") for stem in image_stems]

# Load the first sample; the handwritten mask is loaded inverted (reverse=True).
original_image = load_and_convert_image(original_image_path[0].as_posix())
handwritten_mask = load_and_convert_image(handwritten_mask_path[0].as_posix(), reverse = True)
model_output = load_and_convert_image(model_output_path[0].as_posix())

binary_mask = prepare_binary_mask(handwritten_mask)
binary_model_output = prepare_binary_mask(model_output)
binary_original = prepare_binary_mask(original_image)

In [None]:
# Basic segmentation metrics for the sample currently held in
# binary_mask / binary_model_output.
basic_metrics = calculate_segmentation_metrics(binary_mask, binary_model_output)
precision, recall, f1, iou = basic_metrics

print(
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    f'IoU (Intersection over Union): {iou:.4f}',
    sep='\n',
)

In [None]:
def prepare_images(original_image, handwritten_mask, model_output):
    """Load the three images of one sample and binarise each of them.

    Despite their names, all three parameters are file paths: each one is
    passed to ``load_and_convert_image``. The handwritten mask is loaded with
    ``reverse = True``; the other two are loaded as they are.

    Returns
    -------
    tuple
        ``(binary_mask, binary_model_output, binary_original)`` - note that
        the order differs from the parameter order.
    """
    loaded_original = load_and_convert_image(original_image)
    loaded_mask = load_and_convert_image(handwritten_mask, reverse = True)
    loaded_output = load_and_convert_image(model_output)

    return (
        prepare_binary_mask(loaded_mask),
        prepare_binary_mask(loaded_output),
        prepare_binary_mask(loaded_original),
    )

In [None]:
# One (precision, recall, f1, iou) tuple per test image, in list order.
accuracy = []
n_images = len(original_image_path)
for i in tqdm(range(n_images)):
    # Paths of the i-th sample: original image, handwritten mask, model output.
    sample_paths = [
        paths[i].as_posix()
        for paths in (original_image_path, handwritten_mask_path, model_output_path)
    ]
    binary_mask, binary_model_output, binary_original = prepare_images(*sample_paths)

    precision, recall, f1, iou = calculate_segmentation_metrics(binary_mask, binary_model_output)

    accuracy += [(precision, recall, f1, iou)]

In [None]:
import pandas as pd

In [None]:
# Summary statistics of the per-image metrics collected in `accuracy`.
metrics_summary = pd.DataFrame(accuracy, columns = ["precision", "recall", "f1", "iou"]).describe()
metrics_summary

In [None]:
def calculate_advanvced_metrics(original, handwritten_mask, predicted_mask):
    """Compute removal / preservation ratios from three 0/1 images.

    All three inputs are assumed to be arrays holding only the values 0 and 1.

    Returns
    -------
    tuple
        ``(complete_removal_rate, printed_text_integrity, obfuscation_error_rate)``

        * complete_removal_rate - share of ``original * handwritten_mask``
          that is also set in ``predicted_mask``; 1.0 when that product is
          empty.
        * printed_text_integrity - share of ``original * (1 - handwritten_mask)``
          that is not set in ``predicted_mask``; 1.0 when that product is
          empty.
        * obfuscation_error_rate - share of ``original * handwritten_mask``
          that is not set in ``predicted_mask``; 0.0 when that product is
          empty.
    """
    # Pixels that should be removed / should be kept.
    removable = original * handwritten_mask
    keepable = original * (1 - handwritten_mask)
    # Pixels the prediction leaves untouched.
    not_predicted = 1 - predicted_mask

    removable_total = np.sum(removable)
    keepable_total = np.sum(keepable)

    if removable_total > 0:
        complete_removal_rate = np.sum(removable * predicted_mask) / removable_total
        obfuscation_error_rate = np.sum(removable * not_predicted) / removable_total
    else:
        # Nothing to remove: avoid dividing by zero.
        complete_removal_rate = 1.0
        obfuscation_error_rate = 0.0

    if keepable_total > 0:
        printed_text_integrity = np.sum(keepable * not_predicted) / keepable_total
    else:
        # Nothing to keep: avoid dividing by zero.
        printed_text_integrity = 1.0

    return complete_removal_rate, printed_text_integrity, obfuscation_error_rate

In [None]:
# Advanced metrics for the sample currently held in
# binary_original / binary_mask / binary_model_output.
# Open question: these may not be needed, since they would have to be
# explained in the text. On this sample they make no difference, but on the
# original test set, where accuracy is lower, the results could be interesting.
advanced_metrics = calculate_advanvced_metrics(binary_original, binary_mask, binary_model_output)
complete_removal_rate, printed_text_integrity, obfuscation_error_rate = advanced_metrics

print(
    f'Complete Removal Rate: {complete_removal_rate:.4f}',
    f'Printed Text Integrity: {printed_text_integrity:.4f}',
    f'Obfuscation Error Rate: {obfuscation_error_rate:.4f}',
    sep='\n',
)