In [7]:
import os
import json
from shapely.geometry import shape

# Maps the raw class name found in an annotation to the canonical class name
# written back to the GeoJSON. Lookups are exact (case-sensitive), which is why
# both capitalisations of a label are listed. Any label that is not an active
# key here is relabelled 'other' by update_and_save_geojsons.
# The commented-out entries are currently disabled classes; uncomment a pair to
# keep that class as its own category instead of folding it into 'other'.
# NOTE(review): the disabled eosinophil entries map to 'eosinofil' — confirm
# that spelling is the intended target name before re-enabling them.
class_mapping = {
    # 'macrophage': 'macrophage',
    # 'Macrophage': 'macrophage',
    # 'epithelium': 'epithelium',
    # 'Epithelium': 'epithelium',
    # 'neutrophil': 'neutrophil',
    # 'vascular endothelium': 'endothelium',
    # 'Vascular endothelium': 'endothelium',
    # 'melanophage': 'melanophage',
    # 'Melanophage': 'melanophage',
    # 'plasma cell': 'plasma cell',
    # 'Plasma cell': 'plasma cell',
    # 'lymphocytes': 'lymphocyte',
    'tumor': 'tumor',
    'Tumor': 'tumor',
    # 'stroma': 'stroma',
    # 'Stroma': 'stroma',
    # 'eosinophil': 'eosinofil',
    # 'Eosinophil': 'eosinofil',
    # 'Immune cells' :'lymphocyte',
}

def iou(poly1, poly2):
    """Calculate Intersection over Union (IoU) for two polygons.

    Args:
        poly1: Geometry exposing ``intersection(other)`` and ``union(other)``
            whose results have an ``area`` attribute (e.g. a shapely geometry).
        poly2: Second geometry, same requirements as ``poly1``.

    Returns:
        ``area(intersection) / area(union)``, or ``0.0`` when the union has
        zero area.
    """
    union_area = poly1.union(poly2).area
    # Two empty or degenerate (zero-area) geometries have a zero-area union;
    # treat that as "no overlap" instead of raising ZeroDivisionError.
    if union_area == 0:
        return 0.0
    return poly1.intersection(poly2).area / union_area

def _relabel_geojson(src_path, dst_path, mapping):
    """Load one GeoJSON, rewrite its class names via ``mapping`` and save it.

    Features without a 'classification' property are left untouched.
    Returns the set of class names that were written.
    """
    with open(src_path, 'r') as file:
        geojson = json.load(file)

    written = set()
    for annot in geojson['features']:
        properties = annot['properties']
        if 'classification' in properties:
            old_class = properties['classification']['name']
            # Labels missing from the mapping are collapsed into 'other'.
            new_class = mapping.get(old_class, 'other')
            properties['classification']['name'] = new_class
            written.add(new_class)

    with open(dst_path, 'w') as file:
        json.dump(geojson, file)

    return written


def update_and_save_geojsons(
    folder1,
    folder2,
    suffix1,
    suffix2,
    target_folder='/mnt/d/TIL_Melanoma_train_database/cell_segmentation/TUE/manual_segmented_tiles/both',
    mapping=None,
):
    """Relabel the GeoJSONs present in both folders and save them side by side.

    Only files whose name ends in '.geojson' and exists in BOTH ``folder1`` and
    ``folder2`` are processed. Each one is written to ``target_folder`` as
    ``<stem><suffix>.geojson``, where ``<stem>`` is the filename without its
    final extension, so the two annotators' versions can coexist.

    Args:
        folder1: Directory holding the first annotator's GeoJSON files.
        folder2: Directory holding the second annotator's GeoJSON files.
        suffix1: Text appended to the stem of files coming from ``folder1``.
        suffix2: Text appended to the stem of files coming from ``folder2``.
        target_folder: Output directory; created (with parents) if missing.
        mapping: Dict of raw class name -> canonical class name. Defaults to
            the module-level ``class_mapping``. Unmapped names become 'other'.

    Returns:
        Set of all class names written across every output file.
    """
    if mapping is None:
        mapping = class_mapping

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_folder, exist_ok=True)

    def geojson_names(folder):
        return {f for f in os.listdir(folder) if f.endswith('.geojson')}

    # Only include GeoJSONs that are present in both folders.
    common_files = sorted(geojson_names(folder1) & geojson_names(folder2))

    classes = set()
    for folder, suffix in ((folder1, suffix1), (folder2, suffix2)):
        for filename in common_files:
            # splitext strips only the final '.geojson'; splitting on the first
            # '.' would map 'tile.1.geojson' and 'tile.2.geojson' to the same
            # output name and silently overwrite one of them.
            stem = os.path.splitext(filename)[0]
            classes |= _relabel_geojson(
                os.path.join(folder, filename),
                os.path.join(target_folder, stem + suffix + '.geojson'),
                mapping,
            )

    return classes

# Example usage: relabel the tiles both annotators segmented.

rachelle = '/mnt/d/TIL_Melanoma_train_database/cell_segmentation/TUE/manual_segmented_tiles/rachelle'
nadiya = '/mnt/d/TIL_Melanoma_train_database/cell_segmentation/TUE/manual_segmented_tiles/nadiya'
update_and_save_geojsons(
    folder1=nadiya,
    folder2=rachelle,
    suffix1='_nadiya',
    suffix2='_rachelle',
)


{'other', 'tumor'}

In [8]:
from sklearn.metrics import cohen_kappa_score
from tqdm import tqdm

def _load_polygons(path):
    """Load a GeoJSON file and return ``[(geometry, feature), ...]``."""
    with open(path, 'r') as file:
        features = json.load(file)['features']
    return [(shape(annot['geometry']), annot) for annot in features]


def _class_name(annot):
    """Return a feature's classification name, or "NoClass" if it has none."""
    properties = annot['properties']
    if 'classification' in properties:
        return properties['classification']['name']
    return "NoClass"


def compare_and_calculate_kappa(
    target_folder,
    suffix1='_nadiya',
    suffix2='_rachelle',
    iou_threshold=0.5,
):
    """Compute Cohen's kappa between two annotators on spatially matched cells.

    For every base name that has both a ``<base><suffix1>.geojson`` and a
    ``<base><suffix2>.geojson`` file in ``target_folder``, every annotation of
    the first file is compared against every annotation of the second. Each
    pair whose IoU exceeds ``iou_threshold`` contributes one (class1, class2)
    observation. Annotations that match nothing are ignored, and an annotation
    that matches several counterparts contributes once per match.

    Args:
        target_folder: Directory containing both annotators' GeoJSON files.
        suffix1: Filename suffix (before '.geojson') of the first annotator.
        suffix2: Filename suffix (before '.geojson') of the second annotator.
        iou_threshold: Minimum IoU (exclusive) for two annotations to count as
            the same object.

    Returns:
        Cohen's kappa over all matched pairs, or NaN when no pair matched.
    """
    ending1 = suffix1 + '.geojson'
    ending2 = suffix2 + '.geojson'

    # Match on the true filename ending rather than a substring, and strip
    # exactly that ending to recover the shared base name.
    all_files = os.listdir(target_folder)
    base_names1 = {f[:-len(ending1)] for f in all_files if f.endswith(ending1)}
    base_names2 = {f[:-len(ending2)] for f in all_files if f.endswith(ending2)}
    common_base_names = sorted(base_names1 & base_names2)

    # Paired class labels for the kappa calculation.
    labels1 = []
    labels2 = []

    for base_name in tqdm(common_base_names):
        # Build each geometry once per file instead of once per compared pair.
        annotations1 = _load_polygons(os.path.join(target_folder, base_name + ending1))
        annotations2 = _load_polygons(os.path.join(target_folder, base_name + ending2))

        for poly1, annot1 in annotations1:
            for poly2, annot2 in annotations2:
                if iou(poly1, poly2) > iou_threshold:
                    labels1.append(_class_name(annot1))
                    labels2.append(_class_name(annot2))

    # Agreement is undefined without any matched pair; say so explicitly
    # rather than handing empty label lists to cohen_kappa_score.
    if not labels1:
        return float('nan')

    return cohen_kappa_score(labels1, labels2)

# Example usage: inter-annotator agreement on the relabelled tiles.
kappa_value = compare_and_calculate_kappa(
    target_folder='/mnt/d/TIL_Melanoma_train_database/cell_segmentation/TUE/manual_segmented_tiles/both'
)
print(kappa_value)

100%|██████████| 10/10 [05:46<00:00, 34.62s/it]

0.48882671147132395



