In [1]:
import openslide
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
from openslide import ImageSlide, OpenSlide
import xml.etree.ElementTree as ET
import h5py
import os
import re

In [2]:
def parse_tree(filepath):
    """Read an annotation XML file and return its polygons as closed vertex lists.

    The first child of the XML root is iterated; for each of its children, the
    first sub-element is read as a sequence of nodes carrying ``X`` and ``Y``
    attributes.

    Args:
        filepath: Path (or file object) accepted by ``ET.parse``.

    Returns:
        dict mapping ``'Group<PartOfGroup>_Name<Name>'`` to a list of
        ``[x, y]`` float pairs, where the first vertex is repeated at the end
        so the outline is closed.
    """
    annotation_nodes = ET.parse(filepath).getroot()[0]
    polygons = {}
    for annotation in annotation_nodes:
        vertices = []
        for point in annotation[0]:
            vertices.append([float(point.attrib['X']), float(point.attrib['Y'])])
        # Close the outline by repeating the first corner at the end.
        vertices.append(vertices[0])
        label = f"Group{annotation.attrib['PartOfGroup']}_Name{annotation.attrib['Name']}"
        polygons[label] = vertices
    return polygons

def annots_to_masks(annotations):
    """Rasterise each annotation polygon into a binary mask of its bounding box.

    Args:
        annotations: dict mapping an annotation name to a sequence of
            ``(x, y)`` vertices (as produced by ``parse_tree``).

    Returns:
        dict mapping each annotation name to a dict with:
            ``'top_left'``: ``(minx, miny)`` of the rounded vertices,
            ``'shape'``: the mask's numpy shape, i.e. ``(height, width)``,
            ``'mask'``: 2-D array with 1 inside/on the polygon and 0 elsewhere.
    """
    result_dict = {}
    for key, values in annotations.items():
        rounded = [[round(coord) for coord in tup] for tup in values]
        xs = [point[0] for point in rounded]
        ys = [point[1] for point in rounded]
        minx, miny = min(xs), min(ys)
        maxx, maxy = max(xs), max(ys)
        # Shift the polygon so its bounding box starts at (0, 0).
        moved_rounded = [tuple(coord - move for coord, move in zip(tup, [minx, miny]))
                         for tup in rounded]
        # Shifted vertices span 0..(max - min) inclusive, so the canvas needs one
        # extra pixel per axis; otherwise the right/bottom boundary is clipped and
        # a polygon that is one pixel wide or tall yields an empty mask.
        img = Image.new('L', (maxx - minx + 1, maxy - miny + 1), 0)
        ImageDraw.Draw(img).polygon(moved_rounded, outline=1, fill=1)
        mask = np.array(img)
        result_dict[key] = {'top_left': (minx, miny), 'shape': mask.shape, 'mask': mask}
    return result_dict

def load_h5(filepath):
    """Return the entries of the first top-level object in an HDF5 file as a list.

    Args:
        filepath: Path of the HDF5 file to open read-only.

    Returns:
        list obtained by iterating over the object stored under the first key
        reported by the file.
    """
    with h5py.File(filepath, "r") as h5_file:
        first_key = list(h5_file.keys())[0]
        # Materialise the entries while the file is still open.
        entries = [entry for entry in h5_file[first_key]]
    return entries

def intersects(top_left1, sizes1, top_left2, sizes2):
    """Tell whether two axis-aligned rectangles overlap or touch.

    Index 0 of each ``sizes`` argument is the extent along the same axis as
    index 0 of the matching ``top_left`` argument (likewise for index 1).

    Args:
        top_left1: Corner of the first rectangle.
        sizes1: Extents of the first rectangle.
        top_left2: Corner of the second rectangle.
        sizes2: Extents of the second rectangle.

    Returns:
        False when one rectangle lies strictly beyond the other along either
        axis, True otherwise (rectangles sharing only an edge count as
        intersecting).
    """
    # First rectangle ends before the second begins along axis 0.
    if top_left1[0] + sizes1[0] < top_left2[0]:
        return False
    # First rectangle begins after the second ends along axis 0.
    if top_left1[0] > top_left2[0] + sizes2[0]:
        return False
    # First rectangle begins after the second ends along axis 1.
    if top_left1[1] > top_left2[1] + sizes2[1]:
        return False
    # First rectangle ends before the second begins along axis 1.
    if top_left1[1] + sizes1[1] < top_left2[1]:
        return False
    return True


In [None]:
# For every annotated slide and every patch size, measure how many annotated
# pixels fall inside each patch and write the per-patch statistics to
# ./tumor_stats/<patch_size>/<slide>_patches_stats.csv.
tif_path = "./CLAM_files/CAMELYON16/training/images/"
# NOTE(review): `my_dir` is not defined in any cell visible here, so this line
# raises NameError on a fresh kernel — define it in a config cell above.
annotations_dir = my_dir + "annotations/"
PATH=tif_path + "tumor_023.tif"

stats_columns = ['patch_id', 'annotation_name', 'patch_top_left',
                 'patch_top_left_x', 'patch_top_left_y', 'ill_cells_area']

for patch_size in [224, 256]:
    h5_dir_path = f"./CLAM_files/CAMELYON16_patches_{patch_size}/training/patches/"
    patch_shape = (patch_size, patch_size)

    # makedirs also creates the parent `tumor_stats/` directory and is safe to re-run.
    os.makedirs(f'tumor_stats/{patch_size}/', exist_ok=True)

    for filename in os.listdir(annotations_dir):
        if not filename.endswith('.xml'):
            continue
        polygons = parse_tree(annotations_dir + filename)
        masks = annots_to_masks(polygons)
        h5_path = h5_dir_path + filename[:-3] + "h5"
        patches_corner = load_h5(h5_path)

        # Collect plain records and build the frame once, instead of creating
        # and concatenating one DataFrame per row.
        records = []
        for i, patch_corner in enumerate(patches_corner):
            for mask_name, mask in masks.items():
                mask_corner = mask['top_left']
                # mask['shape'] is a numpy shape, i.e. (rows, cols) = (height, width).
                annot_height, annot_width = mask['shape']
                # intersects() pairs sizes[0] with the x coordinate, so the mask
                # extents must be passed as (width, height), not as the raw shape.
                if not intersects(patch_corner, patch_shape, mask_corner,
                                  (annot_width, annot_height)):
                    continue
                # Patch window expressed in mask-local coordinates, clamped to the mask.
                row_start = max(0, patch_corner[1] - mask_corner[1])
                row_stop = min(annot_height, patch_corner[1] - mask_corner[1] + patch_size)
                col_start = max(0, patch_corner[0] - mask_corner[0])
                col_stop = min(annot_width, patch_corner[0] - mask_corner[0] + patch_size)
                intersection = mask['mask'][row_start:row_stop, col_start:col_stop]
                # Plain int so the area columns are stored as integers.
                intersection_pixels = int(np.sum(intersection))
                # Only patches that actually contain annotated pixels are reported.
                if intersection_pixels == 0:
                    continue
                records.append({
                    'patch_id': i,
                    'annotation_name': mask_name,
                    'patch_top_left': patch_corner,
                    'patch_top_left_x': patch_corner[0],
                    'patch_top_left_y': patch_corner[1],
                    'ill_cells_area': intersection_pixels,
                })

        # An explicit column list keeps the CSV header intact even when no patch
        # overlaps any annotation (pd.concat on an empty list would raise).
        patches_data = pd.DataFrame(records, columns=stats_columns)
        patches_data['patch_area'] = patch_size*patch_size
        patches_data['not_ill_area'] = patches_data['patch_area'] - patches_data['ill_cells_area']
        patches_data.to_csv(f'./tumor_stats/{patch_size}/{filename[:-4]}_patches_stats.csv', index=False)