In [None]:
from pannuke_dataset import PanNukeDataModule
from monusac_dataset import MoNuSACDataModule
import matplotlib.pyplot as plt
import numpy as np 
import cv2
from pathlib import Path
import polars as pl
import os
import shutil
from data_utils import rm_n_mkdir, bounding_box

In [None]:
# --- Patch-extraction settings (read by the extraction cells below) ---
crop_pad = 4                        # border pixels trimmed from every tile before padding
px_expansion = 16                   # context margin (px) added around each cell's bounding box
exp_pad = px_expansion + crop_pad   # padding re-applied to the cropped tile / label map
min_area = 90                       # cells whose mask has fewer pixels than this are skipped
cell_patch_size = (64, 64)          # output size handed to cv2.resize
pad_method = 'symmetric'            # np.pad mode for the image: 'reflect' or 'symmetric'
selected_folds = [1, 2, 3]          # folds iterated by the extraction cells

# --- Data modules and the datasets covering all selected folds ---
dm_pannuke = PanNukeDataModule('/path/to/data/pannuke/processed')
dm_monusac = MoNuSACDataModule('/path/to/data/monusac/processed')

pannuke_dataset = dm_pannuke.get_dataset(selected_folds=selected_folds)
monusac_dataset = dm_monusac.get_dataset(selected_folds=selected_folds)

# Show the label set exposed by each dataset.
print(pannuke_dataset.labels)
print(monusac_dataset.labels)

In [None]:
# Extract every 'Inflammatory' nucleus from the selected PanNuke folds and save
# one resized patch per nucleus under <save_path>/images.
save_path = Path(f"pannuke_inflam_{cell_patch_size[0]}x{cell_patch_size[1]}")
save_path_imgs = Path(save_path, "images")
rm_n_mkdir(save_path_imgs)
cid = 0  # running patch counter; not reset between folds, so file names stay unique

# Index of each cell type inside `label_maps`. PanNuke channels:
# 0: 'Neoplastic', 1: 'Inflammatory', 2: 'Connective', 3: 'Dead', 4: 'Epithelial'
c_type_channels = {'Inflammatory': 1}

# Pad both spatial axes of the image by exp_pad; leave the channel axis alone.
pad_width = ((exp_pad, exp_pad), (exp_pad, exp_pad), (0, 0))

for fold in selected_folds:
    pnk_dataset = dm_pannuke.get_dataset([fold])

    for dataset_entry in pnk_dataset:
        img, label_maps = dataset_entry
        # Move axis 0 to the end; presumably CHW -> HWC (TODO confirm against the dataset).
        img = img.transpose(1, 2, 0)

        # Cropped + padded copy of the tile. Built lazily, at most once per tile,
        # and kept separate from `img` so that every cell type is matched against
        # an image with exactly the same geometry as its label map.
        padded_img = None

        for c_type, channel in c_type_channels.items():
            c_type_map = label_maps[channel]

            # A single unique value means there is no instance of this type here.
            if np.unique(c_type_map).shape[0] == 1:
                continue

            # Trim crop_pad pixels from each border, then zero-pad by exp_pad.
            # Explicit end indices keep the slice valid when crop_pad == 0
            # (a plain `[0:-0]` slice would be empty).
            c_type_map = c_type_map[
                crop_pad:c_type_map.shape[0] - crop_pad,
                crop_pad:c_type_map.shape[1] - crop_pad,
            ]
            c_type_map = np.pad(c_type_map, exp_pad, mode='constant', constant_values=0)

            if padded_img is None:
                cropped_img = img[
                    crop_pad:img.shape[0] - crop_pad,
                    crop_pad:img.shape[1] - crop_pad,
                ]
                # Mirror-style padding so patches near the border contain
                # plausible context instead of a constant frame.
                padded_img = np.pad(cropped_img, pad_width, mode=pad_method)

            for cell_id in np.unique(c_type_map):
                if cell_id == 0:  # label 0 is skipped (also the value used for padding)
                    continue

                cell_map = np.zeros_like(c_type_map)
                cell_map[c_type_map == cell_id] = 1
                area = np.sum(cell_map)

                if area < min_area:
                    continue

                # bounding_box is a project helper; its results are used as
                # inclusive indices here (hence the `+ 1` when slicing).
                rmin, rmax, cmin, cmax = bounding_box(cell_map)

                # Grow the box by px_expansion, clamped to the padded tile.
                rmin = max(0, rmin - px_expansion)
                rmax = min(cell_map.shape[0] - 1, rmax + px_expansion)
                cmin = max(0, cmin - px_expansion)
                cmax = min(cell_map.shape[1] - 1, cmax + px_expansion)
                cell_img = padded_img[rmin:rmax + 1, cmin:cmax + 1]

                # Resize to the configured patch size.
                cell_img = cv2.resize(cell_img, cell_patch_size, interpolation=cv2.INTER_CUBIC)

                # Swap the first and third channels before writing; presumably the
                # dataset yields RGB while cv2.imwrite expects BGR (TODO confirm).
                cell_img = cv2.cvtColor(cell_img, cv2.COLOR_BGR2RGB)
                uid = f"{cid}_{int(area)}"
                cid += 1
                cv2.imwrite(str(Path(save_path_imgs, f"f{fold}_{uid}_{c_type}.png")), cell_img)

In [None]:
# Extract every 'Neoplastic' and 'Epithelial' nucleus from the selected PanNuke
# folds and save one resized patch per nucleus under <save_path>/images.
save_path = Path(f"pannuke_neo_epi_{cell_patch_size[0]}x{cell_patch_size[1]}")
save_path_imgs = Path(save_path, "images")
rm_n_mkdir(save_path_imgs)
cid = 0  # running patch counter; not reset between folds, so file names stay unique

# Index of each cell type inside `label_maps`. PanNuke channels:
# 0: 'Neoplastic', 1: 'Inflammatory', 2: 'Connective', 3: 'Dead', 4: 'Epithelial'
c_type_channels = {'Neoplastic': 0, 'Epithelial': 4}

# Pad both spatial axes of the image by exp_pad; leave the channel axis alone.
pad_width = ((exp_pad, exp_pad), (exp_pad, exp_pad), (0, 0))

for fold in selected_folds:
    pnk_dataset = dm_pannuke.get_dataset([fold])

    for dataset_entry in pnk_dataset:
        img, label_maps = dataset_entry
        # Move axis 0 to the end; presumably CHW -> HWC (TODO confirm against the dataset).
        img = img.transpose(1, 2, 0)

        # Cropped + padded copy of the tile. Built lazily, at most once per tile,
        # and kept separate from `img`: cropping/padding `img` in place inside the
        # cell-type loop would re-crop and re-pad it for the second type, leaving
        # the image larger than, and shifted against, that type's label map.
        padded_img = None

        for c_type, channel in c_type_channels.items():
            c_type_map = label_maps[channel]

            # A single unique value means there is no instance of this type here.
            if np.unique(c_type_map).shape[0] == 1:
                continue

            # Trim crop_pad pixels from each border, then zero-pad by exp_pad.
            # Explicit end indices keep the slice valid when crop_pad == 0
            # (a plain `[0:-0]` slice would be empty).
            c_type_map = c_type_map[
                crop_pad:c_type_map.shape[0] - crop_pad,
                crop_pad:c_type_map.shape[1] - crop_pad,
            ]
            c_type_map = np.pad(c_type_map, exp_pad, mode='constant', constant_values=0)

            if padded_img is None:
                cropped_img = img[
                    crop_pad:img.shape[0] - crop_pad,
                    crop_pad:img.shape[1] - crop_pad,
                ]
                # Mirror-style padding so patches near the border contain
                # plausible context instead of a constant frame.
                padded_img = np.pad(cropped_img, pad_width, mode=pad_method)

            for cell_id in np.unique(c_type_map):
                if cell_id == 0:  # label 0 is skipped (also the value used for padding)
                    continue

                cell_map = np.zeros_like(c_type_map)
                cell_map[c_type_map == cell_id] = 1
                area = np.sum(cell_map)

                if area < min_area:
                    continue

                # bounding_box is a project helper; its results are used as
                # inclusive indices here (hence the `+ 1` when slicing).
                rmin, rmax, cmin, cmax = bounding_box(cell_map)

                # Grow the box by px_expansion, clamped to the padded tile.
                rmin = max(0, rmin - px_expansion)
                rmax = min(cell_map.shape[0] - 1, rmax + px_expansion)
                cmin = max(0, cmin - px_expansion)
                cmax = min(cell_map.shape[1] - 1, cmax + px_expansion)
                cell_img = padded_img[rmin:rmax + 1, cmin:cmax + 1]

                # Resize to the configured patch size.
                cell_img = cv2.resize(cell_img, cell_patch_size, interpolation=cv2.INTER_CUBIC)

                # Swap the first and third channels before writing; presumably the
                # dataset yields RGB while cv2.imwrite expects BGR (TODO confirm).
                cell_img = cv2.cvtColor(cell_img, cv2.COLOR_BGR2RGB)
                uid = f"{cid}_{int(area)}"
                cid += 1
                cv2.imwrite(str(Path(save_path_imgs, f"f{fold}_{uid}_{c_type}.png")), cell_img)


In [None]:
# Extract every 'Epithelial' nucleus from the selected MoNuSAC folds and save
# one resized patch per nucleus under <save_path>/images.
save_path = Path(f"monusac_epi_{cell_patch_size[0]}x{cell_patch_size[1]}")
save_path_imgs = Path(save_path, "images")
rm_n_mkdir(save_path_imgs)
cid = 0  # running patch counter; not reset between folds, so file names stay unique

# Index of each cell type inside `label_maps`. MoNuSAC channels:
# 0: 'Epithelial', # All types of epithelial cells
# 1: 'Lymphocyte', # Lymphocyte type of immune cells
# 2: 'Macrophage', # Macrophages type of immune cells
# 3: 'Neutrophil', # Neutrophils type of immune cells
# 4: 'Ambiguous',  # Only for the test set
c_type_channels = {'Epithelial': 0}

# Pad both spatial axes of the image by exp_pad; leave the channel axis alone.
pad_width = ((exp_pad, exp_pad), (exp_pad, exp_pad), (0, 0))

for fold in selected_folds:
    monusac_dataset = dm_monusac.get_dataset([fold])

    for dataset_entry in monusac_dataset:
        img, label_maps = dataset_entry
        # Move axis 0 to the end; presumably CHW -> HWC (TODO confirm against the dataset).
        img = img.transpose(1, 2, 0)

        # Cropped + padded copy of the tile. Built lazily, at most once per tile,
        # and kept separate from `img` so that every cell type is matched against
        # an image with exactly the same geometry as its label map.
        padded_img = None

        for c_type, channel in c_type_channels.items():
            c_type_map = label_maps[channel]

            # A single unique value means there is no instance of this type here.
            if np.unique(c_type_map).shape[0] == 1:
                continue

            # Trim crop_pad pixels from each border, then zero-pad by exp_pad.
            # Explicit end indices keep the slice valid when crop_pad == 0
            # (a plain `[0:-0]` slice would be empty).
            c_type_map = c_type_map[
                crop_pad:c_type_map.shape[0] - crop_pad,
                crop_pad:c_type_map.shape[1] - crop_pad,
            ]
            c_type_map = np.pad(c_type_map, exp_pad, mode='constant', constant_values=0)

            if padded_img is None:
                cropped_img = img[
                    crop_pad:img.shape[0] - crop_pad,
                    crop_pad:img.shape[1] - crop_pad,
                ]
                # Mirror-style padding so patches near the border contain
                # plausible context instead of a constant frame.
                padded_img = np.pad(cropped_img, pad_width, mode=pad_method)

            for cell_id in np.unique(c_type_map):
                if cell_id == 0:  # label 0 is skipped (also the value used for padding)
                    continue

                cell_map = np.zeros_like(c_type_map)
                cell_map[c_type_map == cell_id] = 1
                area = np.sum(cell_map)

                if area < min_area:
                    continue

                # bounding_box is a project helper; its results are used as
                # inclusive indices here (hence the `+ 1` when slicing).
                rmin, rmax, cmin, cmax = bounding_box(cell_map)

                # Grow the box by px_expansion, clamped to the padded tile.
                rmin = max(0, rmin - px_expansion)
                rmax = min(cell_map.shape[0] - 1, rmax + px_expansion)
                cmin = max(0, cmin - px_expansion)
                cmax = min(cell_map.shape[1] - 1, cmax + px_expansion)
                cell_img = padded_img[rmin:rmax + 1, cmin:cmax + 1]

                # Resize to the configured patch size.
                cell_img = cv2.resize(cell_img, cell_patch_size, interpolation=cv2.INTER_CUBIC)

                # Swap the first and third channels before writing; presumably the
                # dataset yields RGB while cv2.imwrite expects BGR (TODO confirm).
                cell_img = cv2.cvtColor(cell_img, cv2.COLOR_BGR2RGB)
                uid = f"{cid}_{int(area)}"
                cid += 1
                cv2.imwrite(str(Path(save_path_imgs, f"f{fold}_{uid}_{c_type}.png")), cell_img)


In [None]:
# Extract every 'Lymphocyte', 'Macrophage' and 'Neutrophil' nucleus from the
# selected MoNuSAC folds and save one resized patch per nucleus under
# <save_path>/images.
save_path = Path(f"monusac_lymph_macro_neutro_{cell_patch_size[0]}x{cell_patch_size[1]}")
save_path_imgs = Path(save_path, "images")
rm_n_mkdir(save_path_imgs)
cid = 0  # running patch counter; not reset between folds, so file names stay unique

# Index of each cell type inside `label_maps`. MoNuSAC channels:
# 0: 'Epithelial', # All types of epithelial cells
# 1: 'Lymphocyte', # Lymphocyte type of immune cells
# 2: 'Macrophage', # Macrophages type of immune cells
# 3: 'Neutrophil', # Neutrophils type of immune cells
# 4: 'Ambiguous',  # Only for the test set
c_type_channels = {'Lymphocyte': 1, 'Macrophage': 2, 'Neutrophil': 3}

# Pad both spatial axes of the image by exp_pad; leave the channel axis alone.
pad_width = ((exp_pad, exp_pad), (exp_pad, exp_pad), (0, 0))

for fold in selected_folds:
    monusac_dataset = dm_monusac.get_dataset([fold])

    for dataset_entry in monusac_dataset:
        img, label_maps = dataset_entry
        # Move axis 0 to the end; presumably CHW -> HWC (TODO confirm against the dataset).
        img = img.transpose(1, 2, 0)

        # Cropped + padded copy of the tile. Built lazily, at most once per tile,
        # and kept separate from `img`: cropping/padding `img` in place inside the
        # cell-type loop would re-crop and re-pad it for every further type,
        # leaving the image larger than, and shifted against, that type's label map.
        padded_img = None

        for c_type, channel in c_type_channels.items():
            c_type_map = label_maps[channel]

            # A single unique value means there is no instance of this type here.
            if np.unique(c_type_map).shape[0] == 1:
                continue

            # Trim crop_pad pixels from each border, then zero-pad by exp_pad.
            # Explicit end indices keep the slice valid when crop_pad == 0
            # (a plain `[0:-0]` slice would be empty).
            c_type_map = c_type_map[
                crop_pad:c_type_map.shape[0] - crop_pad,
                crop_pad:c_type_map.shape[1] - crop_pad,
            ]
            c_type_map = np.pad(c_type_map, exp_pad, mode='constant', constant_values=0)

            if padded_img is None:
                cropped_img = img[
                    crop_pad:img.shape[0] - crop_pad,
                    crop_pad:img.shape[1] - crop_pad,
                ]
                # Mirror-style padding so patches near the border contain
                # plausible context instead of a constant frame.
                padded_img = np.pad(cropped_img, pad_width, mode=pad_method)

            for cell_id in np.unique(c_type_map):
                if cell_id == 0:  # label 0 is skipped (also the value used for padding)
                    continue

                cell_map = np.zeros_like(c_type_map)
                cell_map[c_type_map == cell_id] = 1
                area = np.sum(cell_map)

                if area < min_area:
                    continue

                # bounding_box is a project helper; its results are used as
                # inclusive indices here (hence the `+ 1` when slicing).
                rmin, rmax, cmin, cmax = bounding_box(cell_map)

                # Grow the box by px_expansion, clamped to the padded tile.
                rmin = max(0, rmin - px_expansion)
                rmax = min(cell_map.shape[0] - 1, rmax + px_expansion)
                cmin = max(0, cmin - px_expansion)
                cmax = min(cell_map.shape[1] - 1, cmax + px_expansion)
                cell_img = padded_img[rmin:rmax + 1, cmin:cmax + 1]

                # Resize to the configured patch size.
                cell_img = cv2.resize(cell_img, cell_patch_size, interpolation=cv2.INTER_CUBIC)

                # Swap the first and third channels before writing; presumably the
                # dataset yields RGB while cv2.imwrite expects BGR (TODO confirm).
                cell_img = cv2.cvtColor(cell_img, cv2.COLOR_BGR2RGB)
                uid = f"{cid}_{int(area)}"
                cid += 1
                cv2.imwrite(str(Path(save_path_imgs, f"f{fold}_{uid}_{c_type}.png")), cell_img)
                