In [1]:
import sys
import os
import numpy as np
import cv2
import openslide
from openslide import open_slide
from openslide.deepzoom import DeepZoomGenerator
import xml.etree.ElementTree as ET
from xml.dom import minidom
import geojson
import argparse
import matplotlib.pyplot as plt
import fastai
from fastai.vision.all import *
import PIL
matplotlib.use('Agg')
import pandas as pd
import datetime
from skimage import draw, measure, morphology, filters
from shapely.geometry import Polygon, Point, MultiPoint, MultiPolygon, shape
from shapely.ops import cascaded_union, unary_union
import json
import shapely
import warnings
from scipy import ndimage
sys.path.insert(0, '../Utils/')
from Utils import generate_deepzoom_tiles, extract_tile_start_end_coords
warnings.filterwarnings("ignore")

  warn(


In [None]:
def do_mask(self,img,lvl_resize):
    ''' Create a binary tissue mask and the matching tissue outline polygons.

    Parameters
    ----------
    img : image-like
        Low-resolution slide image; converted with ``np.array`` and only the
        first three channels are used.
    lvl_resize : float
        Factor applied to contour coordinates so that the polygons are
        expressed in the coordinate system the caller tiles in.

    Returns
    -------
    tissue : shapely geometry
        ``unary_union`` of the exterior outlines of all detected regions.
    tissue_mask : ndarray of bool
        Mask at the resolution of ``img`` after dilation, small-object
        removal (< 1000 px) and hole filling.
    '''
    # keep the first three channels only (drops alpha when present)
    he = np.array(img)[:, :, 0:3]
    # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order, but the caller in
    # this notebook passes a PIL image (RGB). Left unchanged so that existing
    # masks stay reproducible -- confirm whether COLOR_RGB2GRAY was intended.
    gray = cv2.cvtColor(he, cv2.COLOR_BGR2GRAY)
    # with THRESH_OTSU the threshold is chosen automatically; 120 is not used
    _, thresh1 = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # tissue is darker than background, so invert the thresholded image
    imagem = cv2.bitwise_not(thresh1)
    tissue_mask = morphology.binary_dilation(imagem, morphology.disk(radius=2))
    tissue_mask = morphology.remove_small_objects(tissue_mask, 1000)
    tissue_mask = ndimage.binary_fill_holes(tissue_mask)

    # create polygons for faster tiling in cancer detection step
    polygons = []
    contours, hier = cv2.findContours(tissue_mask.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        # OpenCV contours are (N, 1, 2); flatten to (N, 2) x/y pairs
        pts = contour.reshape(-1, 2).astype('float64')
        if len(pts) < 3:
            # a polygon needs at least three vertices; Polygon() would raise
            continue
        # scale every vertex in one vectorised step
        pts = np.round(pts * lvl_resize, 2)
        poly = Polygon(pts)
        if poly.length > 0:
            polygons.append(Polygon(poly.exterior))
    print('creating union')
    tissue = unary_union(polygons)

    return tissue, tissue_mask

def check_tissue(self, tile_starts, tile_ends, roi):
    ''' Return the fraction of a tile's area that lies inside ``roi``.

    ``tile_starts`` and ``tile_ends`` are the (x, y) coordinates of two
    opposite tile corners. The result is 0 when the tile does not intersect
    ``roi``, otherwise intersection area divided by tile area.
    '''
    x0, y0 = tile_starts[0], tile_starts[1]
    x1, y1 = tile_ends[0], tile_ends[1]
    corners = [[x0, y0], [x0, y1], [x1, y0], [x1, y1]]
    # the convex hull of the four corners is the tile rectangle
    tile_box = MultiPoint(corners).convex_hull

    if not tile_box.intersects(roi):
        return 0

    overlap = tile_box.intersection(roi)
    return overlap.area / tile_box.area

def whitespace_check(self, im):
    ''' Return the proportion of bright ("whitespace") pixels in a tile.

    ``im`` must provide ``convert('L')`` (e.g. a PIL image). Intensities are
    scaled to [0, 1]; the result is the number of pixels brighter than 0.8
    divided by the number of non-zero pixels (pure-black pixels are excluded
    from the denominator).

    When the tile has no non-zero pixels the ratio is undefined; 1.0 is
    returned so that such a tile is treated as entirely whitespace instead of
    producing NaN or a division error.
    '''
    bw = np.asarray(im.convert('L'), dtype='float') / 255
    n_valid = (bw > 0).sum()
    if n_valid == 0:
        # empty or fully black tile: nothing to analyse
        return 1.0
    return (bw > 0.8).sum() / n_valid

def cancer_mask(self,preds,hetissue):
    ''' Turn a probability map into a cleaned-up binary mask.

    Probabilities outside the tissue mask (``hetissue < 1``) are zeroed --
    note that this modifies ``preds`` in place. Pixels above 0.3 are then
    marked, closed morphologically (dilation followed by erosion with a
    radius-2 disk) and hole-filled.
    '''
    # suppress predictions outside tissue (writes into the caller's array)
    preds[hetissue < 1] = 0

    footprint = morphology.disk(radius=2)
    candidate = np.where(preds > 0.3, 1.0, 0.0)
    grown = morphology.binary_dilation(candidate, footprint)
    closed = morphology.binary_erosion(grown, footprint)
    return ndimage.binary_fill_holes(closed)


def slide_ROIS(self,polygons,mpp,savename,labels,ref,roi_color):
    ''' Merge polygons and write them to ``savename`` as a JSON list of features.

    Parameters
    ----------
    polygons : shapely geometry or sequence of geometries
        Input regions; they are merged with ``unary_union``.
    mpp : float
        Scale factor (microns per pixel in the caller); regions whose
        ``area * mpp * mpp`` is not greater than 0.1 are dropped.
    savename : str
        Path of the file to write.
    labels : str
        Value stored under ``properties.classification.name``.
    ref : sequence of two numbers
        Offset subtracted from every x (``ref[0]``) and y (``ref[1]``).
    roi_color : int
        Value stored under ``properties.classification.colorRGB``.

    Only polygon exteriors are kept (interior holes are discarded) and
    coordinates are rounded to one decimal. One feature is written per part
    of the final merged geometry; an empty result is written as ``[]``.

    Multi-part geometries are traversed through ``.geoms`` rather than by
    iterating/indexing the geometry itself, which Shapely 2.x no longer
    supports.
    '''
    def _iter_polygons(geom):
        # yield every non-empty Polygon in geom, descending into multi-part geometries
        if geom.is_empty:
            return
        if geom.geom_type == 'Polygon':
            yield geom
        elif hasattr(geom, 'geoms'):
            for part in geom.geoms:
                yield from _iter_polygons(part)

    def _shifted_exterior(poly):
        # rebuild poly from its exterior ring, shifted by ref and rounded to 0.1
        xs, ys = poly.exterior.xy
        coords = np.column_stack((np.round(np.asarray(xs) - ref[0], 1),
                                  np.round(np.asarray(ys) - ref[1], 1)))
        return Polygon(coords)

    all_polys = unary_union(polygons)
    final_polys = []
    for poly in _iter_polygons(all_polys):
        newpoly = _shifted_exterior(poly)
        if newpoly.area*mpp*mpp > 0.1:
            final_polys.append(newpoly)

    final_shape = unary_union(final_polys)
    if final_shape.is_empty:
        parts = []
    elif hasattr(final_shape, 'geoms'):
        parts = list(final_shape.geoms)
    else:
        parts = [final_shape]

    features = [
        json.dumps(
            {"type": "Feature", "id": "PathAnnotationObject", "geometry": shapely.geometry.mapping(part),
             "properties": {"classification": {"name": labels, "colorRGB": roi_color}, "isLocked": False,
                            "measurements": []}}, indent=4)
        for part in parts
    ]
    # same layout as before: features joined by ',' inside a single '[...]'
    trythis = '[' + ','.join(features) + ']'

    with open(savename, 'w') as outfile:
        outfile.write(trythis)
    return

def tile_ROIS(self,mask_arr,lvl_resize):
    ''' Convert a binary mask into a list of polygons.

    Contours are extracted from ``mask_arr`` with OpenCV, their coordinates
    are multiplied by ``lvl_resize`` (rounded to two decimals), and each
    contour becomes a shapely Polygon built from its exterior ring. Contours
    that cannot form a polygon are skipped. The number of contours found and
    the number of polygons kept are printed.
    '''
    polygons = []
    contours, hier = cv2.findContours(mask_arr.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    print(len(contours))
    for contour in contours:
        # OpenCV contours are (N, 1, 2); flatten to (N, 2) x/y pairs
        pts = contour.reshape(-1, 2).astype('float64')
        if len(pts) < 3:
            # too few vertices for a polygon
            continue
        pts = np.round(pts * lvl_resize, 2)
        try:
            poly = Polygon(pts)
            if poly.length > 0:
                polygons.append(Polygon(poly.exterior))
        except Exception:
            # best effort: skip contours shapely rejects, but no longer
            # swallow KeyboardInterrupt/SystemExit as the bare except did
            continue
    print(len(polygons))
    return polygons

In [None]:
# --- paths ---
cur_wd = '/fh/scratch/delete90/etzioni_r/lucas_l/michael_project/mutation_pred/'
save_location = f'{cur_wd}intermediate_data/cancer_prediction_results110224/'
model_path2 = f'{cur_wd}models/cancer_detection_models/mets/ft_models/dlv3_2ep_2e4_update-07182023_RT_fine_tuned..pkl'

# --- tiling (fixed: the model was trained on 250x250 tiles at 20x) ---
mag_extract = [20]
save_image_size = run_image_size = 250
pixel_overlap = 0    # pixel overlap between neighbouring saved tiles
limit_bounds = True  # leave as is (passed to the tile generator)

# --- post-processing ---
smooth = True        # gaussian-smooth the output probability map

In [None]:
# Create the output directory together with any missing parent directories.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(save_location, exist_ok=True)

In [None]:
cur_id= '(2017-0133) 23-B_A1-8'
_file = '/fh/scratch/delete90/haffner_m/user/scan_archives/Prostate/MDAnderson/CCola/all_slides/' + cur_id + '.svs'

#Load slides
oslide = openslide.OpenSlide(_file)
# slide file name without directory and without the '.svs' suffix
save_name = str(Path(os.path.basename(_file)).with_suffix(''))

# Per-slide output directory.
# NOTE(review): this cell used `save_location4` and `create_dir_if_not_exists`,
# neither of which is defined or imported in this notebook, so it raised
# NameError on a fresh kernel. `save_location` is used as the parent here --
# confirm that this is the intended directory.
save_location5 = os.path.join(save_location, cur_id) + "/"
os.makedirs(save_location5, exist_ok=True)

In [None]:

# Derive the output base name: file name of the slide without its suffix.
savnm = os.path.basename(_file)
save_name = str(Path(savnm).with_suffix(''))

# Open the whole-slide image that the following cells read from.
oslide = openslide.OpenSlide(_file)

In [None]:
# Build the tile generator for the open slide using the tile size, overlap and
# bounds settings from the configuration cell. The helper comes from the local
# Utils module and returns four values:
#   tiles     - tile generator object queried later via level_tiles,
#               get_tile_coordinates and get_tile
#   tile_lvls - sequence searched with .index(mag) to map a magnification to a
#               tile level
#   physSize  - not used in the visible cells (presumably physical pixel size
#               -- TODO confirm against Utils)
#   base_mag  - used below to compute the down-sampling factors
tiles, tile_lvls, physSize, base_mag = generate_deepzoom_tiles(oslide,save_image_size, pixel_overlap, limit_bounds)

In [None]:
def parseMeta_and_pullTiles(self,flist):
    ''' Run tissue detection and tile-wise cancer inference, then save the results.

    For every entry of ``flist`` the body:

    1. reads a low-resolution image of the slide, builds a tissue mask with
       ``self.do_mask`` and saves ``<save_name>_tissue.json``,
       ``<save_name>_low-res.png`` and ``<save_name>_tissue.png``;
    2. walks over the tiles of each magnification in ``self.mag_extract``,
       keeps tiles with tissue overlap > 0.9 and whitespace < 0.9, runs
       ``learn.predict`` on them and accumulates channel 1 of the output into
       a probability map;
    3. averages, optionally gaussian-smooths, saves
       ``<save_name>_cancer_prob.jpeg`` and writes the thresholded regions to
       ``<save_name>_cancer.json``.

    The function reads the notebook-level names ``oslide``, ``tiles``,
    ``tile_lvls``, ``base_mag``, ``save_name`` and ``learn``, and expects
    ``self`` to provide ``do_mask``, ``slide_ROIS``, ``check_tissue``,
    ``whitespace_check``, ``cancer_mask``, ``tile_ROIS``, ``save_location``,
    ``mag_extract``, ``save_image_size`` and ``smooth``.

    NOTE(review): the loop variable ``_file`` is never used inside the loop;
    every iteration processes the globally opened ``oslide`` and writes to the
    same file names. ``learn`` is not defined in the visible cells.
    '''


    for _file in flist:


        # 1.25X for tissue detection
        # down-sampling factor relative to level 0, derived from the slide's base magnification
        lvl_resize =4*(base_mag / 5)
        getLvl = oslide.get_best_level_for_downsample(lvl_resize)
        # read the whole pyramid level, then resize to exactly level-0 size / lvl_resize
        lvl_img = oslide.read_region((0, 0), getLvl, oslide.level_dimensions[getLvl])
        lvl_img = lvl_img.resize(size=(int(np.ceil(oslide.level_dimensions[0][0]/lvl_resize)),int(np.ceil(oslide.level_dimensions[0][1]/lvl_resize))),resample=PIL.Image.LANCZOS)
        # tissue: polygon geometry scaled by lvl_resize; he_mask: mask at the low-res size
        tissue, he_mask = self.do_mask(lvl_img, lvl_resize)
        self.slide_ROIS(polygons=tissue, mpp=float(oslide.properties[openslide.PROPERTY_NAME_MPP_X]),
                        savename=os.path.join(self.save_location, save_name + '_tissue.json'),
                        labels='tissue', ref=[0, 0], roi_color=-16770432)
        lvl_img.save(os.path.join(self.save_location, save_name + '_low-res.png'))
        # save the mask as an 8-bit greyscale image (0 / 255)
        lvl_mask = PIL.Image.fromarray(np.uint8(he_mask * 255))
        lvl_mask = lvl_mask.convert('L')
        lvl_mask.save(os.path.join(self.save_location, save_name + '_tissue.png'))



        # 2.5x for probability maps
        lvl_resize = 2*(base_mag / 5)
        # x_map accumulates predictions, x_count counts contributions per pixel;
        # both are (rows, cols) = (height, width) of level 0 divided by lvl_resize
        x_map = np.zeros((int(np.ceil(oslide.level_dimensions[0][1]/lvl_resize)),int(np.ceil(oslide.level_dimensions[0][0]/lvl_resize))), float)
        x_count = np.zeros((int(np.ceil(oslide.level_dimensions[0][1]/lvl_resize)),int(np.ceil(oslide.level_dimensions[0][0]/lvl_resize))), float)


        print('starting inference')
        # pull tiles from levels specified by self.mag_extract
        for lvl in self.mag_extract:
            # print('made it to lvl')
            # lvl_resize = lvl_resize * (lvl/base_mag)
            if lvl in tile_lvls:
                # pull tile info for level
                x_tiles, y_tiles = tiles.level_tiles[tile_lvls.index(lvl)]
                # print(x_tiles, y_tiles)
                for y in range(0, y_tiles):
                    for x in range(0, x_tiles):

                        # grab tile coordinates
                        # tile_coords[0] is used as the tile's start (x, y), tile_coords[1] as the
                        # index into tiles._l0_l_downsamples and tile_coords[2] as the tile size
                        tile_coords = tiles.get_tile_coordinates(tile_lvls.index(lvl), (x, y))
                        # NOTE(review): save_coords is built but not used in this function
                        save_coords = str(tile_coords[0][0]) + "-" + str(tile_coords[0][1]) + "_" + '%.0f' % (tiles._l0_l_downsamples[tile_coords[1]] * tile_coords[2][0]) + "-" + '%.0f' % (tiles._l0_l_downsamples[tile_coords[1]] * tile_coords[2][1])
                        # end corner = start + size scaled by the level's downsample factor
                        tile_ends = (int(tile_coords[0][0] + tiles._l0_l_downsamples[tile_coords[1]] * tile_coords[2][0]),int(tile_coords[0][1] + tiles._l0_l_downsamples[tile_coords[1]] * tile_coords[2][1]))

                        # check for tissue membership
                        tile_tiss = self.check_tissue(tile_starts=tile_coords[0], tile_ends=tile_ends,roi=tissue)

                        # only tiles with more than 90% of their area inside the tissue polygon are read
                        if tile_tiss > 0.9:
                            tile_pull = tiles.get_tile(tile_lvls.index(lvl), (x, y))
                            # tile_copy = tiles.get_tile(tile_lvls.index(lvl), (x, y))
                            ws = self.whitespace_check(im=tile_pull)

                            # skip tiles that are 90% or more whitespace
                            if ws < 0.9:
                                tile_pull = tile_pull.resize(size=(self.save_image_size, self.save_image_size),resample=PIL.Image.LANCZOS)

                                #segmentation
                                tile_pull = np.array(tile_pull)
                                with learn.no_bar():
                                    pred_class, pred_idx, outputs = learn.predict(tile_pull[:, :, 0:3])
                                outputs_np = outputs.numpy()
                                # resize channel 1 of the model output to the tile's footprint in the probability map
                                output_np = cv2.resize(outputs_np[1],(int(np.ceil(tile_ends[0] / lvl_resize))-int(np.ceil(tile_coords[0][0] / lvl_resize)),int(np.ceil(tile_ends[1] / lvl_resize))-int(np.ceil(tile_coords[0][1] / lvl_resize))))
                                output_np = output_np.round(2)
                                # NOTE(review): x_count is incremented before x_map is updated; if the
                                # x_map update raises (for example on a shape mismatch), the count stays
                                # incremented and the error is silently discarded by the bare except
                                try:
                                    x_count[int(np.ceil(tile_coords[0][1] / lvl_resize)):int(np.ceil(tile_ends[1] / lvl_resize)),int(np.ceil(tile_coords[0][0] / lvl_resize)):int(np.ceil(tile_ends[0] / lvl_resize))] += 1
                                    x_map[int(np.ceil(tile_coords[0][1] / lvl_resize)):int(np.ceil(tile_ends[1] / lvl_resize)),int(np.ceil(tile_coords[0][0] / lvl_resize)):int(np.ceil(tile_ends[0] / lvl_resize))] += output_np
                                except:
                                    pass
            else:
                print("WARNING: YOU ENTERED AN INCORRECT MAGNIFICATION LEVEL")


        # avoid division by zero where no tile contributed
        x_count = np.where(x_count < 1, 1, x_count)
        # NOTE(review): countimg is only used by the commented-out save below
        countimg = PIL.Image.fromarray(np.uint8(x_count * 100))
        # countimg.save(os.path.join(self.save_location, save_name + '_tiles.jpeg'))
        # average the accumulated predictions and cap at 1
        x_map = x_map / x_count
        x_map[x_map>1]=1
        print('post-processing')
        # NOTE(review): x_sm stays unassigned if self.smooth is neither True nor False
        if self.smooth == True:
            x_sm = filters.gaussian(x_map, sigma=2)
        if self.smooth == False:
            x_sm = x_map
        # render the probability map with the 'jet' colormap and drop the alpha channel
        cmap = plt.get_cmap('jet')
        rgba_img = cmap(x_sm)
        rgb_img = np.delete(rgba_img, 3, 2)
        colimg = PIL.Image.fromarray(np.uint8(rgb_img * 255))
        colimg.save(os.path.join(self.save_location, save_name + '_cancer_prob.jpeg'))
        # the tissue mask is resized to the probability map's shape before masking
        binary_preds = self.cancer_mask(x_sm,cv2.resize(np.uint8(he_mask),(x_sm.shape[1],x_sm.shape[0])))
        print('saving...')
        polygons = self.tile_ROIS(mask_arr=binary_preds, lvl_resize=lvl_resize)
        self.slide_ROIS(polygons=polygons, mpp=float(oslide.properties[openslide.PROPERTY_NAME_MPP_X]),
                        savename=os.path.join(self.save_location,save_name+'_cancer.json'), labels='AI_tumor', ref=[0,0], roi_color=-16711936)
        print('done!')
        #
        # except:
        #     pass

    return
