In [None]:
import os
import cv2
import PIL
import random
import openslide
import skimage.io
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Image, display

In [None]:
# Work on a fixed-seed sample of 100 rows of the training table.
train_df = (
    pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv')
    .sample(n=100, random_state=0)
    .reset_index(drop=True)
)

# Parallel lists taken row by row from the sample: image ids and their `isup_grade`.
images = train_df['image_id'].tolist()
labels = train_df['isup_grade'].tolist()

In [None]:
# Directory containing the training slides; later cells build a slide path as
# data_dir + image_id + '.tiff'.
data_dir = '../input/prostate-cancer-grade-assessment/train_images/'

## Compute statistics

First we need to write a function to compute the proportion of white pixels in the region.

## Select k-best regions

Then we need a function to sort a list of tuples, where one component of the tuple is the proportion of white pixels in the regions. We are sorting in ascending order.

Since we only store the coordinates of the top-left pixel of each region, we need a way to retrieve the k best regions themselves, hence the function below.

## Slide over the image

The main function: two nested loops slide a window over the image, one loop per axis (the order of the two loops does not actually matter).
For each window position we extract the region and compute its statistics; once the whole image has been covered, we sort the regions and select the k best ones.

## Show the results

Now we will show some results. 

Please note:
1. The smaller the window size, the more precise the selection, but the longer it takes to run.
2. I would say that a window size of around 200 is a good choice: it is a good trade-off between generality (capturing enough of the biopsy structure) and detail.
3. A window size that is too small might harm the performance of the model, since you might select only a tiny portion of the biopsy. (To counter this, introducing a random choice might be worth trying.)

# sample 2

In [None]:
def compute_statistics(image, white_threshold=700):
    """
    Measure how much of an image is white and how strong its green and blue channels are.

    Args:
        image                  numpy.array   array indexed as (row, column, channel) with at
                                             least three channels
        white_threshold        int           a pixel counts as white when the sum of its
                                             channel values is strictly greater than this
                                             value (a pure white RGB pixel sums to 765)

    Returns:
        ratio_white_pixels     float         number of white pixels over total pixels
        green_concentration    float         mean of channel 1 over the whole image
        blue_concentration     float         mean of channel 2 over the whole image

    Raises:
        ZeroDivisionError      if the image has no pixels (zero rows or zero columns)
    """
    rows, cols = image.shape[0], image.shape[1]
    num_pixels = rows * cols

    # Collapse the channel axis so each pixel is represented by the sum of its channels.
    channel_sum = np.sum(image, axis=-1)
    num_white_pixels = np.count_nonzero(channel_sum > white_threshold)
    ratio_white_pixels = num_white_pixels / num_pixels

    green_concentration = np.mean(image[:, :, 1])
    blue_concentration = np.mean(image[:, :, 2])

    return ratio_white_pixels, green_concentration, blue_concentration


def select_k_best_regions(regions, k=20):
    """
    Keep the k regions with the lowest ratio of white pixels.

    Args:
        regions               list           tuples whose component at index 2 is the
                                             ratio of white pixels of the region

        k                     int            number of regions to select

    Returns:
        k_best_regions        list           at most k of the input tuples, ordered by
                                             ascending ratio of white pixels
    """
    ranked = list(regions)
    # list.sort is stable, so regions with equal ratios keep their input order.
    ranked.sort(key=lambda region: region[2])
    return ranked[:k]


def get_k_best_regions(coordinates, image, window_size=512):
    """
    Cut one square window out of the image for every coordinate tuple.

    Args:
        coordinates           list           tuples whose first two components are the
                                             offsets of the window's top-left corner along
                                             axis 0 and axis 1 of the image
        image                 numpy.array    array indexed as (axis 0, axis 1, channel)
        window_size           int            side length of each window

    Returns:
        regions               dict           position in `coordinates` -> window sliced
                                             from `image`
    """
    return {
        rank: image[
            corner[0] : corner[0] + window_size,
            corner[1] : corner[1] + window_size,
            :,
        ]
        for rank, corner in enumerate(coordinates)
    }


def detect_best_window_size(image, K=16, scaling_factor=1.0):
    """
    Derive a window side length from the non-white area of the image.

    The non-white area (pixel count times one minus the white ratio reported by
    `compute_statistics`) is multiplied by `scaling_factor`, divided by K, and the
    square root of that value is truncated to an int.

    Args:
        image                 numpy.array    array indexed as (row, column, channel)
        K                     int            number of windows the area is divided by
        scaling_factor        float          multiplier applied to the non-white area

    Returns:
        window_size           int            computed side length, never smaller than 32
    """
    white_ratio, _, _ = compute_statistics(image)
    rows, cols = image.shape[:2]
    non_white_area = rows * cols * (1.0 - white_ratio)
    side = int(np.sqrt(non_white_area * scaling_factor / K))
    return side if side > 32 else 32


def generate_patches(
    slide_path, window_size=128, stride=128, k=20, auto_ws=False, scaling_factor=1.0
):
    """
    Tile the image at index 2 of a multi-image file and keep the k least-white tiles.

    Args:
        slide_path            str            path handed to skimage.io.MultiImage
        window_size           int            side length of each square tile
        stride                int            step between consecutive tiles on both axes
        k                     int            number of tiles to keep
        auto_ws               bool           when True, window_size is replaced by the value
                                             from detect_best_window_size and stride is set
                                             equal to it
        scaling_factor        float          forwarded to detect_best_window_size

    Returns:
        image                       numpy.array   the image that was tiled
        k_best_region_coordinates   list          tuples (axis-0 offset, axis-1 offset,
                                                  ratio_white_pixels, green_concentration,
                                                  blue_concentration) of the selected tiles
        k_best_regions              dict          rank -> tile cut from `image`
        window_size                 int           tile side length actually used

    Raises:
        ValueError            if auto_ws is False and window_size or stride is not positive
    """
    # A non-positive stride never advances the sliding window. With auto_ws both
    # values are overwritten below, so only caller-supplied values are checked.
    if not auto_ws and (window_size <= 0 or stride <= 0):
        raise ValueError(
            f"window_size and stride must be positive, got {window_size} and {stride}"
        )

    image = np.array(skimage.io.MultiImage(slide_path)[2])

    if auto_ws:
        window_size = detect_best_window_size(
            image, K=k, scaling_factor=scaling_factor
        )
        stride = window_size

    n_rows, n_cols = image.shape[0], image.shape[1]
    regions_container = []

    # Only windows that fit entirely inside the image are visited. The outer loop
    # advances along axis 1 and the inner loop along axis 0.
    for col_start in range(0, n_cols - window_size + 1, stride):
        for row_start in range(0, n_rows - window_size + 1, stride):
            patch = image[
                row_start : row_start + window_size,
                col_start : col_start + window_size,
                :,
            ]
            (
                ratio_white_pixels,
                green_concentration,
                blue_concentration,
            ) = compute_statistics(patch)
            regions_container.append(
                (
                    row_start,
                    col_start,
                    ratio_white_pixels,
                    green_concentration,
                    blue_concentration,
                )
            )

    k_best_region_coordinates = select_k_best_regions(regions_container, k=k)
    k_best_regions = get_k_best_regions(k_best_region_coordinates, image, window_size)

    return image, k_best_region_coordinates, k_best_regions, window_size


def glue_to_one_picture_from_coord(url, coordinates, window_size=200, k=16, layer=0):
    """
    Read the selected windows from a slide with OpenSlide and paste them on a square grid.

    Args:
        url                   str            path of the slide opened with openslide.OpenSlide
        coordinates           iterable       tuples whose first two components are the
                                             axis-0 and axis-1 offsets of a window; they are
                                             multiplied by the downsample factor of level 2,
                                             i.e. treated as level-2 pixel offsets
        window_size           int            window side length, in level-2 pixels
        k                     int            number of grid cells requested; the grid has
                                             int(sqrt(k)) cells per side
        layer                 int            slide level the pixels are read from

    Returns:
        image                 numpy.array    uint8 canvas of shape (S, S, 3), where
                                             S = int(side * window_size * scale); cells that
                                             receive no window stay white (255)

    Coordinates beyond the first side * side entries are ignored, because the grid has
    no cell left for them.
    """
    side = int(np.sqrt(k))
    capacity = side * side
    slide = openslide.OpenSlide(url)
    try:
        lv2_scale = slide.level_downsamples[2]
        # Number of `layer` pixels covered by one level-2 pixel.
        scale = lv2_scale / slide.level_downsamples[layer]
        tile = int(window_size * scale)
        canvas_side = int(side * window_size * scale)

        image = np.full((canvas_side, canvas_side, 3), 255, dtype=np.uint8)

        for i, patch_coord in enumerate(coordinates):
            if i >= capacity:
                # No grid cell left: pasting would index past the canvas.
                break
            grid_row, grid_col = divmod(i, side)
            # The location passed to read_region is (axis-1 offset, axis-0 offset),
            # each scaled by the level-2 downsample factor.
            patch = np.asarray(
                slide.read_region(
                    (int(patch_coord[1] * lv2_scale), int(patch_coord[0] * lv2_scale)),
                    layer,
                    (tile, tile),
                )
            )[:, :, :3]  # keep the first three channels only
            top = int(grid_row * window_size * scale)
            left = int(grid_col * window_size * scale)
            image[top : top + tile, left : left + tile, :] = patch
    finally:
        # Release the slide handle even when reading or pasting fails.
        slide.close()
    return image

def glue_to_one_picture_from_coord_lowlayer(url, coordinates, window_size=200, k=16, layer=1):
    """
    Cut the selected windows out of one fully loaded slide level and paste them on a grid.

    OpenSlide is used only to read the downsample factors; the pixels come from
    skimage.io.MultiImage(url)[layer].

    Args:
        url                   str            path of the slide file
        coordinates           iterable       tuples whose first two components are the
                                             axis-0 and axis-1 offsets of a window; they are
                                             multiplied by `scale` (level-2 downsample over
                                             the downsample of `layer`) to index the loaded
                                             image
        window_size           int            window side length before scaling
        k                     int            number of grid cells requested; the grid has
                                             int(sqrt(k)) cells per side
        layer                 int            index of the image level to cut windows from

    Returns:
        image                 numpy.array    uint8 canvas of shape (S, S, 3), where
                                             S = int(side * window_size * scale); areas that
                                             receive no pixels stay white (255)

    Coordinates beyond the first side * side entries are ignored. A window clipped by
    the border of the loaded image is pasted at its clipped size.
    """
    side = int(np.sqrt(k))
    capacity = side * side

    slide = openslide.OpenSlide(url)
    try:
        scale = slide.level_downsamples[2] / slide.level_downsamples[layer]
    finally:
        slide.close()

    level_image = np.array(skimage.io.MultiImage(url)[layer])

    tile = int(window_size * scale)
    canvas_side = int(side * window_size * scale)
    image = np.full((canvas_side, canvas_side, 3), 255, dtype=np.uint8)

    for i, patch_coord in enumerate(coordinates):
        if i >= capacity:
            # No grid cell left: pasting would index past the canvas.
            break
        grid_row, grid_col = divmod(i, side)
        src_top = int(patch_coord[0] * scale)
        src_left = int(patch_coord[1] * scale)
        patch = level_image[src_top : src_top + tile, src_left : src_left + tile, :]
        # Slicing clips at the image border, so paste using the patch's real size.
        patch_h, patch_w = patch.shape[:2]
        top = int(grid_row * window_size * scale)
        left = int(grid_col * window_size * scale)
        image[top : top + patch_h, left : left + patch_w, :] = patch
    return image

def glue_to_one_picture(image_patches, window_size=200, k=16):
    """
    Paste square patches onto a square grid, filling cells row by row.

    Args:
        image_patches         dict           grid position (0-based int) -> patch of shape
                                             (window_size, window_size, 3)
        window_size           int            side length of every patch
        k                     int            number of grid cells requested; the grid has
                                             int(sqrt(k)) cells per side

    Returns:
        image                 numpy.array    uint8 canvas of shape
                                             (side * window_size, side * window_size, 3);
                                             cells without a patch stay black (0)

    Patches whose position is side * side or larger are skipped, because the grid has
    no cell for them.
    """
    side = int(np.sqrt(k))
    capacity = side * side
    image = np.zeros((side * window_size, side * window_size, 3), dtype=np.uint8)

    for i, patch in image_patches.items():
        if i >= capacity:
            # Position falls outside the grid; pasting would index past the canvas.
            continue
        grid_row, grid_col = divmod(i, side)
        image[
            grid_row * window_size : (grid_row + 1) * window_size,
            grid_col * window_size : (grid_col + 1) * window_size,
            :,
        ] = patch

    return image


def load_img(img_name, K=16, scaling_factor=1.0, layer=0, auto_ws=True, window_size=128):
    """
    Select the K best windows of a slide and return them glued into one picture.

    Args:
        img_name              str            path of the slide file
        K                     int            number of windows to select and glue
        scaling_factor        float          forwarded to generate_patches
        layer                 int            level the glued pixels are taken from; 0 uses
                                             glue_to_one_picture_from_coord, any other value
                                             uses glue_to_one_picture_from_coord_lowlayer
        auto_ws               bool           forwarded to generate_patches
        window_size           int            window side length, also used as the stride

    Returns:
        glued_image           the picture returned by the chosen glue function
    """
    # Only the coordinates and the window size actually used are needed from here on.
    _, best_coordinates, _, effective_window = generate_patches(
        img_name,
        window_size=window_size,
        stride=window_size,
        k=K,
        auto_ws=auto_ws,
        scaling_factor=scaling_factor,
    )

    glue = (
        glue_to_one_picture_from_coord
        if layer == 0
        else glue_to_one_picture_from_coord_lowlayer
    )
    return glue(
        img_name, best_coordinates, window_size=effective_window, k=K, layer=layer
    )

In [None]:
# Preview one slide: show the image stored at index 1 of its multi-image TIFF.
img_id = "3790f55cad63053e956fb73027179707"
ex_url = f"{data_dir}{img_id}.tiff"
image = skimage.io.MultiImage(ex_url)[1]
plt.imshow(image)

In [None]:
%%time
img_id = "3790f55cad63053e956fb73027179707"
img_id = images[25]
ex_url = data_dir + img_id + '.tiff'
glued_image = load_img(ex_url, K=16, scaling_factor=1.0, layer=1, auto_ws=True, window_size=128)
#cv2.cvtColor(glued_image, cv2.COLOR_RGB2BGR)
plt.imshow(glued_image)

In [None]:
# Number of slides shown, one figure row per slide.
N_EXAMPLES = 6

pen_marked_images = [
    'fd6fe1a3985b17d067f2cb4d5bc1e6e1',
    'ebb6a080d72e09f6481721ef9f88c472',
    'ebb6d5ca45942536f78beb451ee43cc4',
    'ea9d52d65500acc9b9d89eb6b82cdcdf',
    'e726a8eac36c3d91c3c4f9edba8ba713',
    'e90abe191f61b6fed6d6781c8305fe4b',
    'fd0bb45eba479a7f7d953f41d574bf9f',
    'ff10f937c3d52eff6ad4dd733f2bc3ac',
    'feee2e895355a921f2b75b54debad328',
    'feac91652a1c5accff08217d19116f1c',
    'fb01a0a69517bb47d7f4699b6217f69d',
    'f00ec753b5618cfb30519db0947fe724',
    'e9a4f528b33479412ee019e155e1a197',
    'f062f6c1128e0e9d51a76747d9018849',
    'f39bf22d9a2f313425ee201932bac91a',
]

# `labels` is aligned with the random sample held in `images`, not with the ids above,
# so the grade of each slide is looked up by image id in the full training table.
isup_by_id = pd.read_csv(
    '../input/prostate-cancer-grade-assessment/train.csv',
    usecols=['image_id', 'isup_grade'],
).set_index('image_id')['isup_grade']

# squeeze=False keeps `ax` two-dimensional even when N_EXAMPLES is 1.
fig, ax = plt.subplots(N_EXAMPLES, 2, figsize=(20, 25), squeeze=False)

for i, img in enumerate(pen_marked_images[:N_EXAMPLES]):
    url = data_dir + img + '.tiff'
    # Load this slide's own overview (index 2, the level generate_patches tiles)
    # instead of reusing whatever `image` an earlier cell left behind.
    original_image = skimage.io.MultiImage(url)[2]
    glued_image = load_img(url, K=16, scaling_factor=1.0)
    label = isup_by_id.get(img, 'unknown')

    ax[i][0].imshow(original_image)
    ax[i][0].set_title(f'{img} - Original - Label: {label}')

    ax[i][1].imshow(glued_image)
    ax[i][1].set_title(f'{img} - Glued - Label: {label}')

fig.suptitle('From biopsy to glued patches');

In [None]:
import sys

# Print a small table of the notebook variables that occupy the most memory.
print("{}{: >25}{}{: >10}{}".format('|','Variable Name','|','Memory','|'))
print(" ------------------------------------ ")
for var_name in dir():
    if var_name.startswith("_"):
        continue
    # Look the object up directly instead of eval()-ing its name, and measure it once.
    var_size = sys.getsizeof(globals()[var_name])
    if var_size > 10000:  # only this threshold was adapted from the original snippet
        print("{}{: >25}{}{: >10}{}".format('|',var_name,'|',var_size,'|'))

In [None]:
# Drop the references to the image arrays bound by earlier cells.
del glued_image, image