In [None]:
!pip -q install tensorflow==2.3.0

In [None]:
# Basics / Data manipulation
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import zipfile
import os

# Visualization
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import skimage.io
from IPython.display import display, HTML

# ML
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Folder paths
# Input folders: generate_dataset reads `<image_id>.tiff` from TRAIN and
# `<image_id>_mask.tiff` from MASKS.
TRAIN = '../input/prostate-cancer-grade-assessment/train_images'
MASKS = '../input/prostate-cancer-grade-assessment/train_label_masks'

# Output archives (written to the working directory): one image archive and
# one mask archive per split.
# NOTE(review): generate_dataset opens the OUT_MASKS_* archives but never
# writes an entry into them, so they end up empty.
OUT_TRAIN = './trainC.zip'
OUT_VALIDATION = './validationC.zip'
OUT_TEST = './testC.zip'
OUT_MASKS_TRAIN = './masks_trainC.zip'
OUT_MASKS_VALIDATION = './masks_validationC.zip'
OUT_MASKS_TEST = './masks_testC.zip'

# List the contents of the two input folders via notebook shell magics.
# NOTE(review): BASE_FOLDER / BASE_FOLDER2 are only used for these listings in
# the visible cells; the paths above are relative while these are absolute.
BASE_FOLDER = "/kaggle/input/prostate-cancer-grade-assessment/"
!ls {BASE_FOLDER}
BASE_FOLDER2 ="/kaggle/input/panda-tiles/"
!ls {BASE_FOLDER2}

In [None]:
# Load the pre-computed train / validation / test splits, keeping only the
# columns needed downstream (image id, provider and the two grading labels).
train_dataset, validation_dataset, test_dataset = [
    pd.read_csv(
        csv_path,
        usecols=["image_id", "data_provider", "isup_grade", "gleason_score"],
    )
    for csv_path in (
        "../input/8-fold-pc-dataset-gen-0-8/training.csv",
        "../input/8-fold-pc-dataset-gen-0-8/validation.csv",
        "../input/8-fold-pc-dataset-gen-0-8/testing.csv",
    )
]

In [None]:
# Image-id column of each split, in the same row order as its DataFrame.
train_IDs, validation_IDs, test_IDs = [
    split_frame["image_id"]
    for split_frame in (train_dataset, validation_dataset, test_dataset)
]

In [None]:
# Ids that could not be processed, collected per split by generate_dataset.
not_found_train, not_found_validation, not_found_test = [], [], []

In [None]:
SIZE_IMG = 112  # side length, in pixels, of one square tile
N = 16          # number of tiles kept per image


def tile(img, mask):
    """Cut an image and its mask into square tiles and keep the N darkest.

    Both arrays are first padded (image with 255, mask with 0) so that height
    and width become multiples of ``SIZE_IMG``, then split into
    ``SIZE_IMG x SIZE_IMG x 3`` tiles. If fewer than ``N`` tiles exist, blank
    tiles (255 for the image, 0 for the mask) are appended. Tiles are ranked
    by the sum of their image pixel values, ascending, and the first ``N``
    are returned.

    Parameters
    ----------
    img : np.ndarray
        Image array of shape (H, W, 3).
    mask : np.ndarray
        Mask array with the same (H, W, 3) shape as ``img``.

    Returns
    -------
    list of dict
        ``N`` dicts with keys ``'img'``, ``'mask'`` and ``'idx'`` (the rank of
        the tile after sorting, starting at 0).
    """
    height, width = img.shape[0], img.shape[1]
    # Amount needed to reach the next multiple of SIZE_IMG (0 if already one).
    extra_rows = -height % SIZE_IMG
    extra_cols = -width % SIZE_IMG
    top, left = extra_rows // 2, extra_cols // 2
    spatial_padding = [[top, extra_rows - top], [left, extra_cols - left], [0, 0]]

    img = np.pad(img, spatial_padding, constant_values=255)
    mask = np.pad(mask, spatial_padding, constant_values=0)

    def _split(array):
        # (H, W, 3) -> (rows, SIZE, cols, SIZE, 3) -> (rows * cols, SIZE, SIZE, 3)
        grid = array.reshape(
            array.shape[0] // SIZE_IMG, SIZE_IMG,
            array.shape[1] // SIZE_IMG, SIZE_IMG,
            3,
        )
        return grid.transpose(0, 2, 1, 3, 4).reshape(-1, SIZE_IMG, SIZE_IMG, 3)

    img_tiles = _split(img)
    mask_tiles = _split(mask)

    missing = N - len(img_tiles)
    if missing > 0:
        # Top up with blank tiles so that at least N tiles are available.
        batch_padding = [[0, missing], [0, 0], [0, 0], [0, 0]]
        mask_tiles = np.pad(mask_tiles, batch_padding, constant_values=0)
        img_tiles = np.pad(img_tiles, batch_padding, constant_values=255)

    # Lowest pixel sum first, i.e. the darkest tiles are kept.
    brightness = img_tiles.reshape(img_tiles.shape[0], -1).sum(-1)
    keep = np.argsort(brightness)[:N]
    img_tiles = img_tiles[keep]
    mask_tiles = mask_tiles[keep]

    return [
        {'img': tile_img, 'mask': tile_mask, 'idx': rank}
        for rank, (tile_img, tile_mask) in enumerate(zip(img_tiles, mask_tiles))
    ]

In [None]:
def multiplyTiles(tiles, variations=("C",)):
    """Build rotated / mirrored copies of a list of tiles.

    Every variation applies one fixed transform to both the ``img`` and the
    ``mask`` of each tile; ``idx`` is copied unchanged.

    Available variations:

    * ``"A"`` - original tile
    * ``"B"``, ``"C"``, ``"D"`` - ``np.rot90`` applied 1, 2 and 3 times
    * ``"E"``, ``"F"``, ``"G"``, ``"H"`` - ``np.fliplr`` of A, B, C and D

    Parameters
    ----------
    tiles : sequence of dict
        Tiles with keys ``'img'``, ``'mask'`` and ``'idx'``.
    variations : iterable of str, optional
        Names of the variations to generate, in output order. Defaults to
        ``("C",)``, i.e. only the 180-degree rotation.

    Returns
    -------
    list of list of dict
        One list of transformed tiles per requested variation, so the default
        call returns ``[variationC]``. An empty ``tiles`` input yields one
        empty list per variation.

    Raises
    ------
    ValueError
        If an unknown variation name is requested.
    """
    transforms = {
        "A": lambda arr: arr,
        "B": lambda arr: np.rot90(arr, 1),
        "C": lambda arr: np.rot90(arr, 2),
        "D": lambda arr: np.rot90(arr, 3),
        "E": lambda arr: np.fliplr(arr),
        "F": lambda arr: np.fliplr(np.rot90(arr, 1)),
        "G": lambda arr: np.fliplr(np.rot90(arr, 2)),
        "H": lambda arr: np.fliplr(np.rot90(arr, 3)),
    }

    # Materialise once so generators can be validated and then iterated.
    variations = tuple(variations)
    unknown = [name for name in variations if name not in transforms]
    if unknown:
        raise ValueError(f"Unknown tile variation(s): {unknown}")

    tile_set = []
    for name in variations:
        transform = transforms[name]
        tile_set.append([
            {
                "img": transform(current["img"]),
                "mask": transform(current["mask"]),
                "idx": current["idx"],
            }
            for current in tiles
        ])

    return tile_set

In [None]:
def concat_tile(im_list_2d):
    """Stitch a 2-D grid of images (a list of rows) into one image.

    Each inner list is joined left-to-right with ``cv2.hconcat``; the
    resulting strips are then stacked top-to-bottom with ``cv2.vconcat``.
    """
    strips = []
    for row_images in im_list_2d:
        strips.append(cv2.hconcat(row_images))
    return cv2.vconcat(strips)

def mosaic(tiles):
    """Arrange the images of the first 16 tiles into a 4x4 mosaic.

    Tiles are laid out row by row: tiles 0-3 form the first row, 4-7 the
    second, and so on. Only the ``"img"`` entry of each tile is used.
    """
    images = [tiles[position]["img"] for position in range(16)]
    grid = [images[start:start + 4] for start in range(0, 16, 4)]
    return concat_tile(grid)


In [None]:
def _export_split(ids, labels, folder, out_images, out_masks, not_found):
    """Write one mosaic PNG per slide of a single split into ``out_images``.

    Parameters
    ----------
    ids : iterable of str
        Image ids; ``<id>.tiff`` is read from ``TRAIN`` and
        ``<id>_mask.tiff`` from ``MASKS``.
    labels : pandas.Series
        Gleason scores, looked up by position: the i-th id gets the i-th
        label.
    folder : str
        Top-level folder name used for the entries inside the archive.
    out_images, out_masks : str
        Paths of the image archive and of the mask archive.
    not_found : list
        Ids that could not be processed are appended to this list.
    """
    failure_reasons = {}
    # The mask archive is still created so the set of output files does not
    # change, but no entry is written into it.
    with zipfile.ZipFile(out_images, 'w') as img_out, \
            zipfile.ZipFile(out_masks, 'w'):
        for position, image_id in enumerate(tqdm(ids)):
            try:
                # Entry 1 of the multi-image TIFF is used for slide and mask.
                img = skimage.io.MultiImage(os.path.join(TRAIN, image_id + '.tiff'))[1]
                mask = skimage.io.MultiImage(os.path.join(MASKS, image_id + '_mask.tiff'))[1]

                [tiles_c] = multiplyTiles(tile(img, mask))
                mosaic_img = mosaic(tiles_c)

                # OpenCV encodes in BGR channel order, so convert from RGB first.
                png = cv2.imencode('.png', cv2.cvtColor(mosaic_img, cv2.COLOR_RGB2BGR))[1]

                # Positional lookup: `position` counts rows, it is not an index label.
                label = labels.iloc[position]
                img_out.writestr(f'{folder}/GLEASON_SCORE_{label}/{image_id}-variationC.png', png)
            except Exception as exc:
                # Best effort: keep going, but remember which id failed and why.
                not_found.append(image_id)
                reason = type(exc).__name__
                failure_reasons[reason] = failure_reasons.get(reason, 0) + 1

    if failure_reasons:
        print(f"INFO: Failure reasons in {folder}: {failure_reasons}")


def generate_dataset(ids, dataset_type):
    """Export the mosaics of one dataset split into its zip archive.

    For every id the slide and its mask are read, tiled with ``tile``,
    transformed with ``multiplyTiles`` and assembled with ``mosaic``; the
    result is stored as
    ``<split>/GLEASON_SCORE_<score>/<id>-variationC.png``.

    Parameters
    ----------
    ids : iterable of str
        Image ids of the split, in the same row order as the matching
        ``*_dataset`` DataFrame (labels are looked up by position).
    dataset_type : {"train", "valid", "test"}
        Which split to export.

    Raises
    ------
    ValueError
        If ``dataset_type`` is not one of the supported values.

    Notes
    -----
    Ids that fail for any reason are appended to the matching module-level
    ``not_found_*`` list instead of aborting the export.
    """
    if dataset_type == "train":
        labels, folder = train_dataset["gleason_score"], "train"
        out_images, out_masks = OUT_TRAIN, OUT_MASKS_TRAIN
        not_found = not_found_train
    elif dataset_type == "valid":
        labels, folder = validation_dataset["gleason_score"], "validation"
        out_images, out_masks = OUT_VALIDATION, OUT_MASKS_VALIDATION
        not_found = not_found_validation
    elif dataset_type == "test":
        labels, folder = test_dataset["gleason_score"], "test"
        out_images, out_masks = OUT_TEST, OUT_MASKS_TEST
        not_found = not_found_test
    else:
        raise ValueError(
            f"Unknown dataset_type {dataset_type!r}; expected 'train', 'valid' or 'test'"
        )

    _export_split(ids, labels, folder, out_images, out_masks, not_found)
    print(f"INFO: Not images found in {folder}: {len(not_found)}")

In [None]:
# Export the three splits one after another: train, validation, test.
for split_ids, split_name in (
    (train_IDs, 'train'),
    (validation_IDs, 'valid'),
    (test_IDs, 'test'),
):
    generate_dataset(split_ids, dataset_type=split_name)

## Removing Lost/Corrupted Data

In [None]:
# Save the ids that failed during the train export, one per line. The `with`
# block closes the file even if a write raises.
with open("not_found_train.txt", "w") as nf:
    nf.writelines(f"{each}\n" for each in not_found_train)

In [None]:
# Save the ids that failed during the validation export, one per line. The
# `with` block closes the file even if a write raises.
with open("not_found_validation.txt", "w") as nf:
    nf.writelines(f"{each}\n" for each in not_found_validation)

In [None]:
# Save the ids that failed during the test export, one per line. The `with`
# block closes the file even if a write raises.
with open("not_found_test.txt", "w") as nf:
    nf.writelines(f"{each}\n" for each in not_found_test)