In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import openslide
import os
import cv2
import PIL
from IPython.display import Image, display
from keras.applications.vgg16 import VGG16,preprocess_input
# Plotly for the interactive viewer (see last section)
import plotly.graph_objs as go
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split
from keras.models import Sequential, Model,load_model
from keras.applications.vgg16 import VGG16,preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten,BatchNormalization,Activation
from keras.layers import GlobalMaxPooling2D
from keras.models import Model
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import gc
import skimage.io
from sklearn.model_selection import KFold
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.
import tensorflow as tf
from tensorflow.python.keras import backend as K
# Fetch the backend session handle. `sess` is not read again in the visible cells.
sess = K.get_session()

In [None]:
import pandas as pd
import skimage.io
from skimage.io import imshow,show
from skimage.transform import resize
from skimage.util import montage
import numpy as np
import matplotlib.pyplot as plt
import os

# Dataset locations, all derived from the competition input folder.
INPUT_DIR = "../input/prostate-cancer-grade-assessment"
TRAIN_DIR = f"{INPUT_DIR}/train_images"
MASK_DIR = f"{INPUT_DIR}/train_label_masks"



# NOTE(review): N is not referenced in the visible cells -- presumably a patch count; confirm or remove.
N = 16


In [None]:
def compute_statistics(image):
    """
    Compute the white-pixel ratio and the mean green/blue intensity of an image patch.

    Args:
        image                  numpy.array   array of shape (rows, cols, channels); the channel
                                             axis is assumed to be in R, G, B order

    Returns:
        ratio_white_pixels     float         fraction of pixels whose channel sum exceeds 620
        green_concentration    float         mean of channel index 1 over the whole patch
        blue_concentration     float         mean of channel index 2 over the whole patch
    """
    num_rows, num_cols = image.shape[0], image.shape[1]
    num_pixels = num_rows * num_cols

    # Per-pixel channel sum. A pure white RGB pixel (255, 255, 255) sums to 765,
    # so the 620 threshold also counts near-white pixels.
    summed_matrix = np.sum(image, axis=-1)
    num_white_pixels = np.count_nonzero(summed_matrix > 620)
    ratio_white_pixels = num_white_pixels / num_pixels

    # Index the channel (last) axis. `image[1]` would select the second *row*
    # of the patch across all channels, not the green channel.
    green_concentration = np.mean(image[..., 1])
    blue_concentration = np.mean(image[..., 2])

    return ratio_white_pixels, green_concentration, blue_concentration



def select_k_best_regions(regions, k=20):
    """
    Keep the regions that pass the colour thresholds and return the k least-white ones.

    Args:
        regions               list           list of tuples laid out as
                                             (x, y, ratio_white_pixels, green_concentration,
                                             blue_concentration)

        k                     int            number of regions to select

    Returns:
        list of at most k region tuples, ordered by ascending ratio of white pixels
        (ties keep their input order)
    """
    candidates = []
    for region in regions:
        # Both colour statistics (indices 3 and 4) must exceed 180 to keep the region.
        if region[3] > 180 and region[4] > 180:
            candidates.append(region)

    # list.sort is stable, so equally-white regions stay in scan order.
    candidates.sort(key=lambda region: region[2])
    return candidates[:k]


def generate_patches(image, window_size=200, stride=128, k=20):
    """
    Slide a square window over the image, score every position and keep the k best patches.

    Args:
        image          array-like   image convertible to an array of shape (rows, cols, channels)
        window_size    int          side length of the square window, in pixels
        stride         int          step between consecutive window positions, in pixels
        k              int          maximum number of patches to keep

    Returns:
        image                       the input converted to a numpy array
        k_best_region_coordinates   list of (x, y, ratio_white_pixels, green_concentration,
                                    blue_concentration) tuples picked by select_k_best_regions;
                                    x is the offset along axis 0, y the offset along axis 1
        k_best_regions              dict mapping rank -> patch array (see get_k_best_regions)

    Raises:
        ValueError: if stride is not positive, since the scan could never advance.
    """
    if stride <= 0:
        raise ValueError(f"stride must be a positive integer, got {stride!r}")

    image = np.array(image)
    num_rows, num_cols = image.shape[0], image.shape[1]

    regions_container = []
    # Only positions where the full window fits are visited. The outer loop walks
    # axis 1 and the inner loop axis 0, which fixes the candidate order and hence
    # the tie-breaking of the stable sort in select_k_best_regions.
    for y_top_left_pixel in range(0, num_cols - window_size + 1, stride):
        for x_top_left_pixel in range(0, num_rows - window_size + 1, stride):
            patch = image[
                x_top_left_pixel : x_top_left_pixel + window_size,
                y_top_left_pixel : y_top_left_pixel + window_size,
                :
            ]

            ratio_white_pixels, green_concentration, blue_concentration = compute_statistics(patch)

            regions_container.append(
                (x_top_left_pixel, y_top_left_pixel, ratio_white_pixels, green_concentration, blue_concentration)
            )

    k_best_region_coordinates = select_k_best_regions(regions_container, k=k)
    k_best_regions = get_k_best_regions(k_best_region_coordinates, image, window_size)

    return image, k_best_region_coordinates, k_best_regions


def get_k_best_regions(coordinates, image, window_size=512):
    """
    Cut one square window out of `image` for every coordinate entry.

    Args:
        coordinates    iterable     entries whose first two items are the offsets along
                                    axis 0 and axis 1 of the window's top-left corner
        image          numpy.array  array indexed as (rows, cols, channels)
        window_size    int          side length of each window

    Returns:
        dict mapping the entry's position in `coordinates` to the sliced window
    """
    def _window(entry):
        # Only the first two items of an entry are used; any extra items are ignored.
        row, col = entry[0], entry[1]
        return image[row : row + window_size, col : col + window_size, :]

    return {rank: _window(entry) for rank, entry in enumerate(coordinates)}



def glue_to_one_picture(image_patches, window_size=200, k=16):
    """
    Tile patches row by row into a single square mosaic.

    The mosaic is a side x side grid with side = int(sqrt(k)). Patch i is placed in
    grid row i // side and grid column i % side. Cells with no patch stay zero.

    Args:
        image_patches  dict         maps a patch index (0, 1, 2, ...) to an array of shape
                                    (window_size, window_size, 3)
        window_size    int          side length of every patch
        k              int          nominal number of patches; determines the grid size

    Returns:
        numpy.array of shape (side*window_size, side*window_size, 3) and dtype int16
    """
    side = int(np.sqrt(k))
    capacity = side * side
    image = np.zeros((side*window_size, side*window_size, 3), dtype=np.int16)

    for i, patch in image_patches.items():
        if i >= capacity:
            # No grid cell is left (k is not a perfect square, or more patches were
            # supplied than the grid holds). Writing past the canvas would raise,
            # so surplus patches are skipped.
            continue

        x, y = divmod(i, side)
        image[
            x * window_size : (x+1) * window_size,
            y * window_size : (y+1) * window_size,
            :
        ] = patch

    return image

# test patch extraction 

import random

# Patch-extraction settings; they are the default arguments of get_patch_image.
WINDOW_SIZE = 128  # side length of each square patch, in pixels
STRIDE = 64  # step between consecutive window positions, in pixels
# NOTE(review): this rebinds `K`, which the first cell imported as the Keras backend alias
# (`from tensorflow.python.keras import backend as K`); after this line K is the integer 16.
K = 16  # number of patches kept per image


def test_patch_extraction():
    """Plot six sampled biopsies next to their glued-patch mosaics as a visual sanity check."""
    fig, axes = plt.subplots(6, 2, figsize=(20, 25))

    # random.seed() returns None, so random_state is None and the sample differs between runs.
    sampled_df = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv').sample(n=10, random_state=random.seed())

    image_ids = list(sampled_df['image_id'])
    grades = list(sampled_df['isup_grade'])

    data_dir = '../input/prostate-cancer-grade-assessment/train_images/'

    # Ten rows are sampled but only the first six are drawn, one per subplot row.
    for (original_ax, glued_ax), image_id, grade in zip(axes, image_ids[:6], grades):
        # [-1] selects the last page of the multi-page TIFF.
        slide = skimage.io.MultiImage(data_dir + image_id + '.tiff')[-1]

        slide, _coordinates, patches = generate_patches(slide, window_size=WINDOW_SIZE, stride=STRIDE, k=K)
        mosaic = glue_to_one_picture(patches, window_size=WINDOW_SIZE, k=K)

        original_ax.imshow(slide)
        original_ax.set_title(f'{image_id} - Original - Label: {grade}')

        glued_ax.imshow(mosaic)
        glued_ax.set_title(f'{image_id} - Glued - Label: {grade}')

    fig.suptitle('From biopsy to glued patches')
    

def get_patch_image(image, window_size=WINDOW_SIZE, stride=STRIDE, k=K):
    """
    Turn an image into a single mosaic made of its selected patches.

    Runs generate_patches on `image` and hands the resulting patch dict to
    glue_to_one_picture, returning the glued array.
    """
    # generate_patches returns (image, coordinates, patches); only the patches are needed.
    patches = generate_patches(image, window_size, stride, k)[2]
    return glue_to_one_picture(patches, window_size, k)

#test_patch_extraction()

    


In [None]:
# Load the training metadata; later cells read its `image_id` and `isup_grade` columns.
train=pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')

In [None]:
import os,shutil


# Write one glued-patch PNG per training slide, then zip the output folder.
destination_dir = '/patch_images/'
data_dir = '/kaggle/input/prostate-cancer-grade-assessment/train_images/'

if not os.path.exists(destination_dir):
    os.mkdir(destination_dir)

for image_id in train['image_id']:
    # [-1] selects the last page of the multi-page TIFF
    # (presumably the lowest-resolution level -- confirm).
    image = skimage.io.MultiImage(data_dir + image_id + '.tiff')[-1]
    patch_image = get_patch_image(image)
    # NOTE(review): patch_image is int16 (see glue_to_one_picture); confirm how imsave
    # encodes that dtype as PNG.
    skimage.io.imsave(destination_dir + image_id + '.png', patch_image, check_contrast=False)

shutil.make_archive('patch_images', 'zip', destination_dir)
    
   

In [None]:
# Pair every generated patch image path with its ISUP grade.
data_dir = '/patch_images/'
data = [data_dir + image_id + '.png' for image_id in train['image_id']]
labels = list(train['isup_grade'])
df = pd.DataFrame({'images': data, 'isup_grade': labels})

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(df['images'],df['isup_grade'], test_size=0.2, random_state=1234)

In [None]:
# Rebuild two-column frames (image path, grade as string) for the Keras generators.
# The grades are cast to str, presumably because flow_from_dataframe with
# class_mode='categorical' expects string class labels -- confirm.
# NOTE(review): `train` is rebound here and no longer has an `image_id` column, so the
# earlier cells that read train['image_id'] cannot be re-run after this one.
train = X_train.to_frame(name='images')
train['isup_grade'] = y_train.astype(str)

validation = X_val.to_frame(name='images')
validation['isup_grade'] = y_val.astype(str)

In [None]:
# Training pipeline: rescale pixel values to [0, 1] and apply light geometric augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# Validation pipeline: rescaling only. It is a separate generator object so that the
# augmentation configured above applies to the training data and nothing else.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train,
    x_col='images',
    y_col='isup_grade',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    validate_filenames=False)

validation_generator = val_datagen.flow_from_dataframe(
    validation,
    x_col='images',
    y_col='isup_grade',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    validate_filenames=False)

In [None]:
def vgg16_model(num_classes=None):
    """
    Build a classifier from the VGG16 convolutional base plus a softmax head.

    The base is created without its top layers for 224x224x3 inputs and loads its
    weights from a local .h5 file. Its output is flattened and fed to a single
    Dense layer with `num_classes` softmax units.

    Args:
        num_classes: number of output units of the Dense head.

    Returns:
        A Keras Model mapping the VGG16 input to the softmax output.
    """
    backbone = VGG16(weights='/kaggle/input/keras-pretrained-models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False, input_shape=(224, 224, 3))
    flattened = Flatten()(backbone.output)
    class_probabilities = Dense(num_classes, activation='softmax')(flattened)
    return Model(backbone.input, class_probabilities)

# Six output classes -- presumably ISUP grades 0-5; confirm against the label column.
vgg_conv = vgg16_model(6)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the assembled model.
vgg_conv.summary()

In [None]:
def kappa_score(y_true, y_pred):
    """
    Cohen's kappa between true and predicted class indices, wrapped as a TensorFlow op.

    Args:
        y_true: tensor of one-hot labels; assumed shape (batch, num_classes) -- confirm.
        y_pred: tensor of class scores with the same shape as y_true.

    Returns:
        A tf.double tensor holding sklearn's cohen_kappa_score for the batch.
    """
    # Reduce over the class (last) axis to get one class index per sample. Without an
    # explicit axis, tf.math.argmax reduces over axis 0, i.e. across the batch.
    true_classes = tf.math.argmax(y_true, axis=-1)
    predicted_classes = tf.math.argmax(y_pred, axis=-1)
    return tf.compat.v1.py_func(cohen_kappa_score, (true_classes, predicted_classes), tf.double)

In [None]:
# Plain SGD with a small learning rate, categorical cross-entropy loss for the softmax head.
# NOTE(review): only 'accuracy' is tracked; kappa_score defined above is not passed to metrics.
opt = SGD(lr=0.001)
vgg_conv.compile(loss='categorical_crossentropy',optimizer=opt,metrics=['accuracy'])

In [None]:
# Training schedule. Steps use floor division, so a trailing partial batch is not counted.
nb_epochs = 25
batch_size = 32  # same value as the batch_size given to the generators
nb_train_steps = len(train) // batch_size
nb_val_steps = len(validation) // batch_size
print("Number of training and validation steps: {} and {}".format(nb_train_steps, nb_val_steps))

In [None]:
# Train from the generators for nb_epochs, validating on validation_generator each epoch.
# NOTE(review): fit_generator is deprecated in recent TensorFlow/Keras releases in favour
# of fit(); confirm the installed version before switching.
vgg_conv.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_steps,
    epochs=nb_epochs,
    validation_data=validation_generator,
    validation_steps=nb_val_steps)

In [None]:
def predict_submission(df, path):
    """
    Predict an ISUP grade for every slide listed in `df`.

    Each slide goes through the same preprocessing as the training images: the last
    page of the TIFF is read, reduced to a glued-patch mosaic with get_patch_image,
    resized to 224x224 and scaled to [0, 1] before being passed to `vgg_conv`.

    Args:
        df:   DataFrame with an `image_id` column.
        path: directory prefix (including the trailing separator) that holds
              `<image_id>.tiff` for every row of `df`.

    Returns:
        A new DataFrame with the columns `image_id` and `isup_grade`, where
        `isup_grade` is the argmax of the model output. `df` itself is not modified.
    """
    predictions = []
    for image_id in df["image_id"]:
        image_path = path + image_id + ".tiff"
        print(image_path)

        # Same input pipeline as the patch-export cell: last TIFF page -> patch mosaic.
        # Reading only that page also avoids loading the full-resolution slide.
        slide = skimage.io.MultiImage(image_path)[-1]
        img = get_patch_image(slide).astype(np.float32)

        img = cv2.resize(img, (224, 224))
        img = img / 255.
        img = np.reshape(img, (1, 224, 224, 3))

        prediction = vgg_conv.predict(img)
        predictions.append(np.argmax(prediction))

    # Build the result on a copy so the caller's frame keeps its original columns.
    result = df[["image_id"]].copy()
    result["isup_grade"] = predictions
    return result

In [None]:
# Start from the sample submission so a file is always written, even without test images.
test_path = "../input/prostate-cancer-grade-assessment/test_images/"
submission_df = pd.read_csv("../input/prostate-cancer-grade-assessment/sample_submission.csv")

# Only run inference when the test image folder is present
# (presumably it exists only during the hidden-test rerun -- confirm).
if os.path.exists(test_path):
    test_df = pd.read_csv("../input/prostate-cancer-grade-assessment/test.csv")
    submission_df = predict_submission(test_df, test_path)

submission_df.to_csv('submission.csv', index=False)
submission_df.head()