In [1]:
DEBUG = True

In [2]:
import os
import sys
sys.path = [
    '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master',
] + sys.path

DISCLAIMER: The libraries are imported at different points in the code for better organization and readability, 
following the logical flow of the code and to facilitate the development process. This allows also for
better management of dependencies. It also allows to have clearer separation of concerns and facilitate the understanding of the code's
purpose and functionality.

In [3]:
import skimage.io
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from efficientnet_pytorch import model as enet

import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

import random
import os

In [4]:
data_dir = '/data/prostate-cancer-grade-assessment/'
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
df_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
df_sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

model_dir = './panda-public-models/'

image_folder = os.path.join(data_dir, 'test_images')
is_test = os.path.exists(image_folder)  # IF test_images is not exists, we will use some train images.
image_folder = image_folder if is_test else os.path.join(data_dir, 'train_images')

image_folder = r'E:\data\prostate-cancer-grade-assessment\train_images'

desiredNumberOfImages = int(10)    #Due to processing and time requirements insert in the parenthesis the number of images which are desired to be analysed
seedValue = 315019

def generate_random_subdataset(df, num_images, seed):     
    random.seed(seed)
    return df.sample(n=num_images, random_state=random.randint(0, 1000))

subDf_train = generate_random_subdataset(df_train, desiredNumberOfImages, seedValue)
df = df_test if is_test else subDf_train

print(df)

tile_size = 256
image_size = 256
n_tiles = 36
batch_size = 8
num_workers = 4

device = torch.device('cuda')
print(image_folder)


                           image_id data_provider
0  005700be7e06878e6605e7a5a39de1b2       radboud
1  005c6e8877caf724c600fdce5d417d40    karolinska
2  0104f76634ff89bfff1ef0804a95c380       radboud
E:\data\prostate-cancer-grade-assessment\train_images


## Model

In [5]:
class enetv2(nn.Module):
    def __init__(self, backbone, out_dim):
        super(enetv2, self).__init__()
        self.enet = enet.EfficientNet.from_name(backbone)  # Initialize the EfficientNet backbone
        self.myfc = nn.Linear(self.enet._fc.in_features, out_dim)  # Add a fully connected layer for final classification
        self.enet._fc = nn.Identity()  # Replace the original fully connected layer with an identity layer

    def extract(self, x):
        return self.enet(x)  # Extract features using the EfficientNet backbone

    def forward(self, x):
        x = self.extract(x)  # Extract features
        x = self.myfc(x)  # Perform final classification
        return x


def load_models(model_files):
    models = []
    for model_f in model_files:
        model_f = os.path.join(model_dir, model_f)                 # Get the file path of the model
        backbone = 'efficientnet-b0'
        model = enetv2(backbone, out_dim=5)                       # Create an instance of the enetv2 model
        model.load_state_dict(torch.load(model_f, map_location=lambda storage, loc: storage), strict=True)  # Load the model weights
        model.eval()  # Set the model to evaluation mode
        model.to(device)                                         # Move the model to the specified device (e.g., GPU)
        models.append(model)                                    # Add the model to the list of models
        print(f'{model_f} loaded!')
    return models


model_files = [
    'cls_effnet_b0_Rand36r36tiles256_big_bce_lr0.3_augx2_30epo_model_fold0.pth'
]

models = load_models(model_files)  # Load the models


model_dir2 = './pandaenetb042x256x256x3/'
def load_models2(model_files):
    models = []
    for model_f in model_files:
        model_f = os.path.join(model_dir2, model_f)  # Get the file path of the model
        backbone = 'efficientnet-b1'
        model = enetv2(backbone, out_dim=5)                              # Create an instance of the enetv2 model
        model.load_state_dict(torch.load(model_f, map_location=lambda storage, loc: storage), strict=True)  # Load the model weights
        model.eval()                           # Set the model to evaluation mode
        model.to(device)                       # Move the model to the specified device (e.g., GPU)
        models.append(model)                   # Add the model to the list of models
        print(f'{model_f} loaded!')
    return models


model_files2 = [
    'enet_b1_8ep_fold0.pth'
]

models2 = load_models2(model_files2)  # Load the models

./panda-public-models/cls_effnet_b0_Rand36r36tiles256_big_bce_lr0.3_augx2_30epo_model_fold0.pth loaded!
./pandaenetb042x256x256x3/enet_b1_8ep_fold0.pth loaded!


# Dataset

In [6]:
def get_tiles(img, mode=0):
    result = []
    
    # Get the dimensions of the image
    h, w, c = img.shape
    
    # Calculate padding sizes based on the tile size and mode
    pad_h = (tile_size - h % tile_size) % tile_size + ((tile_size * mode) // 2)
    pad_w = (tile_size - w % tile_size) % tile_size + ((tile_size * mode) // 2)
    
    # Pad the image with white pixels
    img2 = np.pad(img, [[pad_h // 2, pad_h - pad_h // 2], [pad_w // 2, pad_w - pad_w // 2], [0, 0]], constant_values=255)
    
    # Reshape the image into tiles
    img3 = img2.reshape(img2.shape[0] // tile_size, tile_size, img2.shape[1] // tile_size, tile_size, 3)
    img3 = img3.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, 3)
    
    # Calculate the number of tiles with non-white pixels
    n_tiles_with_info = (img3.reshape(img3.shape[0], -1).sum(1) < tile_size ** 2 * 3 * 255).sum()
    
    # Pad the image if the number of tiles is less than the desired number
    if len(img) < n_tiles:
        img3 = np.pad(img3, [[0, n_tiles - len(img3)], [0, 0], [0, 0], [0, 0]], constant_values=255)
    
    # Sort the tiles based on the sum of pixel values
    idxs = np.argsort(img3.reshape(img3.shape[0], -1).sum(-1))[:n_tiles]
    img3 = img3[idxs]
    
    # Create a list of tile images with their corresponding index
    for i in range(len(img3)):
        result.append({'img': img3[i], 'idx': i})
    
    return result, n_tiles_with_info >= n_tiles


class PANDADataset(Dataset):
    def __init__(self,
                 df,
                 image_size,
                 n_tiles=n_tiles,
                 tile_mode=0,
                 rand=False,
                 sub_imgs=False,
                 transform=None
                ):
        self.df = df.reset_index(drop=True)
        self.image_size = image_size
        self.n_tiles = n_tiles
        self.tile_mode = tile_mode
        self.rand = rand
        self.sub_imgs = sub_imgs
        self.transform = transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        img_id = row.image_id
        
        tiff_file = os.path.join(image_folder, f'{img_id}.tiff')
        image = skimage.io.MultiImage(tiff_file)[1]
        
        tiles, OK = get_tiles(image, self.tile_mode)  # Get the tiles using the get_tiles function
        
        # Generate random indexes for selecting tiles
        if self.rand:
            idxes = np.random.choice(list(range(self.n_tiles)), self.n_tiles, replace=False)
        else:
            idxes = list(range(self.n_tiles))
        
        idxes = np.asarray(idxes) + self.n_tiles if self.sub_imgs else idxes               # Adjust the indexes if sub_imgs is True
        
        n_row_tiles = int(np.sqrt(self.n_tiles))
        images = np.zeros((image_size * n_row_tiles, image_size * n_row_tiles, 3))
        
        # Construct the image from selected tiles
        for h in range(n_row_tiles):
            for w in range(n_row_tiles):
                i = h * n_row_tiles + w
    
                if len(tiles) > idxes[i]:
                    this_img = tiles[idxes[i]]['img']
                else:
                    this_img = np.ones((self.image_size, self.image_size, 3)).astype(np.uint8) * 255
                this_img = 255 - this_img
                h1 = h * image_size
                w1 = w * image_size
                images[h1:h1+image_size, w1:w1+image_size] = this_img
        
        if self.transform is not None:                # Apply transformations to the image
            images = self.transform(image=images)['image']
            
        # Convert image to float32 and normalize
        images = images.astype(np.float32)
        images /= 255
        
        # Transpose image to match PyTorch format
        images = images.transpose(2, 0, 1)
        
        return torch.tensor(images)


In [7]:
if not is_test:             # Create a dataset for visualization
    dataset_show = PANDADataset(df, image_size, n_tiles, 0)
    # Set the figure size for plotting
    from pylab import rcParams
    rcParams['figure.figsize'] = 20, 10
    
    # Generate subplots for visualization
    for i in range(2):
        f, axarr = plt.subplots(1, 5)
        
        # Select random images from the dataset
        for p in range(5):
            idx = np.random.randint(0, len(dataset_show))
            img = dataset_show[idx]
            axarr[p].imshow(1. - img.transpose(0, 1).transpose(1, 2).squeeze())
            axarr[p].set_title(str(idx))

# Prediction

In [8]:
import albumentations

# Define training transformations
transforms_train = albumentations.Compose([
    albumentations.Transpose(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
])

# Define validation transformations (empty)
transforms_val = albumentations.Compose([])

# Define additional validation transformations
transforms_val1 = albumentations.Compose([
    albumentations.Transpose(p=1)
])

transforms_val2 = albumentations.Compose([
    albumentations.VerticalFlip(p=1)
])

transforms_val3= albumentations.Compose([
    albumentations.HorizontalFlip(p=1),
])

transforms_val4= albumentations.Compose([
    albumentations.Transpose(p=1),
    albumentations.VerticalFlip(p=1),
    albumentations.HorizontalFlip(p=1),
])

# Model Inference

In [9]:
# Create PANDADataset with mode 0
dataset = PANDADataset(df, image_size, n_tiles, 0, False, False, transforms_val)
# Create DataLoader for dataset with specified batch size, num_workers, and shuffle=False
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

# Create another PANDADataset with mode 2
dataset2 = PANDADataset(df, image_size, n_tiles, 2, False, False, transforms_val)
# Create DataLoader for dataset2 with specified batch size, num_workers, and shuffle=False
loader2 = DataLoader(dataset2, batch_size=batch_size, num_workers=num_workers, shuffle=False)

# Initialize empty lists to store logits
LOGITS = []
LOGITS2 = []
LOGITS3 = []
LOGITS4 = []

# Perform inference without gradient computation
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)  
        logits = models[0](data)  
        LOGITS.append(logits) 
        logits = models2[0](data)  
        LOGITS3.append(logits)  

    for data in tqdm(loader2):
        data = data.to(device)  
        logits = models[0](data)  
        LOGITS2.append(logits)  
        logits = models2[0](data)  
        LOGITS4.append(logits) 

# Concatenate and process the logits to obtain predictions
LOGITS = (
    torch.cat(LOGITS).sigmoid().cpu() +
    torch.cat(LOGITS2).sigmoid().cpu() +
    torch.cat(LOGITS3).sigmoid().cpu() +
    torch.cat(LOGITS4).sigmoid().cpu()
) / 4
# Sum along the 1st dimension of the logits and convert to NumPy array
PREDS = LOGITS.sum(1).numpy()


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(loader):


  0%|          | 0/1 [00:00<?, ?it/s]

RuntimeError: DataLoader worker (pid(s) 4060, 1124, 13452, 4800) exited unexpectedly

In [None]:

dataset = PANDADataset(df, image_size, n_tiles, 0, False, False, transforms_val1 )  # mode == 0
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

dataset2 = PANDADataset(df, image_size, n_tiles, 2, False, False, transforms_val1 )  # mode == 2
loader2 = DataLoader(dataset2, batch_size=batch_size, num_workers=num_workers, shuffle=False)


LOGITS = []
LOGITS2 = []
LOGITS3 = []
LOGITS4 = []
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)
        logits = models[0](data)
        LOGITS.append(logits)
        logits = models2[0](data)
        LOGITS3.append(logits)
        
    for data in tqdm(loader2):
        data = data.to(device)
        logits = models[0](data)
        LOGITS2.append(logits)
        logits = models2[0](data)
        LOGITS4.append(logits)
        
LOGITS = (torch.cat(LOGITS).sigmoid().cpu()+torch.cat(LOGITS2).sigmoid().cpu()+torch.cat(LOGITS3).sigmoid().cpu()+torch.cat(LOGITS4).sigmoid().cpu()) / 4
PREDS1 = LOGITS.sum(1).numpy()


In [None]:
dataset = PANDADataset(df, image_size, n_tiles, 0, False, False, transforms_val2 )  # mode == 0
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

dataset2 = PANDADataset(df, image_size, n_tiles, 2, False, False, transforms_val2 )  # mode == 2
loader2 = DataLoader(dataset2, batch_size=batch_size, num_workers=num_workers, shuffle=False)


LOGITS = []
LOGITS2 = []
LOGITS3 = []
LOGITS4 = []
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)
        logits = models[0](data)
        LOGITS.append(logits)
        logits = models2[0](data)
        LOGITS3.append(logits)
        
    for data in tqdm(loader2):
        data = data.to(device)
        logits = models[0](data)
        LOGITS2.append(logits)
        logits = models2[0](data)
        LOGITS4.append(logits)
        
LOGITS = (torch.cat(LOGITS).sigmoid().cpu()+torch.cat(LOGITS2).sigmoid().cpu()+torch.cat(LOGITS3).sigmoid().cpu()+torch.cat(LOGITS4).sigmoid().cpu()) / 4
PREDS2 = LOGITS.sum(1).numpy()


In [None]:
dataset = PANDADataset(df, image_size, n_tiles, 0, False, False, transforms_val3 )  # mode == 0
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

dataset2 = PANDADataset(df, image_size, n_tiles, 2, False, False, transforms_val3 )  # mode == 2
loader2 = DataLoader(dataset2, batch_size=batch_size, num_workers=num_workers, shuffle=False)


LOGITS = []
LOGITS2 = []
LOGITS3 = []
LOGITS4 = []
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)
        logits = models[0](data)
        LOGITS.append(logits)
        logits = models2[0](data)
        LOGITS3.append(logits)
        
    for data in tqdm(loader2):
        data = data.to(device)
        logits = models[0](data)
        LOGITS2.append(logits)
        logits = models2[0](data)
        LOGITS4.append(logits)
        
LOGITS = (torch.cat(LOGITS).sigmoid().cpu()+torch.cat(LOGITS2).sigmoid().cpu()+torch.cat(LOGITS3).sigmoid().cpu()+torch.cat(LOGITS4).sigmoid().cpu()) / 4
PREDS3 = LOGITS.sum(1).numpy()

In [None]:
dataset = PANDADataset(df, image_size, n_tiles, 0, False, False, transforms_val4 )  # mode == 0
loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

dataset2 = PANDADataset(df, image_size, n_tiles, 2, False, False, transforms_val4 )  # mode == 2
loader2 = DataLoader(dataset2, batch_size=batch_size, num_workers=num_workers, shuffle=False)


LOGITS = []
LOGITS2 = []
LOGITS3 = []
LOGITS4 = []
with torch.no_grad():
    for data in tqdm(loader):
        data = data.to(device)
        logits = models[0](data)
        LOGITS.append(logits)
        logits = models2[0](data)
        LOGITS3.append(logits)
        
    for data in tqdm(loader2):
        data = data.to(device)
        logits = models[0](data)
        LOGITS2.append(logits)
        logits = models2[0](data)
        LOGITS4.append(logits)
        
LOGITS = (torch.cat(LOGITS).sigmoid().cpu()+torch.cat(LOGITS2).sigmoid().cpu()+torch.cat(LOGITS3).sigmoid().cpu()+torch.cat(LOGITS4).sigmoid().cpu()) / 4
PREDS4 = LOGITS.sum(1).numpy()


# Predictions of the Models

In [11]:
# # Install efficientnet package
# !pip install ../input/kaggle-efficientnet-repo/efficientnet-1.0.0-py3-none-any.whl

# Import necessary libraries
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import argparse
import os
import skimage.io
from scipy.ndimage import measurements
import os
import numpy as np
import pandas as pd
import argparse
import tensorflow as tf
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy
from keras import layers as L
import efficientnet.tfkeras as efn
from keras.utils import to_categorical
import gc
import albumentations
gc.enable()

# Set parameters
sz = 256
N = 48

# Function to generate image patches
def tile(img):
    result = []
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz, (sz - shape[1]%sz)%sz
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    return img

# Function to generate larger image patches
def tile2(img):
    result = []
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz + ((sz * 2) // 2), (sz - shape[1]%sz)%sz + ((sz * 2) // 2)
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    return img

# Define the ConvNet model
class ConvNet(tf.keras.Model):
    def __init__(self, engine, input_shape, weights):
        super(ConvNet, self).__init__()
        self.engine = engine(include_top=False, input_shape=input_shape, weights=weights)
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.dense_1 = tf.keras.layers.Dense(1024)
        self.dense_2 = tf.keras.layers.Dense(5, activation='sigmoid')

    @tf.function
    def call(self, inputs, **kwargs):
        x = tf.reshape(inputs, (-1, IMG_SIZE, IMG_SIZE, 3))
        x = self.engine(x)
        shape = x.shape
        x = tf.reshape(x, (-1, N_TILES, shape[1], shape[2], shape[3])) 
        x = tf.transpose(x, perm=[0, 2, 1, 3, 4])
        x = tf.reshape(x, (-1, shape[1], N_TILES * shape[2], shape[3])) 
        x = self.avg_pool2d(x)
        x = self.dropout(x, training=False)
        x = self.dense_1(x)
        x = tf.nn.relu(x)
        return self.dense_2(x)
    
# Here we set configuration parameters
is_ef = True
backbone_name = 'efficientnet-b0'
N_TILES = 48
IMG_SIZE = 256

# Check if the backbone name starts with 'efficientnet' and get the corresponding model function
if backbone_name.startswith('efficientnet'):
    model_fn = getattr(efn, f'EfficientNetB{backbone_name[-1]}')

# Initialize the model
model = ConvNet(engine=model_fn, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights=None)

TRAIN = '/data/prostate-cancer-grade-assessment/train_images/'
MASKS = '/data/prostate-cancer-grade-assessment/train_label_masks/'
BASE_PATH = '/data/prostate-cancer-grade-assessment/'
train = pd.read_csv(BASE_PATH + "train.csv")
sub = pd.read_csv("/data/prostate-cancer-grade-assessment/sample_submission.csv")
test = pd.read_csv("/data/prostate-cancer-grade-assessment/test.csv")
TEST = '/data/prostate-cancer-grade-assessment/test_images/'
PRED_PATH = TEST 
df = sub
t_df = test

# Define transformations for validation
transforms_val0 = albumentations.Compose([])
transforms_val1 = albumentations.Compose([
    albumentations.VerticalFlip(p=1)
])
transforms_val2 = albumentations.Compose([
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5)
])
transforms_val3 = albumentations.Compose([
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5)
])

# Set the number of Test Time Augmentations (TTA) iterations and create dummy data in order to allow for model initialization
n_TTA = 2        
dummy_data = tf.zeros((n_TTA * N_TILES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
_ = model(dummy_data)


In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('/')

####

if os.path.exists(PRED_PATH):
    predictions10 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions10.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions10 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions10.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions12 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions12.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df = subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions12 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions12.append(isup)

        del patches, img
        gc.collect()

In [None]:

class ConvNet(tf.keras.Model):

    def __init__(self, engine, input_shape, weights):
        super(ConvNet, self).__init__()
        
        self.engine = engine(
            include_top=False, input_shape=input_shape, weights=weights)
        
        
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.dense_1 = tf.keras.layers.Dense(1024)
        self.dense_2 = tf.keras.layers.Dense(5,activation='sigmoid')

    @tf.function
    def call(self, inputs, **kwargs):
        x = tf.reshape(inputs, (-1, IMG_SIZE, IMG_SIZE, 3))
        x = self.engine(x)
        shape = x.shape
        x = tf.reshape(x, (-1, N_TILES, shape[1], shape[2], shape[3])) 
        x = tf.transpose(x, perm=[0, 2, 1, 3, 4])
        x = tf.reshape(x, (-1, shape[1], N_TILES*shape[2], shape[3])) 
        x = self.avg_pool2d(x)
        x = self.dropout(x, training=False)
        x = self.dense_1(x)
        x = tf.nn.relu(x)
        return self.dense_2(x)
    
is_ef = True
backbone_name = 'efficientnet-b1'
N_TILES = 48
IMG_SIZE = 256


if backbone_name.startswith('efficientnet'):
    model_fn = getattr(efn, f'EfficientNetB{backbone_name[-1]}')
    
model = ConvNet(engine=model_fn, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights=None)




n_TTA = 2        
dummy_data = tf.zeros((n_TTA * N_TILES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
_ = model(dummy_data)        


TRAIN = '../input/prostate-cancer-grade-assessment/train_images/'
MASKS = '../input/prostate-cancer-grade-assessment/train_label_masks/'
BASE_PATH = '../input/prostate-cancer-grade-assessment/'
train = pd.read_csv(BASE_PATH + "train.csv")
train.head()

sub = pd.read_csv("../input/prostate-cancer-grade-assessment/sample_submission.csv")
sub.head()

test = pd.read_csv("../input/prostate-cancer-grade-assessment/test.csv")
test.head()

TEST = '../input/prostate-cancer-grade-assessment/test_images/'


PRED_PATH = TEST 
df = sub
t_df = test

In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/efficientnet-b1-48-full-epochs60.h5')

####

if os.path.exists(PRED_PATH):
    predictions20 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions20.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions20 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions20.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions22 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions22.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions22 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions22.append(isup)

        del patches, img
        gc.collect()

In [None]:

class ConvNet(tf.keras.Model):

    def __init__(self, engine, input_shape, weights):
        super(ConvNet, self).__init__()
        
        self.engine = engine(
            include_top=False, input_shape=input_shape, weights=weights)
        
        
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.dense_1 = tf.keras.layers.Dense(1024)
        self.dense_2 = tf.keras.layers.Dense(5,activation='sigmoid')

    @tf.function
    def call(self, inputs, **kwargs):
        x = tf.reshape(inputs, (-1, IMG_SIZE, IMG_SIZE, 3))
        x = self.engine(x)
        shape = x.shape
        x = tf.reshape(x, (-1, N_TILES, shape[1], shape[2], shape[3])) 
        x = tf.transpose(x, perm=[0, 2, 1, 3, 4])
        x = tf.reshape(x, (-1, shape[1], N_TILES*shape[2], shape[3])) 
        x = self.avg_pool2d(x)
        x = self.dropout(x, training=False)
        x = self.dense_1(x)
        x = tf.nn.relu(x)
        return self.dense_2(x)
    
is_ef = True
backbone_name = 'efficientnet-b2'
N_TILES = 48
IMG_SIZE = 256


if backbone_name.startswith('efficientnet'):
    model_fn = getattr(efn, f'EfficientNetB{backbone_name[-1]}')
    
model = ConvNet(engine=model_fn, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights=None)


n_TTA = 2        
dummy_data = tf.zeros((n_TTA * N_TILES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
_ = model(dummy_data)           



TRAIN = '../input/prostate-cancer-grade-assessment/train_images/'
MASKS = '../input/prostate-cancer-grade-assessment/train_label_masks/'
BASE_PATH = '../input/prostate-cancer-grade-assessment/'
train = pd.read_csv(BASE_PATH + "train.csv")
train.head()

sub = pd.read_csv("../input/prostate-cancer-grade-assessment/sample_submission.csv")
sub.head()

test = pd.read_csv("../input/prostate-cancer-grade-assessment/test.csv")
test.head()

TEST = '../input/prostate-cancer-grade-assessment/test_images/'


PRED_PATH = TEST 
df = sub
t_df = test

In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/efficientnet-b2-48-full-epochs60.h5')

####

if os.path.exists(PRED_PATH):
    predictions30 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions30.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions30 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions30.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions32 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions32.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions32 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.5*np.sum(pred)
        predictions32.append(isup)

        del patches, img
        gc.collect()
        


In [None]:

from tensorflow.keras.applications import densenet as den

class ConvNet(tf.keras.Model):

    def __init__(self, engine, input_shape, weights):
        super(ConvNet, self).__init__()
        
        self.engine = engine(
            include_top=False, input_shape=input_shape, weights=weights)
        
        
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.dense_1 = tf.keras.layers.Dense(1024)
        self.dense_2 = tf.keras.layers.Dense(5,activation='sigmoid')

    @tf.function
    def call(self, inputs, **kwargs):
        x = tf.reshape(inputs, (-1, IMG_SIZE, IMG_SIZE, 3))
        x = self.engine(x)
        shape = x.shape
        x = tf.reshape(x, (-1, N_TILES, shape[1], shape[2], shape[3])) 
        x = tf.transpose(x, perm=[0, 2, 1, 3, 4])
        x = tf.reshape(x, (-1, shape[1], N_TILES*shape[2], shape[3])) 
        x = self.avg_pool2d(x)
        x = self.dropout(x, training=False)
        x = self.dense_1(x)
        x = tf.nn.relu(x)
        return self.dense_2(x)
    


N_TILES = 48
IMG_SIZE = 256



model_fn = getattr(den, 'DenseNet121')
    
model = ConvNet(engine=model_fn, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights=None)


n_TTA = 4        
dummy_data = tf.zeros((n_TTA * N_TILES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
_ = model(dummy_data)      



TRAIN = '/data/prostate-cancer-grade-assessment/train_images/'
MASKS = '/data/prostate-cancer-grade-assessment/train_images/train_label_masks/'
BASE_PATH = '/data/prostate-cancer-grade-assessment/train_images/'
train = pd.read_csv(BASE_PATH + "train.csv")
train.head()

sub = pd.read_csv("/data/prostate-cancer-grade-assessment/sample_submission.csv")
sub.head()

test = pd.read_csv("/data/prostate-cancer-grade-assessment/test.csv")
test.head()

TEST = '/data/prostate-cancer-grade-assessment/test_images/'


PRED_PATH = TEST 
df = sub
t_df = test

In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/DenseNet121-48-full-epochs60.h5')

####

if os.path.exists(PRED_PATH):
    predictions40 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)
        patches1 = patches.copy()
        patches2 = patches.copy()
        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches1[k, ] = transforms_val0(image=patches1[k, ])['image']
            patches2[k, ] = transforms_val1(image=patches2[k, ])['image']
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches1, patches2, patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.25*np.sum(pred)
        predictions40.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions40 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)
        patches1 = patches.copy()
        patches2 = patches.copy()
        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches1[k, ] = transforms_val0(image=patches1[k, ])['image']
            patches2[k, ] = transforms_val1(image=patches2[k, ])['image']
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches1, patches2, patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.25*np.sum(pred)
        predictions40.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions42 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)
        patches1 = patches.copy()
        patches2 = patches.copy()
        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches1[k, ] = transforms_val0(image=patches1[k, ])['image']
            patches2[k, ] = transforms_val1(image=patches2[k, ])['image']
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches1, patches2, patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.25*np.sum(pred)
        predictions42.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions42 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)
        patches1 = patches.copy()
        patches2 = patches.copy()
        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches1[k, ] = transforms_val0(image=patches1[k, ])['image']
            patches2[k, ] = transforms_val1(image=patches2[k, ])['image']
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches1, patches2, patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image) 
        isup = 0.25*np.sum(pred)
        predictions42.append(isup)

        del patches, img
        gc.collect()


        
del model, dummy_data, sub, pred, train, isup, image
del patches1,patches2,patches3,patches4    

gc.collect()  

In [None]:

sz = 256
N = 42
def tile(img):
    result = []
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz, (sz - shape[1]%sz)%sz
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    return img

def tile2(img):
    result = []
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz + ((sz * 2) // 2), (sz - shape[1]%sz)%sz + ((sz * 2) // 2)
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    return img




class ConvNet(tf.keras.Model):

    def __init__(self, engine, input_shape, weights):
        super(ConvNet, self).__init__()
        
        self.engine = engine(
            include_top=False, input_shape=input_shape, weights=weights)
        
        
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.dense_1 = tf.keras.layers.Dense(1024)
        self.dense_2 = tf.keras.layers.Dense(1)

    @tf.function
    def call(self, inputs, **kwargs):
        x = tf.reshape(inputs, (-1, IMG_SIZE, IMG_SIZE, 3))
        x = self.engine(x)
        shape = x.shape
        x = tf.reshape(x, (-1, N_TILES, shape[1], shape[2], shape[3])) 
        x = tf.transpose(x, perm=[0, 2, 1, 3, 4])
        x = tf.reshape(x, (-1, shape[1], N_TILES*shape[2], shape[3])) 
        x = self.avg_pool2d(x)
        x = self.dropout(x, training=False)
        x = self.dense_1(x)
        x = tf.nn.relu(x)
        return self.dense_2(x)
    
is_ef = True
backbone_name = 'efficientnet-b0'

N_TILES = 42
IMG_SIZE = 256

if backbone_name.startswith('efficientnet'):
    model_fn = getattr(efn, f'EfficientNetB{backbone_name[-1]}')

    
model = ConvNet(engine=model_fn, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights=None)



TRAIN = '../input/prostate-cancer-grade-assessment/train_images/'
MASKS = '../input/prostate-cancer-grade-assessment/train_label_masks/'
BASE_PATH = '../input/prostate-cancer-grade-assessment/'
train = pd.read_csv(BASE_PATH + "train.csv")
train.head()

sub = pd.read_csv("../input/prostate-cancer-grade-assessment/sample_submission.csv")
sub.head()

test = pd.read_csv("../input/prostate-cancer-grade-assessment/test.csv")
test.head()

TEST = '../input/prostate-cancer-grade-assessment/test_images/'


PRED_PATH = TEST 
df = sub
t_df = test




####





n_TTA = 2        
dummy_data = tf.zeros((n_TTA * N_TILES, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
_ = model(dummy_data)   



In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/efficientnet-b0-fold0-epochs40.h5')

####

if os.path.exists(PRED_PATH):
    predictions50 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions50.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions50 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions50.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions52 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions52.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions52 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions52.append(isup)

        del patches, img
        gc.collect()
        


In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/efficientnet-b0-fold4-epochs60.h5')

####

if os.path.exists(PRED_PATH):
    predictions60 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions60.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions60 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions60.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions62 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions62.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions62 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions62.append(isup)

        del patches, img
        gc.collect()
        







In [None]:
model.compile(optimizer = tf.keras.optimizers.Adam(lr= 1e-05), loss= tf.nn.sigmoid_cross_entropy_with_logits)
model.load_weights('../input/pandaenetb042x256x256x3/efficientnet-b0-fold2-epochs40.h5')

####

if os.path.exists(PRED_PATH):
    predictions70 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions70.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions70 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions70.append(isup)

        del patches, img
        gc.collect()


####

if os.path.exists(PRED_PATH):
    predictions72 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions72.append(isup)

        del patches, img
        gc.collect()

else:
    PRED_PATH = TRAIN
    df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    df=subDf_train
    df = df[['image_id','isup_grade']].copy()
    predictions72 = []
    for index, row in tqdm(df.iterrows(), total = df.shape[0]):
        
        
        image_id = row['image_id']
        
        img_path = PRED_PATH + image_id + '.tiff' #BASE_PATH
        
        img = skimage.io.MultiImage(img_path)[1]
        
        patches = tile2(img)

        patches3 = patches.copy()
        patches4 = patches.copy() 
        
        k = 0
        while k < N_TILES:
            patches3[k, ] = transforms_val2(image=patches3[k, ])['image']
            patches4[k, ] = transforms_val3(image=patches4[k, ])['image']
            k += 1
        
        image = np.stack([patches3, patches4])
        image = image / 255.0
        
        pred = model.predict(image)
        isup = np.mean(pred)
        predictions72.append(isup)

        del patches, img
        gc.collect()
        



del model, dummy_data, sub, pred, train, isup, image
del patches3,patches4    

gc.collect()  

In [None]:
# Calculate the weighted average of predictions
PREDS = (1/5)*PREDS + (1/5)*PREDS1 + (1/5)*PREDS2 + (1/5)*PREDS3 + (1/5)*PREDS4

# Calculate the final predictions using a weighted average. The weights were determined looking at other 
# solutions present in the literature
FINAL = np.round((6/10)*PREDS +
                  (2/60)*np.array(predictions10) + (2/60)*np.array(predictions12) + 
                  (2/60)*np.array(predictions20) + (2/60)*np.array(predictions22) +
                  (2/60)*np.array(predictions30) + (2/60)*np.array(predictions32) +
                  (0.5/10)*np.array(predictions40) + (0.5/10)*np.array(predictions42) +
                  (1/60)*np.array(predictions50) + (1/60)*np.array(predictions52) +
                  (1/60)*np.array(predictions60) + (1/60)*np.array(predictions62) +
                  (1/60)*np.array(predictions70) + (1/60)*np.array(predictions72))

# Convert the final predictions to integer values
df['isup_grade'] = FINAL.astype(int)

# Save the predictions to a CSV file
df[['image_id', 'isup_grade']].to_csv('submission.csv', index=False)

# Calculate and print some statistics
true_values = np.array(subDf_train['isup_grade'])
print(df.head())
print()
print(df.isup_grade.value_counts())

# Results and Model Evaluation

In [None]:
# Calculate the average predictions for each model
average_predictions1 = np.round(np.mean([predictions10, predictions12], axis=0))
average_predictions2 = np.round(np.mean([predictions20, predictions22], axis=0))
average_predictions3 = np.round(np.mean([predictions30, predictions32], axis=0))
average_predictions4 = np.round(np.mean([predictions40, predictions42], axis=0))
average_predictions5 = np.round(np.mean([predictions50, predictions52], axis=0))
average_predictions6 = np.round(np.mean([predictions60, predictions62], axis=0))
average_predictions7 = np.round(np.mean([predictions70, predictions72], axis=0))

# Plot the line plot for each set of average predictions
plt.plot(average_predictions1, label='efficientnet-b0-48-full-epochs60.h5', marker='o')
plt.plot(average_predictions2, label='efficientnet-b1-48-full-epochs60.h5', marker='o')
plt.plot(average_predictions3, label='efficientnet-b2-48-full-epochs60.h5', marker='o')
plt.plot(average_predictions4, label='DenseNet121-48-full-epochs60.h5', marker='o')
plt.plot(average_predictions5, label='efficientnet-b0-fold0-epochs40.h5', marker='o')
plt.plot(average_predictions6, label='efficientnet-b0-fold4-epochs60.h5', marker='o')
plt.plot(average_predictions7, label='efficientnet-b0-fold2-epochs40.h5', marker='o')

# Set the x-axis label
plt.xlabel('Image ID')
# Set the y-axis label
plt.ylabel('Average Predictions')
# Set the title of the plot
plt.title('Average Predictions for Each Image')
# Add a legend to the plot
plt.legend()

# Rotate the x-axis tick labels vertically
plt.xticks(rotation='vertical')

# Display the plot
plt.show()


In [None]:
# Plot the line plot for each set of predictions and true values
plt.plot(np.round(PREDS), label='PREDS', marker='o')
plt.plot(np.round(PREDS1), label='PREDS1', marker='o')
plt.plot(np.round(PREDS2), label='PREDS2', marker='o')
plt.plot(np.round(PREDS3), label='PREDS3', marker='o')
plt.plot(np.round(PREDS4), label='PREDS4', marker='o')
plt.plot(np.round(predictions10), label='efficientnet-b0-48-full-epochs60.h5(1)', marker='o')
plt.plot(np.round(predictions12), label='efficientnet-b1-48-full-epochs60.h5(2)', marker='o')
plt.plot(np.round(predictions20), label='efficientnet-b2-48-full-epochs60.h5(1)', marker='o')
plt.plot(np.round(predictions22), label='efficientnet-b2-48-full-epochs60.h5(2)', marker='o')
plt.plot(np.round(predictions30), label='efficientnet-b2-48-full-epochs60.h5(1)', marker='o')
plt.plot(np.round(predictions32), label='efficientnet-b2-48-full-epochs60.h5(2)', marker='o')
plt.plot(np.round(predictions40), label='DenseNet121-48-full-epochs60.h5(1)', marker='o')
plt.plot(np.round(predictions42), label='DenseNet121-48-full-epochs60.h5(2)', marker='o')
plt.plot(np.round(predictions50), label='efficientnet-b0-fold0-epochs40.h5(1)', marker='o')
plt.plot(np.round(predictions52), label='efficientnet-b0-fold2-epochs40.h5(2)', marker='o')
plt.plot(np.round(predictions60), label='efficientnet-b0-fold4-epochs60.h5(1)', marker='o')
plt.plot(np.round(predictions62), label='efficientnet-b0-fold4-epochs60.h5(2)', marker='o')
plt.plot(np.round(predictions70), label='efficientnet-b0-fold2-epochs40.h5(1)', marker='o')
plt.plot(np.round(predictions72), label='efficientnet-b0-fold2-epochs40.h5(2)', marker='o')
plt.plot(true_values, marker='o', label='True Value', linewidth=3, linestyle='--')

# Set the x-axis label
plt.xlabel('Image')
# Set the y-axis label
plt.ylabel('Average Predictions')
# Set the title of the plot
plt.title('Average Predictions for Each Image')
# Add a legend to the plot
plt.legend()
plt.xticks(rotation='vertical')

plt.show()


In [None]:
# Define the x-axis values (image IDs)
image_ids = subDf_train['image_id']

# Define the predictions for each model
predictions = [
    np.round(PREDS),
    np.round(PREDS1),
    np.round(PREDS2),
    np.round(PREDS3),
    np.round(PREDS4),
    np.round(predictions10),
    np.round(predictions12),
    np.round(predictions20),
    np.round(predictions22),
    np.round(predictions30),
    np.round(predictions32),
    np.round(predictions40),
    np.round(predictions42),
    np.round(predictions50),
    np.round(predictions52),
    np.round(predictions60),
    np.round(predictions62),
    np.round(predictions70),
    np.round(predictions72)
]
prediction_names = [
    'PREDS', 'PREDS1', 'PREDS2', 'PREDS3', 'PREDS4', 'predictions10', 'predictions12', 'predictions20', 'predictions22',
    'predictions30', 'predictions32', 'predictions40', 'predictions42', 'predictions50', 'predictions52',
    'predictions60', 'predictions62', 'predictions70', 'predictions72'
]

# Define the number of rows and columns for subplots
num_rows = 4
num_cols = 5

# Create subplots for comparing each model's predictions with the true values
fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(20, 16))

# Iterate through each model's predictions
for i, ax in enumerate(axes.flatten()):
    if i < len(predictions):
        label=prediction_names[i]
        # Plot the true values
        ax.plot(true_values, marker='o', label='True Value',linewidth=3, linestyle='--')
        # Plot the predictions
        ax.plot(predictions[i], label=label, marker='o')
        

        
        # Set labels and title
        ax.set_xlabel('Image ID')
        ax.set_ylabel('Predictions')
        ax.set_title(label)
        
        # Rotate the x-axis tick labels vertically
        ax.tick_params(axis='x', rotation=90)
        
        # Add legend
        ax.legend()
    else:
        # Remove empty subplots
        ax.axis('off')

# Adjust the spacing between subplots
plt.tight_layout()

# Display the plot
plt.show() 

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Define the number of rows and columns for subplots
num_rows = 4
num_cols = 5

# Create subplots for comparing each model's predictions with the true values
fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(20, 16))

# Iterate through each model's predictions
for i, ax in enumerate(axes.flatten()):
    if i < len(predictions):
        label = prediction_names[i]
        
        # Get the corresponding true values and predictions
        true_vals = true_values[:len(predictions[i])]
        preds = predictions[i]
        
        # Compute the confusion matrix
        cm = confusion_matrix(true_vals, np.round(preds))
        
        # Create a heatmap for the confusion matrix
        sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', ax=ax)
        
        # Set labels and title
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')
        ax.set_title(label)
        
        # Rotate the x-axis tick labels vertically
        ax.tick_params(axis='x', rotation=90)
    else:
        # Remove empty subplots
        ax.axis('off')

# Adjust the spacing between subplots
plt.tight_layout()

# Display the plot
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Define the number of rows and columns for subplots
num_rows = 4
num_cols = 5

# Create subplots for comparing each model's predictions with the true values
fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(20, 16))

# Now we iterate through each model's predictions
for i, ax in enumerate(axes.flatten()):
    if i < len(predictions):
        label = prediction_names[i]
        
        true_vals = true_values[:len(predictions[i])]
        preds = predictions[i]
        
        # Compute the confusion matrix
        cm = confusion_matrix(true_vals, np.round(preds))
        
        # Normalize the confusion matrix
        cm_norm = cm / cm.sum(axis=1, keepdims=True)
        
        # Create a heatmap for the normalized confusion matrix
        sns.heatmap(cm_norm, annot=True, cmap='Blues', fmt='.2f', ax=ax)
        
        # Set labels and title
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')
        ax.set_title(label)
        
        # Rotate the x-axis tick labels vertically
        ax.tick_params(axis='x', rotation=90)
    else:
        # Remove empty subplots
        ax.axis('off')
        
plt.tight_layout()

# Display the plot
plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Create an array of true labels from subDf_train dataframe
true_labels = np.array(subDf_train['isup_grade'])

# Print the true_labels array
print(true_labels)

# Import the cohen_kappa_score function from sklearn.metrics
from sklearn.metrics import cohen_kappa_score

In [None]:
cm = confusion_matrix(FINAL, true_labels)

# Plot confusion matrix as a heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Normalized Confusion Matrix Heatmap
plt.figure(figsize=(8, 6))
normalized_cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

sns.heatmap(normalized_cm, annot=True, fmt='.2f', cmap='Blues')
plt.title('Normalized Confusion Matrix - Multiclass Classification')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
def calculate_prediction_percentages(predictions, actual_values): #Calculate the percentage of overdiagnosis vs underdiagnosis
    total_predictions = len(predictions)
    greater_count = 0
    lower_count = 0

    for prediction, actual_value in zip(predictions, actual_values):
        if prediction > actual_value:
            greater_count += 1
        elif prediction < actual_value:
            lower_count += 1

    greater_percentage = (greater_count / total_predictions) * 100
    lower_percentage = (lower_count / total_predictions) * 100

    return greater_percentage, lower_percentage, greater_count, lower_count

# Calculate the percentages and counts using the FINAL predictions and true labels
greater_percentage, lower_percentage, greater_count, lower_count = calculate_prediction_percentages(FINAL, true_labels)

print(f"Percentage of predictions greater than actual values: {greater_percentage}%; Absolute count: {greater_count}")
print(f"Percentage of predictions lower than actual values: {lower_percentage}%; Absolute count: {lower_count}")

In [None]:
def quadratic_weighted_kappa(y_hat, y):
    return cohen_kappa_score(y_hat, y, weights='quadratic')

count = 0
for index, val in enumerate(true_labels):
    if FINAL[index] == val:
        count += 1

print(f"Accuracy Train is {(count / desiredNumberOfImages)* 100}")
print(f'Kappa Train is {quadratic_weighted_kappa(FINAL, true_labels)}')

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Compute the confusion matrix
cm = confusion_matrix(true_labels, FINAL)

# Initialize dictionaries to store TP, FP, TN, FN, and other metrics for each class
TP_dict = {}
FP_dict = {}
TN_dict = {}
FN_dict = {}
accuracy_dict = {}
precision_dict = {}
recall_dict = {}
specificity_dict = {}
f1_score_dict = {}

# Calculate TP, FP, TN, FN, and other metrics for each class
for class_id in range(6):
    TP = cm[class_id, class_id]
    FP = np.sum(cm[:, class_id]) - TP
    TN = np.sum(cm) - (TP + FP + np.sum(cm[class_id, :]))
    FN = np.sum(cm[class_id, :]) - TP

    TP_dict[class_id] = TP
    FP_dict[class_id] = FP
    TN_dict[class_id] = TN
    FN_dict[class_id] = FN

    accuracy = (TP + TN) / (TP + FP + TN + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    specificity = TN / (TN + FP)
    f1_score = (2 * precision * recall) / (precision + recall)

    accuracy_dict[class_id] = accuracy
    precision_dict[class_id] = precision
    recall_dict[class_id] = recall
    specificity_dict[class_id] = specificity
    f1_score_dict[class_id] = f1_score

# Print TP, FP, TN, FN, and other metrics for each class
for class_id in range(6):
    print(f"Class {class_id}:")
    print("True Positive (TP):", round(TP_dict[class_id], 3))
    print("False Positive (FP):", round(FP_dict[class_id], 3))
    print("True Negative (TN):", round(TN_dict[class_id], 3))
    print("False Negative (FN):", round(FN_dict[class_id], 3))
    print("Accuracy:", round(accuracy_dict[class_id], 3))
    print("Precision:", round(precision_dict[class_id], 3))
    print("Recall:", round(recall_dict[class_id], 3))
    print("Specificity:", round(specificity_dict[class_id], 3))
    print("F1 Score:", round(f1_score_dict[class_id], 3))
    print()



In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Define the classes and metrics
classes = [0, 1, 2, 3, 4, 5]
metrics = ['Accuracy', 'Precision', 'Recall', 'Specificity', 'F1 Score']
class_labels = ['ISUP ' + str(class_id) for class_id in classes]  # Update the class labels


# Create a figure and axis for the star plot
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw=dict(polar=True))

# Plot the star plot for each class with the custom color
colors = ["red", "green", "blue", "grey", "orange", "yellow"]
for class_id, class_label, color in zip(classes, class_labels, colors):
    values = [
        accuracy_dict[class_id],
        precision_dict[class_id],
        recall_dict[class_id],
        specificity_dict[class_id],
        f1_score_dict[class_id]
    ]
    angles = np.linspace(0, 2 * np.pi, len(metrics), endpoint=False).tolist()
    values += values[:1]  # Repeat the first value to close the plot
    angles += angles[:1]  # Repeat the first angle to close the plot
    ax.plot(angles, values, label=class_label, color=color)  # Use the specific color
    ax.fill(angles, values, alpha=0.05)

# Set the labels and title
ax.set_xticks(angles[:-1])
ax.set_xticklabels(metrics)
ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
ax.set_yticklabels(['0.2', '0.4', '0.6', '0.8', '1.0'])
ax.set_title('Star Plot of Metrics for Different ISUP Grades')

# Add a legend
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

plt.show()

In [None]:
overall_accuracy = np.mean(list(accuracy_dict.values()))
overall_precision = np.mean(list(precision_dict.values()))
overall_recall = np.mean(list(recall_dict.values()))
overall_specificity = np.mean(list(specificity_dict.values()))
overall_f1_score = np.mean(list(f1_score_dict.values()))
print("Overall Accuracy:", overall_accuracy)
print("Overall Precision:", overall_precision)
print("Overall Recall:", overall_recall)
print("Overall Specificity:", overall_specificity)
print("Overall F1 Score:", overall_f1_score)

In [None]:
#Calculating our expected number of unique draws
total_numbers = 10616
draw_size = 200
num_draws = 12

expected_total_unique_objects = total_numbers * (1 - (1 - draw_size/total_numbers)**num_draws)

print("Expected total number of unique objects:", expected_total_unique_objects)

# Explainability

In [None]:
import matplotlib.pyplot as plt

image_ids = np.array(subDf_train['image_id'].values)
true_values = subDf_train['isup_grade'].values
predictions = FINAL # Replace with your list of predictions

# Set the x-axis positions
x = range(len(image_ids))

# Create the line plots
plt.figure(figsize=(8, 6))
plt.plot(x, true_values, marker='o', label='True Value')
plt.plot(x, predictions, marker='o', label='Model Prediction')

# Set the x-axis ticks and labels
plt.xlabel('Image ID')
plt.ylabel('ISUP Grade')

# Set the title and legend
plt.title('True Value vs. Prediction')
plt.legend()

# Display the plot
plt.show()

In [None]:
import pandas as pd

# Create a new column 'predicted ISUP' in subDf_train
subDf_train['predicted_ISUP'] = [int(x) for x in FINAL] #it is to decide if we want to round up or round down the ISUP value:underdiagnose or over diagnose

## Visualization of histological sample and model prediction

In [None]:
import matplotlib.pyplot as plt
import skimage.io

def display_image_with_prediction(image_id, data_provider, gleason_score, true_value, prediction):
    image_path = '../input/prostate-cancer-grade-assessment/train_images/' + image_id + '.tiff'
    image = skimage.io.MultiImage(image_path)[1]
    
    plt.figure(figsize=(8, 6))
    plt.imshow(image)
    plt.title(f"Image ID: {image_id} | Data Provider: {data_provider} | Gleason Score: {gleason_score} | True Value: {true_value} | Prediction: {prediction}")
    plt.axis('off')
    plt.show()

# Get the first 5 image IDs, Gleason scores, true values, and predictions
image_ids = subDf_train['image_id'].values[:5]
gleason_scores = subDf_train['gleason_score'].values[:5]
data_providers = subDf_train['data_provider'].values[:5]
true_values = subDf_train['isup_grade'].values[:5]
predictions = subDf_train['predicted_ISUP'].values[:5]

# Display images and predictions
for image_id, gleason_score, data_provider, true_value, prediction in zip(image_ids, gleason_scores, data_providers, true_values, predictions):
    display_image_with_prediction(image_id, data_provider, gleason_score, true_value, prediction)

## Visualization with mask overlay

In [None]:
def overlay_mask_on_slide(slide, mask, center='radboud', alpha=0.8, max_size=(800, 800), title=''):
    """Show a mask overlayed on a slide."""
    
    if center not in ['radboud', 'karolinska']:
        raise Exception("Unsupported palette, should be one of [radboud, karolinska].")
    
    # Load data from the highest level
    slide_data = slide.read_region((0, 0), slide.level_count - 1, slide.level_dimensions[-1])
    mask_data = mask.read_region((0, 0), mask.level_count - 1, mask.level_dimensions[-1])
    
    # Mask data is present in the R channel
    mask_data = mask_data.split()[0]
    
    # Create alpha mask
    alpha_int = int(round(255 * alpha))
    if center == 'radboud':
        alpha_content = np.less(mask_data.split()[0], 2).astype('uint8') * alpha_int + (255 - alpha_int)
    elif center == 'karolinska':
        alpha_content = np.less(mask_data.split()[0], 1).astype('uint8') * alpha_int + (255 - alpha_int)
    
    alpha_content = Image.fromarray(alpha_content)
    preview_palette = np.zeros(shape=768, dtype=int)
    
    if center == 'radboud':
        # Mapping: {0: background, 1: stroma, 2: benign epithelium, 3: Gleason 3, 4: Gleason 4, 5: Gleason 5}
        preview_palette[0:18] = (np.array([0, 0, 0, 0.5, 0.5, 0.5, 0, 1, 0, 1, 1, 0.7, 1, 0.5, 0, 1, 0, 0]) * 255).astype(int)
    elif center == 'karolinska':
        # Mapping: {0: background, 1: benign, 2: cancer}
        preview_palette[0:9] = (np.array([0, 0, 0, 0, 1, 0, 1, 0, 0]) * 255).astype(int)
    
    mask_data.putpalette(data=preview_palette.tolist())
    mask_rgb = mask_data.convert(mode='RGB')
    
    overlayed_image = Image.composite(image1=slide_data, image2=mask_rgb, mask=alpha_content)
    overlayed_image.thumbnail(size=max_size, resample=0)
    
    title= f"Image ID: {image_id} | Data Provider: {data_provider} | Gleason Score: {gleason_score} | True Value: {true_value} | Prediction: {prediction}"
    
    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
    fig.suptitle(title, fontsize=14, fontweight='bold')
    
    axes[0].imshow(slide_data)
    axes[0].set_title('Slide Image')
    axes[0].axis('off')
    
    axes[1].imshow(overlayed_image)
    axes[1].set_title('Overlayed Image')
    axes[1].axis('off')
    
    plt.show()

In [None]:
import openslide
from PIL import Image
import random

data_dir = '/kaggle/input/prostate-cancer-grade-assessment/train_images'
mask_dir = '/kaggle/input/prostate-cancer-grade-assessment/train_label_masks'

# Select 5 random samples
random_samples = subDf_train.sample(5)

# Iterate over each random sample
for index, row in random_samples.iterrows():
    image_id = row['image_id']
    data_provider = row['data_provider']
    gleason_score = row['gleason_score']
    true_value = row['isup_grade']
    prediction = row['predicted_ISUP']
    
    # Set the file paths for the slide and mask
    slide_path = os.path.join(data_dir, f'{image_id}.tiff')
    mask_path = os.path.join(mask_dir, f'{image_id}_mask.tiff')
    
    # Overlay the mask on the slide for radboud data provider
    if data_provider == 'radboud':
        slide = openslide.OpenSlide(slide_path)
        mask = openslide.OpenSlide(mask_path)
        overlay_mask_on_slide(slide, mask, center='radboud', title=f"Image ID: {image_id} | Data Provider: {data_provider} | Gleason Score: {gleason_score} | True Value: {true_value} | Prediction: {prediction}")
        
    # Overlay the mask on the slide for karolinska data provider
    elif data_provider == 'karolinska':
        slide = openslide.OpenSlide(slide_path)
        mask = openslide.OpenSlide(mask_path)
        overlay_mask_on_slide(slide, mask, center='karolinska', alpha=0.5, title=f"Image ID: {image_id} | Data Provider: {data_provider} | Gleason Score: {gleason_score} | True Value: {true_value} | Prediction: {prediction}")
