In [None]:
import os
from skimage import io, transform
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import torch
import time
import openslide


# Tiling configuration read as module-level globals by get_tiles / tiles_to_img.
# Number of tiles kept per slide; tiles_to_img arranges them on an
# int(sqrt(n_tiles)) x int(sqrt(n_tiles)) grid.
n_tiles = 16
# Side length, in pixels, of each square tile cut by get_tiles.
tile_size = 256
# Side length, in pixels, of one mosaic cell in tiles_to_img. Tiles are written
# into cells of this size without resizing.
image_size = 256

def get_tiles(img, mode=0):
    """Cut an image into square tiles and keep the ``n_tiles`` with the lowest pixel sum.

    The image is padded with 255 so both spatial dimensions become multiples of
    the module-level ``tile_size``, split into ``tile_size`` x ``tile_size``
    tiles in row-major grid order, and ranked by the sum of all values in each
    tile (ascending). If the grid holds fewer than ``n_tiles`` tiles, extra
    tiles filled with 255 are appended before ranking.

    Args:
        img: array of shape (H, W, C). Any channel count is accepted.
        mode: adds ``tile_size * mode // 2`` extra padding per axis, shifting
            the tile grid. The reshape requires the padded size to stay a
            multiple of ``tile_size``, which holds when that extra amount is
            itself a multiple of ``tile_size`` (e.g. mode 0 or 2); otherwise
            numpy raises ``ValueError``.

    Returns:
        A list of ``n_tiles`` dicts ``{'img': tile, 'idx': rank}``. ``idx`` is
        the tile's position in the ranked output, not its position in the
        original grid.
    """
    h, w, c = img.shape

    # Padding needed to reach the next multiple of tile_size, plus the optional
    # grid shift controlled by `mode`.
    pad_h = (tile_size - h % tile_size) % tile_size + ((tile_size * mode) // 2)
    pad_w = (tile_size - w % tile_size) % tile_size + ((tile_size * mode) // 2)

    # Split the padding between both sides of each axis; fill value is 255.
    padded = np.pad(
        img,
        [[pad_h // 2, pad_h - pad_h // 2], [pad_w // 2, pad_w - pad_w // 2], [0, 0]],
        constant_values=255,
    )

    # (rows, tile_size, cols, tile_size, C) -> (rows * cols, tile_size, tile_size, C).
    # The channel count comes from the input instead of a hard-coded 3.
    tiles = padded.reshape(
        padded.shape[0] // tile_size, tile_size,
        padded.shape[1] // tile_size, tile_size,
        c,
    )
    tiles = tiles.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, c)

    # Top up with all-255 tiles when the grid is smaller than n_tiles.
    if len(tiles) < n_tiles:
        tiles = np.pad(tiles, [[0, n_tiles - len(tiles)], [0, 0], [0, 0], [0, 0]], constant_values=255)

    # Rank by total pixel sum. A stable sort keeps grid order among tiles with
    # equal sums, so the selection is deterministic.
    order = np.argsort(tiles.reshape(tiles.shape[0], -1).sum(-1), kind="stable")[:n_tiles]
    tiles = tiles[order]

    return [{'img': tile, 'idx': rank} for rank, tile in enumerate(tiles)]

def tiles_to_img(tiles, mode):
    """Lay tiles out on a square grid and return the resulting mosaic.

    The grid has ``int(sqrt(n_tiles))`` cells per side, each ``image_size``
    pixels wide. Tiles are placed in row-major order; cells with no
    corresponding tile are filled with 255.

    Args:
        tiles: sequence of dicts carrying the tile array under ``'img'``.
        mode: when truthy, every cell (including filler cells) is inverted as
            ``255 - value`` before being written.

    Returns:
        A float array of shape (side, side, 3) with
        ``side = image_size * int(sqrt(n_tiles))``.
    """
    per_side = int(np.sqrt(n_tiles))
    side = image_size * per_side
    canvas = np.zeros((side, side, 3))

    for position in range(per_side * per_side):
        row, col = divmod(position, per_side)

        if position < len(tiles):
            patch = tiles[position]['img']
        else:
            # Filler for cells beyond the supplied tiles.
            patch = np.ones((image_size, image_size, 3)).astype(np.uint8) * 255

        if mode:
            patch = 255 - patch

        top = row * image_size
        left = col * image_size
        canvas[top:top + image_size, left:left + image_size] = patch

    return canvas

class Dataset(Dataset):
    """Whole-slide segmentation dataset returning tiled image / label mosaics.

    Each item reads pyramid level 2 of a slide and of its label mask with
    OpenSlide, cuts both into ``tile_size`` tiles on one shared grid, keeps the
    ``n_tiles`` tiles whose *image* content has the lowest pixel sum, and
    assembles the image tiles and the spatially matching mask tiles into two
    mosaics with ``tiles_to_img``.

    NOTE: the class statement reuses the name ``Dataset``. The base class in
    the parentheses is whatever ``Dataset`` was bound to before this statement
    ran (``torch.utils.data.Dataset`` from the imports).
    """

    def __init__(self, images_dir, masks_dir, device, transform=None,
                 csv_path="/kaggle/input/prostate-cancer-grade-assessment/train.csv",
                 debug_image_id="000920ad0b612851f8e01bcc880d9b3d"):
        """Build the list of slide / mask file names.

        Args:
            images_dir: directory holding ``<image_id>.tiff`` slides.
            masks_dir: directory holding ``<image_id>_mask.tiff`` label masks.
            device: torch device the returned tensors are moved to.
            transform: stored on the instance; not applied anywhere in this class.
            csv_path: metadata CSV with ``image_id`` and ``data_provider``
                columns. Only read when ``debug_image_id`` is None.
            debug_image_id: when set (the default), the dataset contains only
                this one slide. Pass None to use every ``radboud`` row of the
                CSV whose mask file exists in ``masks_dir``.
        """
        self.image_dir = images_dir
        self.mask_dir = masks_dir
        self.transform = transform
        self.device = device

        if debug_image_id is not None:
            image_ids = [debug_image_id]
        else:
            df = pd.read_csv(csv_path)
            df = df.loc[df['data_provider'] == 'radboud']
            # Keep only slides that actually have a label mask on disk.
            image_ids = [
                image_id for image_id in df['image_id']
                if os.path.isfile(os.path.join(masks_dir, image_id + '_mask.tiff'))
            ]

        # File names are stored relative to images_dir / masks_dir and joined
        # with those directories in __getitem__.
        self.images = [image_id + '.tiff' for image_id in image_ids]
        self.masks = [image_id + '_mask.tiff' for image_id in image_ids]

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _read_level(path, level=2):
        """Read all of pyramid ``level`` of the slide at ``path`` as an H x W x 3 array."""
        # The context manager closes the OpenSlide handle so handles are not
        # leaked across items.
        with openslide.OpenSlide(path) as slide:
            region = slide.read_region((0, 0), level, slide.level_dimensions[level])
            # Keep the first three channels of the region that was read.
            return np.asarray(region)[:, :, 0:3]

    @staticmethod
    def _tile_grid(arr):
        """Pad ``arr`` (H x W x C) with 255 and split it into row-major ``tile_size`` tiles.

        At least ``n_tiles`` tiles are returned; missing ones are filled with 255.
        """
        h, w, c = arr.shape
        pad_h = (tile_size - h % tile_size) % tile_size
        pad_w = (tile_size - w % tile_size) % tile_size
        padded = np.pad(
            arr,
            [[pad_h // 2, pad_h - pad_h // 2], [pad_w // 2, pad_w - pad_w // 2], [0, 0]],
            constant_values=255,
        )
        grid = padded.reshape(
            padded.shape[0] // tile_size, tile_size,
            padded.shape[1] // tile_size, tile_size,
            c,
        )
        tiles = grid.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, c)
        if len(tiles) < n_tiles:
            tiles = np.pad(tiles, [[0, n_tiles - len(tiles)], [0, 0], [0, 0], [0, 0]], constant_values=255)
        return tiles

    @classmethod
    def _paired_tiles(cls, image, mask):
        """Select the same ``n_tiles`` grid cells from ``image`` and ``mask``.

        Tiles are ranked by the pixel sum of the *image* only, and that ranking
        is applied to both arrays so every mask tile labels the image tile at
        the same mosaic position.

        Returns:
            ``(image_tiles, mask_tiles)``, each a list of
            ``{'img': tile, 'idx': rank}`` dicts as consumed by ``tiles_to_img``.
        """
        # NOTE(review): assumes both arrays start at the same slide origin;
        # if their sizes differ they are cropped to the common top-left extent
        # so that both produce the same tile grid.
        rows = min(image.shape[0], mask.shape[0])
        cols = min(image.shape[1], mask.shape[1])
        image_tiles = cls._tile_grid(image[:rows, :cols])
        mask_tiles = cls._tile_grid(mask[:rows, :cols])

        order = np.argsort(
            image_tiles.reshape(len(image_tiles), -1).sum(-1), kind="stable"
        )[:n_tiles]

        picked_images = [{'img': tile, 'idx': rank} for rank, tile in enumerate(image_tiles[order])]
        picked_masks = [{'img': tile, 'idx': rank} for rank, tile in enumerate(mask_tiles[order])]
        return picked_images, picked_masks

    def __getitem__(self, index):
        """Return ``(images, masks)`` tensors for the slide at ``index``.

        ``images`` is a float32 tensor of shape (3, S, S) scaled by 1/255 from
        the inverted image mosaic; ``masks`` is a long tensor of shape (S, S)
        taken from channel 0 of the mask mosaic, with the fill value 255
        remapped to label 6.
        """
        img_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.masks[index])

        image = self._read_level(img_path)
        mask = self._read_level(mask_path)

        image_tiles, mask_tiles = self._paired_tiles(image, mask)

        # mode=1 inverts the image mosaic (255 - value); the mask mosaic is
        # assembled unchanged.
        images = tiles_to_img(image_tiles, 1)
        masks = tiles_to_img(mask_tiles, 0)
        masks = masks[:, :, 0]

        images = images.astype(np.float32)
        images /= 255
        images = images.transpose(2, 0, 1)

        masks = masks.astype(np.float32)
        # 255 is the padding / filler value used while tiling; give it its own label.
        masks = np.where(masks == 255, 6, masks)

        images = torch.tensor(images).to(self.device)
        masks = torch.tensor(masks).to(torch.long).to(self.device)
        return images, masks


In [None]:
import sys
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import torch.nn.functional as F
#from torchsummary import summary
import time

def load_image(infilename):
    """Load an image file and return its pixel data as a float32 numpy array.

    Args:
        infilename: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        ``np.ndarray`` with dtype float32 holding the decoded pixels.
    """
    # The context manager releases the underlying file once the pixels have
    # been decoded and copied into the array.
    with Image.open(infilename, 'r') as img:
        img.load()
        data = np.asarray(img, dtype="float32")
    return data


def double_conv(in_c, out_c):
    """Build the two-convolution stage used throughout the U-Net.

    Args:
        in_c: number of input channels of the first convolution.
        out_c: number of output channels of both convolutions.

    Returns:
        ``nn.Sequential`` of Conv2d(3x3, padding=1) -> ReLU -> Conv2d(3x3,
        padding=1) -> ReLU. With padding 1 the spatial size is unchanged.
    """
    stages = []
    # First conv maps in_c -> out_c, second keeps out_c -> out_c.
    for stage_in in (in_c, out_c):
        stages.append(nn.Conv2d(stage_in, out_c, kernel_size=3, padding=1))
        stages.append(nn.ReLU(inplace=True))
    return nn.Sequential(*stages)

def crop_img(tensor, target_tensor):
    """Center-crop ``tensor`` to the spatial size of ``target_tensor``.

    Height (dim 2) and width (dim 3) are handled independently, so non-square
    inputs and odd size differences both produce exactly the target size. When
    the difference is odd, the extra row/column is removed from the
    bottom/right.

    Args:
        tensor: 4-D tensor (N, C, H, W) to crop.
        target_tensor: 4-D tensor whose H and W define the output size.

    Returns:
        A view of ``tensor`` with shape (N, C, target_H, target_W).

    Raises:
        ValueError: if ``tensor`` is smaller than ``target_tensor`` in H or W.
    """
    target_h, target_w = target_tensor.size()[2:4]
    source_h, source_w = tensor.size()[2:4]

    if source_h < target_h or source_w < target_w:
        raise ValueError(
            f"cannot crop {source_h}x{source_w} down to larger size {target_h}x{target_w}"
        )

    top = (source_h - target_h) // 2
    left = (source_w - target_w) // 2
    return tensor[:, :, top:top + target_h, left:left + target_w]

class UNet(nn.Module):
    """U-Net with four down-sampling stages, a bottleneck and four up-sampling stages.

    Encoder stages are ``double_conv`` blocks (3 -> 64 -> 128 -> 256 -> 512)
    each followed by 2x2 max pooling, then a 512 -> 1024 bottleneck. Every
    decoder stage up-samples with a stride-2 transposed convolution,
    concatenates the cropped encoder output of the same depth along the channel
    axis, and applies another ``double_conv``. A final 1x1 convolution maps the
    64 channels to ``nb_classes`` output channels.
    """

    def __init__(self, nb_classes):
        super().__init__()
        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder + bottleneck: down_conv_1 .. down_conv_5.
        encoder_widths = (3, 64, 128, 256, 512, 1024)
        for stage, (c_in, c_out) in enumerate(zip(encoder_widths, encoder_widths[1:]), start=1):
            setattr(self, f"down_conv_{stage}", double_conv(c_in, c_out))

        # Decoder: up_trans_k halves the channels while doubling the resolution;
        # up_conv_k consumes the concatenation (hence c_in input channels again).
        decoder_widths = (1024, 512, 256, 128, 64)
        for stage, (c_in, c_out) in enumerate(zip(decoder_widths, decoder_widths[1:]), start=1):
            setattr(self, f"up_trans_{stage}", nn.ConvTranspose2d(c_in, c_out, 2, 2))
            setattr(self, f"up_conv_{stage}", double_conv(c_in, c_out))

        self.out = nn.Conv2d(64, nb_classes, 1)

    def forward(self, image):
        """Run the network on ``image`` and return the ``nb_classes``-channel output map."""
        encoders = (self.down_conv_1, self.down_conv_2, self.down_conv_3, self.down_conv_4)
        decoders = (
            (self.up_trans_1, self.up_conv_1),
            (self.up_trans_2, self.up_conv_2),
            (self.up_trans_3, self.up_conv_3),
            (self.up_trans_4, self.up_conv_4),
        )

        # Encoder: remember each stage's pre-pooling output for the skip connections.
        skips = []
        x = image
        for encode in encoders:
            x = encode(x)
            skips.append(x)
            x = self.max_pool_2x2(x)

        x = self.down_conv_5(x)

        # Decoder: consume the skips deepest-first.
        for upsample, fuse in decoders:
            x = upsample(x)
            skip = crop_img(skips.pop(), x)
            x = fuse(torch.cat([x, skip], 1))

        return self.out(x)

In [None]:
# --- Training configuration -------------------------------------------------
batch_size = 1
num_epochs = 100
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'DEVICE: {device}')

# --- Data ---------------------------------------------------------------------
dataset = Dataset(
    '/kaggle/input/prostate-cancer-grade-assessment/train_images',
    '/kaggle/input/prostate-cancer-grade-assessment/train_label_masks',
    device,
)
dataset_size = len(dataset)
train_loader = DataLoader(dataset, batch_size=batch_size)

# --- Model, loss, optimizer ---------------------------------------------------
# 7 output channels: the dataset remaps the mask fill value 255 to label 6, so
# targets can be as large as 6 (presumably slide labels 0-5 plus that fill
# class -- confirm against the mask encoding).
model = UNet(nb_classes=7)
model = model.to(device)
# CrossEntropyLoss takes raw logits and integer class-index targets.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Not filled by this cell; kept as module-level names.
PREDS = []
TARGETS = []

# --- Training loop ------------------------------------------------------------
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'epoch: {epoch}, i: {i}, loss: {loss.item()}')
        # Print the logits' shape directly; this avoids running a sigmoid over
        # the whole output and copying it to the host just to read its shape.
        print(tuple(y_pred.shape))
print('done')

In [None]:
pip install torchsummary

In [None]:
import pandas as pd

# Read train.csv into `df`. The bare `df` on the last line makes the frame the
# cell's displayed output. Other cells in this notebook read its `image_id`
# and `data_provider` columns from the same file.
df = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')
df

In [None]:
# Ad-hoc check: is this specific slide present under train_images?
# The boolean result is the cell's displayed output.
os.path.isfile("/kaggle/input/prostate-cancer-grade-assessment/train_images/0005f7aaab2800f6170c399693a96917.tiff")