In [None]:
#install openslide
!apt update && apt install -y openslide-tools
!pip install openslide-python

In [None]:
import openslide
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import StratifiedShuffleSplit 
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as D
import torchvision.transforms as transforms
from torch import nn


In [None]:
# Load the training metadata table and display it for a first look.
Dataset = pd.read_csv(
    '../input/prostate-cancer-grade-assessment/train.csv'
)
Dataset

In [None]:
# The provided test table is too small to be useful, so the idea is to split
# the training data instead (training: 60%, testing: 20%, validation: 20%).
testDataset = pd.read_csv(
    '../input/prostate-cancer-grade-assessment/test.csv'
)
testDataset

In [None]:
# Count missing values per column (none were found in train.csv).
Dataset.isna().sum()

In [None]:
# Distinct Gleason score labels present in the training metadata (11 classes).
gleason_score = Dataset['gleason_score'].unique()
print('Classes in gleason score: ',len(gleason_score),'\n')
# Number of cases per Gleason score.
print(Dataset['gleason_score'].value_counts())

In [None]:
import seaborn as sns

# Bar chart of the number of cases per Gleason score.
glsply = sns.countplot(data=Dataset, x='gleason_score', color='dodgerblue')
glsply.set(xlabel=None, title='gleason score')
# Tilt the tick labels so the score strings stay readable.
glsply.tick_params(axis='x', rotation=30)

In [None]:
# Distinct ISUP grades present in the training metadata (6 classes).
isUp_grade = Dataset['isup_grade'].unique()
print('Classes in isUp grade: ',len(isUp_grade),'\n')
# Number of cases per ISUP grade.
print(Dataset['isup_grade'].value_counts())

In [None]:
# Bar chart of the number of cases per ISUP grade.
glsply = sns.countplot(data=Dataset, x='isup_grade', color='green')
glsply.set(xlabel=None)
glsply.set(title='isUp grade')

In [None]:
# Bar chart of the number of cases contributed by each data provider.
glsply = sns.countplot(data=Dataset, x='data_provider')
glsply.set(xlabel=None)
glsply.set(title='Data Provider')

In [None]:
#do we need to apply weights?

In [None]:
# For every ISUP grade, list which Gleason scores occur and how often.
# (For isup_grade == 2 a '4+3' score may be a misdiagnosis; this helps to
# decide whether such cases need to be removed.)
for grade in range(6):
    print('isUp grade = '+str(grade)+': ')
    scores_for_grade = Dataset.loc[Dataset.isup_grade == grade, 'gleason_score']
    print(scores_for_grade.value_counts())
    print()

In [None]:
# Display the array of unique Gleason score labels collected earlier.
gleason_score

In [None]:
# Relabel Gleason scores as integer class ids 0-9.
# '0+0' and 'negative' are both mapped to class 0, so the 11 raw labels
# collapse into 10 target classes.
GLEASON_TO_TARGET = {
    '0+0': 0,
    'negative': 0,
    '3+3': 1,
    '3+4': 2,
    '4+3': 3,
    '4+4': 4,
    '3+5': 5,
    '5+3': 6,
    '4+5': 7,
    '5+4': 8,
    '5+5': 9,
}

# Vectorised lookup instead of a per-row iloc loop.
target_series = Dataset['gleason_score'].map(GLEASON_TO_TARGET)

# Fail loudly on labels missing from the mapping; otherwise rows would be
# skipped silently and the new column would no longer line up with the frame.
unmapped = Dataset.loc[target_series.isna(), 'gleason_score'].unique()
if len(unmapped) > 0:
    raise ValueError('Unmapped gleason_score values: {}'.format(list(unmapped)))

# Keep `target` as a plain list of ints, as before.
target = target_series.astype(int).tolist()

Dataset['target'] = target
Dataset

In [None]:
# Number of labelled cases listed in train.csv.
print('Number of cases: ',len(Dataset))

# Count the regular files in the slide folder and in the mask folder so the
# totals can be compared with the number of cases.
train_path = '../input/prostate-cancer-grade-assessment/train_images'
mask_path = '../input/prostate-cancer-grade-assessment/train_label_masks'

train_image = sum(
    1 for entry in os.listdir(train_path)
    if os.path.isfile(os.path.join(train_path, entry))
)
print('Number of train image: ',train_image)

mask_image = sum(
    1 for entry in os.listdir(mask_path)
    if os.path.isfile(os.path.join(mask_path, entry))
)
print('Number of mask image: ',mask_image)

**Problem here: missing some mask images!**

In [None]:
#check some images
#1.select 2 cases per gleason score class
# DataFrame.append was removed in pandas 2.0 and growing a frame inside a loop
# is quadratic, so collect the per-class slices and concatenate them once.
TempImageDataset = pd.concat(
    [Dataset.loc[Dataset.gleason_score == score].head(2) for score in gleason_score]
)
TempImageDataset.reset_index()

In [None]:
#check images using openslide
def plotImageAndmasks(TempImageDataset):
    """Plot every case's slide next to its label mask.

    Parameters
    ----------
    TempImageDataset : pandas.DataFrame
        Cases to display. The columns 'image_id', 'isup_grade',
        'gleason_score' and 'data_provider' are read.

    Each case occupies two consecutive grid slots: the slide, read at its
    lowest-resolution pyramid level, followed by the mask. The mask slot is
    left empty when no mask file exists for the case.
    """
    image_dir = '../input/prostate-cancer-grade-assessment/train_images/'
    mask_dir = '../input/prostate-cancer-grade-assessment/train_label_masks/'

    columns = 4
    # Keep the original 11-row layout, but grow it if more cases are passed in
    # than 11 rows x 4 columns can hold (two slots per case).
    slots_needed = 2 * len(TempImageDataset)
    rows = max(11, (slots_needed + columns - 1) // columns)
    fig = plt.figure(figsize=(40, 70))

    # Colour palette applied to the mask's red band; with raw mode "RGB" the
    # palette holds at most 768 values, so pad the 18 colour values with zeros.
    palette = [0, 0, 0, 102, 102, 102, 255, 0, 0, 0, 251, 255, 246, 255, 0, 0, 255, 8]
    palette.extend([0]*750)

    for i in range(len(TempImageDataset)):
        case = TempImageDataset.iloc[i]
        image_id = case['image_id']
        title = ('isUp_grade: '+str(case['isup_grade'])+
                 ' gleason_score: '+str(case['gleason_score'])+
                 '\ndata_provider: '+str(case['data_provider']))

        # Read the smallest pyramid level; index the dimensions with the same
        # level number instead of assuming every slide has exactly 3 levels.
        slide = openslide.OpenSlide(image_dir+str(image_id)+'.tiff')
        try:
            lowest = slide.level_count - 1
            img = slide.read_region((0, 0), lowest, slide.level_dimensions[lowest]).convert('RGB')
        finally:
            # Release the file handle even if reading fails.
            slide.close()
        ax = fig.add_subplot(rows, columns, 2 * i + 1)
        ax.set_title(title)
        ax.imshow(img)

        # Plot the corresponding mask, if the case has one.
        mask_file = mask_dir+str(image_id)+'_mask.tiff'
        if os.path.exists(mask_file):
            mask = openslide.OpenSlide(mask_file)
            try:
                lowest = mask.level_count - 1
                mask_img = mask.read_region((0, 0), lowest, mask.level_dimensions[lowest]).convert(mode='RGB')
            finally:
                mask.close()
            # The mask labels are stored in the red band.
            mask_img = mask_img.split()[0]
            mask_img.putpalette(data=palette, rawmode='RGB')
            ax = fig.add_subplot(rows, columns, 2 * i + 2)
            ax.set_title(title)
            ax.imshow(mask_img)

plotImageAndmasks(TempImageDataset)

**Different data providers have different masks.**

In [None]:
#overlay the mask on the image

**What input should we use: images, or images with masks?**

**Which class attribute do we want to predict: isUp_grade (6 classes) or Gleason_score (11 classes; if we use this, we need to consider data imbalance)?**

In [None]:
# Remove the row at position 7273 from the dataset.
# NOTE(review): the drop is positional and rebinds `Dataset`, so re-running
# this cell removes a further row each time - confirm which case is meant.
Dataset = Dataset.drop(index=Dataset.index[7273])

In [None]:
# In this part, we split the dataset into stratified training and testing subsets.

def splitData(Dataset, test_size=0.2, random_state=42):
    """Split the cases into stratified train and test subsets.

    Parameters
    ----------
    Dataset : pandas.DataFrame
        Must provide the columns 'image_id' and 'target'.
    test_size : float, optional
        Fraction of the cases assigned to the test subset (default 0.2).
    random_state : int or None, optional
        Seed for the shuffle so that the split is reproducible between runs;
        pass None for a different split on every call.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
        Image ids and targets of the train and test subsets, in that order.
    """
    image_id = np.asarray(Dataset.image_id.values)
    label = np.asarray(Dataset.target.values)
    # tutorial: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedShuffleSplit.html
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    # n_splits=1, so the generator yields exactly one (train, test) index pair.
    train_index, test_index = next(splitter.split(image_id, label))
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = image_id[train_index], image_id[test_index]
    y_train, y_test = label[train_index], label[test_index]
    return X_train, X_test,y_train, y_test

X_train, X_test,y_train, y_test = splitData(Dataset)

In [None]:
#Data Augmentation and create dataloader
# tutorial: https://androidkt.com/load-custom-image-datasets-into-pytorch-dataloader-without-using-imagefolder/
# Per-channel statistics used to normalize the images.
# NOTE(review): these look like the commonly quoted CIFAR-10 statistics rather
# than values computed from these slides - confirm.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

class imageDataset(D):
    """Dataset that reads a slide at its lowest-resolution level on demand.

    Parameters
    ----------
    X_train : sequence
        Image ids; '<id>.tiff' is opened from the train_images folder.
    y_train : sequence
        Target label for every image id, in the same order.
    augment : bool, optional
        When True (default) a random horizontal flip and a random rotation
        are applied. Pass False for evaluation data so that the metrics are
        computed on unmodified images.
    """

    def __init__(self, X_train, y_train, augment=True):
        self.X_t = X_train
        self.y_t = y_train
        # Resize -> (optional augmentation) -> tensor -> normalization.
        steps = [transforms.Resize((224,224))]
        if augment:
            steps.append(transforms.RandomHorizontalFlip())
            steps.append(transforms.RandomRotation(degrees=60))
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(norm_mean, norm_std))
        self.transform = transforms.Compose(steps)

    def __len__(self):
        """Return the number of cases."""
        return len(self.X_t)

    def __getitem__(self, index):
        """Return {'images': transformed image tensor, 'labels': target}."""
        slide = openslide.OpenSlide('../input/prostate-cancer-grade-assessment/train_images/'+str(self.X_t[index])+'.tiff')
        try:
            # Use the same level number for the region size instead of
            # assuming that every slide has exactly three levels.
            lowest = slide.level_count - 1
            img = slide.read_region((0, 0), lowest, slide.level_dimensions[lowest]).convert('RGB')
        finally:
            # One handle is opened per sample; close it so handles do not pile up.
            slide.close()
        image = self.transform(img)
        target = self.y_t[index]
        sample = {'images': image,'labels':target}
        return sample

#dataloader
train_dataset = imageDataset(X_train,y_train)
# No random augmentation for the evaluation data.
test_dataset = imageDataset(X_test,y_test, augment=False)
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
#########################################################################################################

In [None]:

###############tutorial: https://androidkt.com/load-custom-image-datasets-into-pytorch-dataloader-without-using-imagefolder/#####################

# Get a batch of train loader
images = next(iter(train_loader))

def plot_image(img, mean=norm_mean, std=norm_std):
    """Arrange normalized images into one grid that matplotlib can display.

    Parameters
    ----------
    img : torch.Tensor
        Normalized image(s), shaped (B, C, H, W) or (C, H, W).
    mean, std : sequence of float, optional
        Per-channel statistics that were used for normalization; they are
        inverted here so that the original colours are shown.

    Returns
    -------
    numpy.ndarray
        Image grid in (H, W, C) layout with values clipped to [0, 1].
    """
    # Reshape the statistics to (C, 1, 1) so they broadcast over H and W.
    mean_t = torch.as_tensor(mean, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    # Undo Normalize(mean, std): x = x_norm * std + mean.
    output = torchvision.utils.make_grid(img * std_t + mean_t)
    output = output.cpu().numpy().transpose((1, 2, 0))
    output = np.clip(output, 0, 1)
    return output

# check if store images properly in a batch
#Show images
output = plot_image(images['images'])
plt.imshow(output)
#########################################################################################################


In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
class CNN(nn.Module):#(LeNet)
    """LeNet-style classifier (adapted from HW6) for 3-channel 224x224 images.

    Parameters
    ----------
    num_classes : int, optional
        Number of output logits (default 10).

    The first linear layer expects 16*54*54 features, which is what the two
    conv/pool stages produce for a 224x224 input:
    224 -> conv 5x5, pad 2 -> 224 -> pool -> 112 -> conv 5x5 -> 108 -> pool -> 54.
    """
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
                        nn.Conv2d(3, 6, (5,5), padding=2),
                        nn.ReLU(),
                        nn.MaxPool2d((2, 2)))
        self.layer2 = nn.Sequential(
                        nn.Conv2d(6, 16, (5,5)),
                        nn.ReLU(),
                        nn.MaxPool2d((2, 2)))
        self.seq = nn.Sequential(
                nn.Linear(16*54*54, 120),
                nn.ReLU(),
                nn.Linear(120, 84),
                nn.ReLU(),
                nn.Linear(84, num_classes)
                )

    def forward(self, x):
        """Return the raw class logits, shaped (batch, num_classes)."""
        x = self.layer1(x)
        x = self.layer2(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.seq(x)
        return x

In [None]:
import torch.optim as optim

# Build the classifier and move it to the selected device.
model = CNN()
model = model.to(device)
# Cross-entropy on the raw logits, optimized with Adam at a small learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)


In [None]:
num_epochs = 10
# Per-epoch history of accuracy and loss for the training and validation data.
train_accuracy = []
Val_accuracy = []
train_losses = []
Val_losses = []

#start training
for epoch in range(num_epochs):
    # ---- training pass ----
    # The validation pass below switches to eval mode, so switch back to
    # training mode at the start of every epoch.
    model.train()
    train_loss = 0.0
    correct_total = 0.0
    num_samples_total = 0.0
    for data in train_loader:
        # get the inputs
        inputs, labels = data['images'].to(device), data['labels'].to(device)
        # set parameter gradients to zero
        optimizer.zero_grad()
        # forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # backward + optimize
        loss.backward()
        optimizer.step()

        # running totals for this epoch
        predicted = outputs.argmax(dim=1)
        correct_total += (predicted == labels).sum().item()
        num_samples_total += labels.size(0)
        train_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    valid_loss = 0.0
    Vcorrect_total = 0.0
    Vnum_samples_total = 0.0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for testdata in test_loader:
            val_inputs, val_labels = testdata['images'].to(device), testdata['labels'].to(device)
            val_outputs = model(val_inputs)

            vloss = criterion(val_outputs, val_labels)
            valid_loss += vloss.item()
            Vpredicted = val_outputs.argmax(dim=1)
            Vcorrect_total += (Vpredicted == val_labels).sum().item()
            Vnum_samples_total += val_labels.size(0)

    # Epoch summaries: mean of the per-batch losses, accuracy over all samples.
    epoch_train_loss = train_loss/len(train_loader)
    epoch_train_acc = correct_total/num_samples_total
    epoch_val_loss = valid_loss/len(test_loader)
    # Use the accumulated count, not the count of the last batch only.
    epoch_val_acc = Vcorrect_total/Vnum_samples_total

    print('Epoch: %d' %(epoch+1))
    print('train loss: %.3f  train accuracy:%.3f  val loss: %.3f  val accuracy: %.3f ' %(epoch_train_loss, epoch_train_acc, epoch_val_loss, epoch_val_acc))

    train_losses.append(epoch_train_loss)
    train_accuracy.append(epoch_train_acc)
    Val_losses.append(epoch_val_loss)
    Val_accuracy.append(epoch_val_acc)

In [None]:
# MODEL EVALUATION

# Accuracy per epoch on the training and validation data.
plt.plot(train_accuracy, label='train_accuracy')
plt.plot(Val_accuracy, label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The curve labels are only drawn when a legend is requested.
plt.legend()
plt.show()

In [None]:
# Loss per epoch on the training and validation data.
plt.plot(train_losses, label='train_loss')
plt.plot(Val_losses, label='valid_loss')
plt.xlabel('Epoch')
plt.ylabel('loss')
# The curve labels are only drawn when a legend is requested.
plt.legend()
plt.show()

In [None]:
# The abstract base classes live in collections.abc; importing Sequence from
# `collections` itself raises ImportError on Python 3.10 and later.
from collections import OrderedDict
from collections.abc import Sequence

class _BaseWrapper(object):
    """
    Common plumbing for gradient-based visualization wrappers.

    Holds the wrapped model, the device its parameters live on, and the hook
    handles registered by subclasses. Adapt forward() and backward() to the
    task at hand.
    """

    def __init__(self, model):
        super(_BaseWrapper, self).__init__()
        self.model = model
        # Device of the first model parameter.
        self.device = next(model.parameters()).device
        # Handles of the registered hook functions.
        self.handlers = []

    def _encode_one_hot(self, ids):
        """Return a tensor shaped like the logits with 1.0 at the given class ids."""
        blank = torch.zeros_like(self.logits).to(self.device)
        return blank.scatter_(1, ids, 1.0)

    def forward(self, image):
        """
        Plain classification pass.

        Stores the logits and softmax probabilities on the wrapper and returns
        the probabilities sorted in descending order together with their
        class indices.
        """
        self.model.zero_grad()
        logits = self.model(image)
        self.logits = logits
        self.probs = torch.softmax(logits, dim=1)
        return self.probs.sort(dim=1, descending=True)

    def backward(self, ids):
        """
        Back-propagate from the selected class logits only.

        Equivalent formulation:
        (self.logits * one_hot).sum().backward(retain_graph=True)
        """
        self.logits.backward(gradient=self._encode_one_hot(ids), retain_graph=True)

    def generate(self):
        """To be implemented by subclasses."""
        raise NotImplementedError

    def remove_hook(self):
        """
        Detach every registered forward/backward hook from the model.
        """
        for hook_handle in self.handlers:
            hook_handle.remove()


class GradCAM(_BaseWrapper):
    """
    "Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization"
    https://arxiv.org/pdf/1610.02391.pdf
    Look at Figure 2 on page 4

    Parameters
    ----------
    model : torch.nn.Module
        Network to explain.
    candidate_layers : list of str or None, optional
        Names (as in model.named_modules()) of the layers to hook. When None,
        every module is hooked.
    """

    def __init__(self, model, candidate_layers=None):
        super(GradCAM, self).__init__(model)
        self.fmap_pool = OrderedDict()  # layer name -> feature maps of the last forward pass
        self.grad_pool = OrderedDict()  # layer name -> gradients w.r.t. those feature maps
        self.candidate_layers = candidate_layers  # list

        def forward_hook(key):
            def forward_hook_(module, input, output):
                # Save featuremaps
                self.fmap_pool[key] = output.detach()

            return forward_hook_

        def backward_hook(key):
            def backward_hook_(module, grad_in, grad_out):
                # Save the gradients correspond to the featuremaps
                # (only the gradient w.r.t. the module output is read)
                self.grad_pool[key] = grad_out[0].detach()

            return backward_hook_

        # If any candidates are not specified, the hook is registered to all the layers.
        for name, module in self.model.named_modules():
            if self.candidate_layers is None or name in self.candidate_layers:
                self.handlers.append(module.register_forward_hook(forward_hook(name)))
                self.handlers.append(module.register_backward_hook(backward_hook(name)))

    def _find(self, pool, target_layer):
        """Return the entry stored for target_layer; ValueError if there is none."""
        if target_layer not in pool:
            raise ValueError("Invalid layer name: {}".format(target_layer))
        return pool[target_layer]

    def _compute_grad_weights(self, grads):
        """Average the gradients over the spatial dimensions (one weight per channel)."""
        return torch.nn.functional.adaptive_avg_pool2d(grads, 1)

    def forward(self, image):
        """Run the classification pass and remember the input's spatial size."""
        self.image_shape = image.shape[2:]
        return super(GradCAM, self).forward(image)

    def generate(self, target_layer):
        """
        Build the Grad-CAM map for target_layer.

        Requires a preceding forward() and backward() call. Returns a tensor
        shaped (B, 1, H, W) at the input resolution, scaled to [0, 1] for
        every sample. A map without any positive evidence is returned as all
        zeros.
        """
        fmaps = self._find(self.fmap_pool, target_layer)
        grads = self._find(self.grad_pool, target_layer)
        weights = self._compute_grad_weights(grads)

        # Weighted sum of the feature maps, keeping the positive part only.
        gcam = torch.mul(fmaps, weights).sum(dim=1, keepdim=True)
        gcam = torch.nn.functional.relu(gcam)

        gcam = torch.nn.functional.interpolate(
            gcam, self.image_shape, mode="bilinear", align_corners=False
        )

        # Min-max scale every sample's map independently.
        B, C, H, W = gcam.shape
        gcam = gcam.view(B, -1)
        gcam -= gcam.min(dim=1, keepdim=True)[0]
        peak = gcam.max(dim=1, keepdim=True)[0]
        # A constant map has a peak of 0 after the shift; dividing by it would
        # turn the whole map into NaN, so divide by 1 in that case.
        gcam /= torch.where(peak > 0, peak, torch.ones_like(peak))
        gcam = gcam.view(B, C, H, W)

        return gcam

In [None]:
def demo2(image, label, model):
    """
    Generate and display a Grad-CAM heat map for a single image.

    Parameters
    ----------
    image : torch.Tensor
        One normalized image without batch dimension, shaped (C, H, W).
    label : int or 0-d torch.Tensor
        Class whose evidence is visualized.
    model : torch.nn.Module
        Trained network; it must contain a sub-module named "layer2".
    """
    # Model
    model.to(device)
    model.eval()

    # The layers
    target_layers = ["layer2"]
    # Plain int, so that it does not matter whether a tensor or a number is passed.
    target_class = int(label)

    # Images
    images = image.unsqueeze(0).to(device)
    # Hook only the layers that are visualized.
    gcam = GradCAM(model=model, candidate_layers=target_layers)
    try:
        gcam.forward(images)
        ids_ = torch.full((len(images), 1), target_class, dtype=torch.long, device=device)
        gcam.backward(ids=ids_)

        for target_layer in target_layers:
            print("Generating Grad-CAM @{}".format(target_layer))

            # Grad-CAM
            regions = gcam.generate(target_layer=target_layer)
            for j in range(len(images)):
                # Separate name: rebinding `gcam` here would shadow the
                # GradCAM object that the outer loop still needs.
                heatmap = regions[j, 0]
                plt.imshow(heatmap.cpu())
                plt.show()
    finally:
        # Detach the hooks so that repeated calls do not stack them on the model.
        gcam.remove_hook()
            
# Take the first batch of the test loader and explain its first image.
images = next(iter(test_loader))
img = images['images']
labal = images['labels']
# Grad cam
demo2(img[0].to(device), labal[0].to(device), model)


# Channels-last copy of the same image for matplotlib.
image = images['images'][0].permute(1, 2, 0)
# Undo the normalization and clip to the displayable range [0, 1].
image2 = np.clip(image.numpy() * np.array(norm_std) + np.array(norm_mean), 0, 1)
# First the image as fed to the network (still normalized), then the restored one.
plt.imshow(image.numpy())
plt.show()
plt.imshow(image2)
plt.show()