In [1]:
from tqdm.notebook import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torch
import torch.nn as nn
import os
import numpy as np
import platform
import pickle
from PIL import Image
from torchvision import transforms
from numpy import random
from torch.utils.data import DataLoader
import albumentations as A
import matplotlib
import pandas as pd



In [2]:
# Set up augmentation class
import torchvision.transforms.functional as TF
import random

class MyTransform:
    """Rotate by one of the given angles."""

    def __init__(self):
        self.angles = [-30, -15, 0, 15, 30]
        self.brightness_levels = [0.9, 1.1, 1.2]

    def __call__(self, x):
        angle = random.choice(self.angles)
        x = TF.rotate(x, angle)
        x = TF.adjust_brightness(x, random.choice(self.brightness_levels))
        return x

tranformation = MyTransform()

In [11]:
import torch
import numpy as np
import os
import collections

from PIL import Image
from torchvision import transforms
from numpy import random
import numpy as np
import random
from typing import Tuple, Callable
import pandas as pd



class Dataset(torch.utils.data.Dataset):
    def __init__(self, annotation_frame: pd.DataFrame, path_to_slides: str, image_size: Tuple[int, int],
                transformation_fn: Callable = None, pseudo_epoch_length=1000, mode=""):
        super().__init__()
        self._image_size = image_size

        self._resize = transforms.Resize(size=image_size, antialias=True)
        
        self.annotations_frame = annotation_frame
        self.mode = mode
        if not os.path.isdir(path_to_slides):
            raise IOError("Image path is not set correctly")
            
        self.path_to_slides = path_to_slides
        self.pseudo_epoch_length = pseudo_epoch_length
        
        self.transformation = transformation_fn
        self.transform_to_tensor = transforms.ToTensor()
        self.slide_list, self.annotation_list = self._initialize()
        

    def __len__(self):
        return len(self.annotation_list)

        
    def _initialize(self):
        """
        Initilize the internal dictionary
        """

        slide_list = []
        annotation_list = []
        for _, row in self.annotations_frame.iterrows():
            image = row['filename']
            label = row['label']
            open_image = Image.open(os.path.join(self.path_to_slides, image)).convert('RGB')
            open_image = self.transform_to_tensor(open_image)
            if self.mode == "train":
                if label == 1:
                    # Undersample class 1 with a probability of 25%
                    if random.choice([1, 2, 3, 4]) == 1:
                        continue
                    slide_list.append(open_image)
                    annotation_list.append(label)
                # Oversample classes with label 3 using transformations
                elif label == 3:
                        
                    slide_list.append(open_image)
                    annotation_list.append(label)

                    slide_list.append(open_image)
                    annotation_list.append(label)


                elif label in [4, 5]:
                    for i in range(3):
                        slide_list.append(open_image)
                        annotation_list.append(label)

                elif label in [0, 2]:

                    slide_list.append(open_image)
                    annotation_list.append(label)
                    
    
                elif label in  [9 , 10, 11]:
                    for i in range(20):
                        slide_list.append(open_image)
                        annotation_list.append(label)
                else:
                    for i in range(15):
                        slide_list.append(open_image)
                        annotation_list.append(label)
            else:
                slide_list.append(open_image)
                annotation_list.append(label)
                    
        
        print(f"The counts after adjustment: {collections.Counter(annotation_list)}")
        print(f"The number of images in the dataset : {len(annotation_list)}")


        return slide_list, annotation_list
    
    
    def __getitem__(self,index):
        """
        Load an image
        """

        img = self.slide_list[index]
        label = self.annotation_list[index]

        # Perform necessary conversions
        img = self._resize.forward(img)
        img = self.transformation(img) if self.transformation else img
        
           
        return img.to(torch.device("cuda")), torch.tensor(label, device=torch.device("cuda"), dtype=torch.int64)

In [3]:
from sklearn.model_selection import train_test_split
import albumentations as A

annotation_frames = pd.read_pickle("annotations.p")

# Stratified sampling

df_train, df_test = train_test_split(annotation_frames, test_size=0.2, stratify=annotation_frames[["label"]])

print(df_train['label'].value_counts())
# Augmentation function

train_dataset = Dataset(path_to_slides="/home/ESPL_001/user/Downloads/fourth_milestore/crops", image_size=(32, 32), annotation_frame=df_train, transformation_fn=tranformation, mode="train")
test_dataset = Dataset(path_to_slides="/home/ESPL_001/user/Downloads/fourth_milestore/crops", image_size=(32, 32), annotation_frame=df_test, mode="test")


label
1     6522
0     3569
2     3202
3     1302
4      587
5      285
6      154
7       62
8       38
11      23
9       22
10      21
Name: count, dtype: int64
The counts after adjustment: Counter({1: 4889, 0: 3569, 2: 3202, 3: 2604, 6: 2310, 4: 1761, 7: 930, 5: 855, 8: 570, 11: 460, 9: 440, 10: 420})
The number of images in the dataset : 22010
The counts after adjustment: Counter({1: 1630, 0: 892, 2: 801, 3: 326, 4: 147, 5: 71, 6: 39, 7: 16, 8: 9, 9: 6, 10: 5, 11: 5})
The number of images in the dataset : 3947


In [4]:
train_laoder = DataLoader(train_dataset, batch_size=5, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

#1. Implement a Dataset Class

#2. Develop a Custom Classification Model

In [5]:
import torch.optim as optim
from tqdm import tqdm

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3), stride=1, padding=1)
        self.act1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.3)
 
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3,3), stride=1, padding=1)
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))

        self.conv3 = nn.Conv2d(64, 32, kernel_size=(2,2), stride=2, padding=1)
        self.act3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2))
 
        self.flat = nn.Flatten()
 
        self.fc4 = nn.Linear(512, 512)
        self.act4 = nn.ReLU()
        self.drop4 = nn.Dropout(0.3)
 
        self.fc5 = nn.Linear(512, 12)
  


    def forward(self, x):
        # input 3x32x32, output 32x32x32
        x = self.act1(self.conv1(x))
        x = self.drop1(x)
        # input 32x32x32, output 32x32x32
        x = self.act2(self.conv2(x))
        # input 32x32x32, output 32x16x16
        x = self.pool2(x)
        x = self.act3(self.conv3(x))
        x = self.pool3(x)
        # input 32x16x16, output 8192
        x = self.flat(x)
        # input 8192, output 512
        x = self.act4(self.fc4(x))
        x = self.drop4(x)
        # input 512, output 12
        x = self.fc5(x)

        return x


In [6]:
torch.cuda.is_available()

True

In [7]:
model = Classifier()
model.to(torch.device("cuda"))
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(torch.device("cuda"))
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

n_epochs = 10
for epoch in range(n_epochs):
    for inputs, labels in tqdm(train_laoder):
        # forward, backward, and then weight update
        y_pred = model(inputs)
        loss = loss_fn(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    acc = 0
    count = 0
    for inputs, labels in test_loader:
        y_pred = model(inputs)
        acc += (torch.argmax(y_pred, 1) == labels).float().sum()
        count += len(labels)
    acc /= count
    print("Epoch %d: model accuracy %.2f%%" % (epoch, acc*100))
        

100%|██████████████████████████████████████| 4402/4402 [00:12<00:00, 345.24it/s]


Epoch 0: model accuracy 44.92%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 385.50it/s]


Epoch 1: model accuracy 53.38%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 385.92it/s]


Epoch 2: model accuracy 52.09%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 385.63it/s]


Epoch 3: model accuracy 55.05%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 383.29it/s]


Epoch 4: model accuracy 62.35%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 382.10it/s]


Epoch 5: model accuracy 64.63%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 379.13it/s]


Epoch 6: model accuracy 65.19%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 387.16it/s]


Epoch 7: model accuracy 67.24%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 388.88it/s]


Epoch 8: model accuracy 66.56%


100%|██████████████████████████████████████| 4402/4402 [00:11<00:00, 393.74it/s]


Epoch 9: model accuracy 66.33%


In [8]:
def find_image_size_after_kernel(image_size, filter_size, stride=1, padding=1):
    return ((image_size - filter_size + 2*padding)/stride) + 1

print(find_image_size_after_kernel(36, 2, padding=0, stride=2))


18.0


#4. Evaluate Model Performance

In [9]:
from sklearn.metrics import confusion_matrix

classes = [x for x in range(12)]


CM=0
model.eval()
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images
        labels = labels

        outputs = model(images) #file_name
        preds = torch.argmax(outputs.data, 1)
        CM+=confusion_matrix(labels.cpu(), preds.cpu(),labels=classes)

    tn=CM[0][0]
    tp=CM[1][1]
    fp=CM[0][1]
    fn=CM[1][0]
    acc=np.sum(np.diag(CM)/np.sum(CM))
    sensitivity=tp/(tp+fn)
    precision=tp/(tp+fp)

    print('\nTestset Accuracy(mean): %f %%' % (100 * acc))
    print()
    print('Confusion Matirx : ')
    print(CM)
    print('- Sensitivity : ',(tp/(tp+fn))*100)
    print('- Specificity : ',(tn/(tn+fp))*100)
    print('- Precision: ',(tp/(tp+fp))*100)
    print('- NPV: ',(tn/(tn+fn))*100)
    print('- F1 : ',((2*sensitivity*precision)/(sensitivity+precision))*100)
    print()



Testset Accuracy(mean): 69.318470 %

Confusion Matirx : 
[[ 758  112    2    3    4    0   12    1    0    0    0    0]
 [  46 1476   53   30    4    1   17    2    0    1    0    0]
 [  25  280  372   79   10    3   28    1    1    2    0    0]
 [  11   81   78   89   19    2   35    5    0    3    3    0]
 [   4   24   12   48   16    3   34    1    0    4    1    0]
 [   3    6    4   20    3    3   28    2    0    1    0    1]
 [   0    3    0    7    1    3   14    8    0    1    1    1]
 [   1    1    1    1    1    0    6    4    0    0    0    1]
 [   0    1    0    0    1    1    2    1    1    1    0    1]
 [   0    0    1    0    0    0    3    0    0    2    0    0]
 [   0    0    0    0    0    0    2    0    0    0    1    2]
 [   3    1    0    0    0    0    0    0    0    0    1    0]]
- Sensitivity :  96.97766097240473
- Specificity :  87.1264367816092
- Precision:  92.9471032745592
- NPV:  94.27860696517413
- F1 :  94.91961414790998



In [10]:
# prepare to count predictions for each class

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / (total_pred[classname] + 1)
    print(f'Accuracy for class: {classname} is {accuracy:.1f} %')

Accuracy for class: 0 is 84.9 %
Accuracy for class: 1 is 90.5 %
Accuracy for class: 2 is 46.4 %
Accuracy for class: 3 is 27.2 %
Accuracy for class: 4 is 10.8 %
Accuracy for class: 5 is 4.2 %
Accuracy for class: 6 is 35.0 %
Accuracy for class: 7 is 23.5 %
Accuracy for class: 8 is 10.0 %
Accuracy for class: 9 is 28.6 %
Accuracy for class: 10 is 16.7 %
Accuracy for class: 11 is 0.0 %
