In [None]:
!pip install lightning

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os

import albumentations as A
import cv2
import lightning.pytorch as pl
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Notebook-wide configuration.
BATCH_SIZE = 16                          # samples per DataLoader batch
OUT_DIR = "/kaggle/working/log/freeze"   # destination for plots and saved weights
# Use the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))
        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the training manifest, then keep only the rows whose `is_tma` flag is False.
raw_df = pd.read_csv('/kaggle/input/UBC-OCEAN/train.csv')
df = raw_df.loc[raw_df["is_tma"] == False]
df.shape


In [None]:
from torch.utils.data import Dataset, DataLoader
from albumentations.augmentations.geometric.resize import LongestMaxSize
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.augmentations.crops.transforms import CenterCrop
from torchvision import transforms
# Label string -> integer class id; the position in the tuple is the id.
code_mapping = {
    label: class_id
    for class_id, label in enumerate(("HGSC", "LGSC", "EC", "CC", "MC"))
}

class ubc_data(Dataset):
    """Dataset of UBC-OCEAN thumbnail images and their integer class ids.

    Each item is ``(image, label)``: ``image`` is a channel-first float tensor,
    resized so its longest side is ``IMAGE_SIZE``, zero-padded to a square,
    scaled to [0, 1] and normalized with the ImageNet channel statistics;
    ``label`` is ``code_mapping[row.label]``.

    Args:
        df: DataFrame with at least the columns ``image_id`` and ``label``.

    Raises:
        FileNotFoundError: from ``__getitem__`` when a thumbnail cannot be read.
    """

    IMAGE_DIR = "/kaggle/input/UBC-OCEAN/train_thumbnails"
    IMAGE_SIZE = 224
    # ImageNet statistics: RGB channel order, defined for pixel values in [0, 1].
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD = [0.229, 0.224, 0.225]

    def __init__(self, df):
        self.df = df
        # Build the pipelines once instead of on every __getitem__ call.
        self.transform = A.Compose([
            LongestMaxSize(self.IMAGE_SIZE),
            A.PadIfNeeded(min_height=self.IMAGE_SIZE, min_width=self.IMAGE_SIZE,
                          border_mode=0, value=(0,0,0)),
            ToTensorV2(),
        ])
        self.norm = transforms.Normalize(mean=self.IMAGENET_MEAN, std=self.IMAGENET_STD)

    def __getitem__(self, idx):
        cur_row = self.df.iloc[idx]
        path = f"{self.IMAGE_DIR}/{cur_row.image_id}_thumbnail.png"
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"could not read image: {path}")
        # OpenCV decodes to BGR; the normalization statistics are in RGB order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # ToTensorV2 only reorders to CHW, it does not rescale: bring the
        # 0-255 pixel values to [0, 1] before applying mean/std.
        tensor = self.transform(image=img)["image"].float() / 255.0
        return self.norm(tensor), code_mapping[cur_row.label]

    def __len__(self):
        return len(self.df.index)


In [None]:
from sklearn.model_selection import train_test_split
# Fixed seed so the split (and every downstream metric) is reproducible;
# stratify on the label so both splits keep the same class proportions.
train_df, val_df = train_test_split(
    df, test_size=0.20, random_state=42, stratify=df["label"]
)

In [None]:

# Wrap each split in the dataset class.
train_data = ubc_data(train_df)
val_data = ubc_data(val_df)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=3)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=3)

# Sanity check on the first training sample: print its shape, then show it
# after moving the channel axis last (the layout imshow expects).
print(train_data[0][0].shape)
image = train_data[0][0].permute(1, 2, 0)
plt.imshow(image.numpy())


In [None]:
import torch.nn as nn

class my_resnet18(pl.LightningModule):
    """Pretrained torchvision ResNet-18 with its first top-level children frozen.

    Args:
        frozen_children: number of leading children of the network (in
            ``named_children()`` order) whose parameters get
            ``requires_grad = False``. The default of 7 matches the original
            ``ct < 8`` rule; later children stay trainable.
    """

    def __init__(self, frozen_children=7):
        super().__init__()
        self.net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
        ct = 0
        # named_children() is used because nn.Module has no `.name` attribute.
        for name, child in self.net.named_children():
            ct += 1
            frozen = ct <= frozen_children
            if frozen:
                for param in child.parameters():
                    param.requires_grad = False
            print(f"{name}: {'frozen' if frozen else 'trainable'}")
        print(ct)

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.net(x)

In [None]:
#model


import lightning.pytorch as pl
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

class my_resnet18_PL(pl.LightningModule):
    """Lightning wrapper: backbone + linear head, with per-epoch metric histories.

    Args:
        resnet: backbone module whose output has 1000 features per sample.
        num_classes: number of output logits of the linear head.
        lr: learning rate handed to Adam.

    Attributes:
        E_train_loss, E_val_loss, E_acc: one entry per completed epoch (mean of
            the per-batch values of that epoch).
        train_lost_list, val_lost_list, acc_list: per-batch buffers of the
            epoch in progress; emptied at the end of every epoch.
        epoch: number of completed training epochs.
    """

    def __init__(self, resnet, num_classes=5, lr=0.04):
        super().__init__()
        self.acc_list = []
        self.E_acc = []
        self.val_lost_list = []
        self.E_val_loss = []
        self.train_lost_list = []
        self.E_train_loss = []
        self.epoch = 0
        self.lr = lr
        self.resnet = resnet
        self.classifier = nn.Sequential(nn.Linear(1000, num_classes))

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        return self.classifier(self.resnet(x))

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss of one training batch and buffer it."""
        x, y_t = batch
        y_p = self(x)
        loss = F.cross_entropy(y_p, y_t)
        self.train_lost_list.append(loss.item())
        return loss

    def validation_step(self, batch, batch_idx):
        """Buffer the loss and accuracy of one validation batch."""
        with torch.no_grad():
            x, y_t = batch
            y_p = self(x)
            loss = F.cross_entropy(y_p, y_t)
            # Accuracy is computed in torch so it works on whichever device
            # the batch lives on (sklearn cannot consume GPU tensors).
            acc = (torch.argmax(y_p, dim=1) == y_t).float().mean().item()
            self.acc_list.append(acc)
            self.val_lost_list.append(loss.item())

    def on_validation_epoch_end(self) -> None:
        """Fold the buffered validation batches into the epoch histories."""
        # The pre-training sanity-check pass must not count as an epoch.
        if self.val_lost_list and not self.trainer.sanity_checking:
            self.E_val_loss.append(sum(self.val_lost_list) / len(self.val_lost_list))
            self.E_acc.append(sum(self.acc_list) / len(self.acc_list))
        # Reset so the next epoch's mean covers that epoch only.
        self.val_lost_list.clear()
        self.acc_list.clear()

    def on_train_epoch_end(self) -> None:
        """Record the epoch's mean train loss; every 2nd epoch save plots and weights."""
        if self.train_lost_list:
            self.E_train_loss.append(sum(self.train_lost_list) / len(self.train_lost_list))
            self.train_lost_list.clear()
        self.epoch += 1
        # Lightning calls this hook after the epoch's validation loop, so both
        # the train and validation histories are complete here.
        if self.epoch % 2 == 0:
            self._save_progress()

    def _save_progress(self) -> None:
        """Write the loss/accuracy curves and the backbone weights to OUT_DIR."""
        os.makedirs(OUT_DIR, exist_ok=True)
        figure_1, train_ax = plt.subplots()
        figure_2, valid_ax = plt.subplots()
        train_ax.plot(self.E_train_loss, color='blue')
        train_ax.set_xlabel('epochs')
        train_ax.set_ylabel('train loss')
        valid_ax.plot(self.E_val_loss, color='red', label='val loss')
        valid_ax.plot(self.E_acc, color='green', label='val accuracy')
        valid_ax.set_xlabel('epochs')
        valid_ax.set_ylabel('metric')
        valid_ax.legend()
        figure_1.savefig(f"{OUT_DIR}/train_loss_{self.epoch}.png")
        figure_2.savefig(f"{OUT_DIR}/valid_loss_{self.epoch}.png")
        # Close the figures so they do not pile up across epochs.
        plt.close(figure_1)
        plt.close(figure_2)
        # NOTE(review): only the backbone is saved; the classifier head is not
        # part of this file -- confirm that is intended.
        torch.save(self.resnet.state_dict(),
                   f"{OUT_DIR}/resnet_{self.epoch}_model.pth")
        print(f'SAVING PLOTS COMPLETE...{self.epoch}')

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of the module."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [None]:
# Build the backbone first, then wrap it with the Lightning training module.
backbone = my_resnet18()
model = my_resnet18_PL(backbone)

In [None]:
# The model's epoch-end hook saves plots and weights into OUT_DIR, so the
# directory has to exist before training starts.
os.makedirs(OUT_DIR, exist_ok=True)
trainer = pl.Trainer(max_epochs=50, default_root_dir="/kaggle/working/log", log_every_n_steps=13)
trainer.fit(model=model, train_dataloaders=train_loader, val_dataloaders=val_loader)

In [None]:
!mkdir /kaggle/working/log/freeze/

In [None]:
# Print the recorded histories: validation loss (first 15), accuracy (all),
# training loss (first 15).
for history in (model.E_val_loss[:15], model.E_acc[:], model.E_train_loss[:15]):
    print(history)

In [None]:
from sklearn.metrics import confusion_matrix
device = DEVICE
y_gt_batches = []
y_pr_batches = []
# Inputs are moved to `device` below, so the weights must live there too.
model.to(device)
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    for minibatch in val_loader:
        # 1. Unpack one validation minibatch
        x, y_gt = minibatch[0], minibatch[1]
        x = x.to(device)        # of size (batchsize, 3, H, W)

        # 2. Forward pass, logits -> class probabilities
        y_pr = F.softmax(model(x), dim=1)   # of size (batchsize, n_classes)

        # 3. Keep the per-batch arrays; they are joined once after the loop
        y_gt_batches.append(y_gt.cpu().numpy())
        y_pr_batches.append(y_pr.cpu().numpy())

# A single concatenate instead of re-copying the growing array on every step.
# An empty loader leaves empty lists, as before.
y_gt_all = np.concatenate(y_gt_batches, axis=0) if y_gt_batches else []
y_pr_all = np.concatenate(y_pr_batches, axis=0) if y_pr_batches else []


In [None]:
# Pass the full label set so the matrix always has one row/column per class,
# even when a class is missing from the validation targets and predictions.
conf_mat = confusion_matrix(
    y_gt_all,
    np.argmax(y_pr_all, axis=1),
    labels=sorted(code_mapping.values()),
)
conf_mat

In [None]:
import seaborn as sns
# Class id -> label string, used for the axis ticks.
label_mapping = {0:"HGSC",
      1:"LGSC",
      2:"EC",
      3:"CC",
      4:"MC"}
tick_labels = [label_mapping[i] for i in range(5)]
# fmt="d": the cells are integer counts; the default annotation format would
# render counts of 100 or more in scientific notation.
sns.heatmap(conf_mat, annot=True, fmt="d", vmax=24, xticklabels=tick_labels, yticklabels=tick_labels, cmap= sns.color_palette("YlOrBr_r", as_cmap=True))
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix");
#plt.savefig('conf_mat.png')

In [None]:
from sklearn.metrics import fbeta_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

def compute_metrics(y_pr, y_gt, label_list):
    """
    Compute classification metrics from predicted probabilities.

    Args:
        y_pr: 2D array of size (batchsize, n_classes) with per-class scores.
            It is not modified.
        y_gt: 1D array of size (batchsize,) with integer class ids.
        label_list: list of labels of the classification problem. Exactly two
            labels selects the binary branch (column 1 of ``y_pr`` is the
            positive-class score, thresholded at 0.5); otherwise metrics are
            macro-averaged over ``label_list``.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall', 'f1_score',
        'f2_score' and 'auc'. In the multi-class branch 'auc' is 0 when it
        cannot be computed (e.g. not all classes are present in ``y_gt``).
    """
    y_pr = np.asarray(y_pr)
    y_gt = np.asarray(y_gt)

    if len(label_list) == 2:
        # Score of the label-1 class
        y_score = y_pr[:, 1]
        auc = roc_auc_score(y_true=y_gt, y_score=y_score)

        # Threshold into a new array; writing into the y_pr[:, 1] view would
        # overwrite the caller's probabilities.
        threshold = 0.5
        y_label = (y_score >= threshold).astype(int)
        accuracy = accuracy_score(y_true=y_gt, y_pred=y_label)
        precision = precision_score(y_true=y_gt, y_pred=y_label, pos_label=1,
                                    average='binary', zero_division=1)
        recall = recall_score(y_true=y_gt, y_pred=y_label, pos_label=1,
                              average='binary')
        f1_score = fbeta_score(y_true=y_gt, y_pred=y_label, beta=1, pos_label=1,
                               average='binary')
        f2_score = fbeta_score(y_true=y_gt, y_pred=y_label, beta=2, pos_label=1,
                               average='binary')

    else:
        try:
            # One-hot encode y_gt in a single vectorized assignment
            y_onehot = np.zeros(y_pr.shape)
            y_onehot[np.arange(len(y_gt)), y_gt.astype(int)] = 1
            auc = roc_auc_score(y_true=y_onehot, y_score=y_pr)
        except (ValueError, IndexError):  # e.g. not all classes present in y_gt
            auc = 0

        # Predicted label = class with the highest score
        y_label = np.argmax(y_pr, axis=1)
        accuracy = accuracy_score(y_true=y_gt, y_pred=y_label)
        precision = precision_score(y_true=y_gt, y_pred=y_label, labels=label_list,
                                    average='macro', zero_division=1)
        # pos_label only applies to average='binary', so it is not passed here
        recall = recall_score(y_true=y_gt, y_pred=y_label,
                              labels=label_list, average='macro')
        f1_score = fbeta_score(y_true=y_gt, y_pred=y_label, beta=1,
                               labels=label_list, average='macro')
        # beta=2 (the original passed beta=1, duplicating f1_score)
        f2_score = fbeta_score(y_true=y_gt, y_pred=y_label, beta=2,
                               labels=label_list, average='macro')

    return {'accuracy': accuracy, 'precision': precision, 'recall': recall,
            'f1_score': f1_score, 'f2_score': f2_score, 'auc': auc}

In [None]:
# Score the collected predictions over the five class ids and print each
# metric to four decimals.
class_id_list = [*range(5)]
performance = compute_metrics(y_pr_all, y_gt_all, class_id_list)
print(f"Testing performance: ")
for metric_name, metric_value in performance.items():
    print(f'\t{metric_name}: \t{metric_value:.4f}')

In [None]:
# One single-element row per metric value, in dict order.
l = [[metric_value] for metric_value in performance.values()]

# Single-column frame indexed by metric name.
df = pd.DataFrame(l, index=list(performance))

In [None]:
# Display the list of single-value metric rows built from `performance`.
l