In [1]:
import sys
# Extend the import search path two directories up from the notebook's working
# directory; presumably this is where the project-local `lib` package lives — TODO confirm.
sys.path.append('../..')

In [2]:
import os
import psutil

import random
import math
from functools import partial

import torch 
from torch import optim
from torch.optim import lr_scheduler
from torch import nn
from torch.nn import functional as F

import multiprocessing.dummy as mp

from pytorch_lightning import Trainer
from pytorch_lightning.core import LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger


from lib.schedulers import DelayedScheduler
from lib.datasets import (max_lbl_nums, actual_lbl_nums, 
                          patches_rgb_mean_av1, patches_rgb_std_av1, 
                          get_train_test_img_ids_split)
from lib.dataloaders import PatchesDataset, WSIPatchesDatasetRaw, WSIPatchesDummyDataloader
from lib.augmentations import augment_v1_clr_only, augment_empty_clr_only
from lib.losses import SmoothLoss

from lib.models.unetv1 import get_model
from lib.models.features_map import FeaturesMap

from sklearn.metrics import cohen_kappa_score

from tqdm.auto import tqdm

import matplotlib.pyplot as plt

In [3]:
# import cv2
import numpy as np
# import pandas as pd
# from lib.datasets import patches_csv_path, patches_path
from lib.datasets import (patches_clean90_csv_path as patches_csv_path, patches_path,
                          patches_clean90_pkl_path as patches_pkl_path)
# from lib.dataloaders import imread, get_g_score_num, get_provider_num

In [4]:
# Obtain the train / test image-id split from the project helper.
train_img_ids, test_img_ids = get_train_test_img_ids_split()

# Show the first four test ids as a quick sanity check.
test_img_ids[:4]

['e8baa3bb9dcfb9cef5ca599d62bb8046',
 '9b2948ff81b64677a1a152a1532c1a50',
 '5b003d43ec0ce5979062442486f84cf7',
 '375b2c9501320b35ceb638a3274812aa']

In [5]:
from lib.dataloaders import WSIPatchesDataloader, WSIPatchesDatasetRaw
from lib.utils import get_pretrained_model, get_features

In [6]:
# Batch size handed to the feature-batch loaders created further down.
batch_size = 64

In [7]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs on machines without a (visible) GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the pretrained network through the project helper, passing the model
# factory, its keyword configuration, the checkpoint path and the target device.
#
# Alternative configuration kept for reference (not active):
#   'backbone': 'seresnet18_mini',
#   'decoder_channels': (64, 64, 64, 32, 16),
#   checkpoint: "/mnt/HDDData/notebooks/pcancer/experiments_results/pretrain/clean_patches90_small_se_resnet18_v1/version_0/checkpoints/last.ckpt"
model = get_pretrained_model(
    get_model,
    dict(classes=actual_lbl_nums),
    "../Patches256TestRun/version_0/checkpoints/last.ckpt",
    device,
    encoder_only=False,
)

In [199]:
# Locations of the precomputed feature batches. The train path contains a `{}`
# placeholder — presumably filled in by the loader itself; TODO confirm.
train_batch_path = '/mnt/HDDData/pdata/processed/pretrained/train/{}/'
test_batch_path = '/mnt/HDDData/pdata/processed/pretrained/val/'

# Both loaders share every setting except the source path and shuffling:
# the train loader shuffles, the validation loader does not.
train_loader, val_loader = (
    WSIPatchesDummyDataloader(path, precalc_epochs=50, batch_size=batch_size, shuffle=do_shuffle)
    for path, do_shuffle in ((train_batch_path, True), (test_batch_path, False))
)

In [177]:
# Build one feature row per sample from the classification head's per-patch
# predictions, iterating the train loader 50 times.
#
# Row layout (concatenated along dim=1):
#   provider | mean raw preds | mean softmax preds | argmax counts | argmax proportions
data = []
target = []

for _ in tqdm(range(50)):
    for features, ys, xs, provider, isup_grade, gleason_score in train_loader:
        for b in range(features.shape[0]):
            # Keep only positions whose y coordinate is > -1 (presumably -1 marks padding).
            r_mask = ys[b] > -1
            f = features.transpose(-2, -1)[b, r_mask][..., None, None]

            with torch.no_grad():
                preds = model.classification_head(f.to(device))

            # Mean of the raw outputs followed by the mean of the softmaxed outputs;
            # the first 6 and the last 3 columns are normalised as two separate groups.
            lbls = torch.cat([preds.mean(dim=0),
                              torch.cat([F.softmax(preds[:, :6], dim=1),
                                         F.softmax(preds[:, -3:], dim=1)],
                                        dim=1).mean(dim=0)]).cpu()

            c_lbl = torch.zeros((1, 9))
            p_lbl = torch.zeros((1, 9))
            a_max0 = preds[:, :6].argmax(dim=1)
            # Shift the second group's argmax so both groups index into 0..8.
            a_max1 = preds[:, -3:].argmax(dim=1) + 6

            for cls_idx in range(9):
                # Fix: the counts used to be written into p_lbl and were immediately
                # overwritten by the proportions, leaving c_lbl all zeros.
                c_lbl[0, cls_idx] = (a_max0 == cls_idx).sum() + (a_max1 == cls_idx).sum()
                p_lbl[0, cls_idx] = ((a_max0 == cls_idx).float().mean()
                                     + (a_max1 == cls_idx).float().mean())

            data.append(torch.cat([provider[b][None, None, ...].float(),
                                   lbls[None, ...],
                                   c_lbl, p_lbl], dim=1).numpy())
        # One label per sample of the batch, matching the rows appended above.
        target.append(isup_grade.numpy())

data = np.concatenate(data)
target = np.concatenate(target)

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




In [228]:
# Collapse every sample's valid patch features into a single vector by taking
# the per-channel maximum over its patches (validation loader, one pass).
data = []
target = []

for _ in tqdm(range(1)):
    for features, ys, xs, provider, isup_grade, gleason_score in val_loader:
        # Keep only positions whose y coordinate is > -1 (presumably -1 marks padding).
        r_mask = ys > -1
        n_samples = features.shape[0]

        # A sample without any valid patch would otherwise either break the pooling
        # or (if it is last in the batch) silently yield fewer rows than labels.
        if not r_mask.any(dim=1).all():
            raise ValueError("val batch contains a sample without any valid patch (ys > -1)")

        # Sample index of every kept patch; the grid follows the mask's own shape
        # instead of a hard-coded patch count of 300.
        f_ns = torch.arange(n_samples)[..., None].expand_as(r_mask)[r_mask]
        f = features.transpose(-2, -1)[r_mask]

        # Iterate over the full batch so the rows line up one-to-one with isup_grade.
        out = [F.adaptive_max_pool1d(f[f_ns == i].T[None, ...], 1)[..., 0]
               for i in range(n_samples)]
        out = torch.cat(out)

        data.append(out.numpy())
        target.append(isup_grade.numpy())

data = np.concatenate(data)
target = np.concatenate(target)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [230]:
#val_data = data.copy()
#val_target = target.copy()

#np.save("val_raw512_data.npy", val_data)
#np.save("val_raw512_target.npy", val_target)

In [226]:
#train_data = data.copy()
#train_target = target.copy()

#np.save("train_raw512_data.npy", train_data)
#np.save("train_raw512_target.npy", train_target)

In [227]:
# Display the shapes of the training feature matrix and label vector.
# NOTE(review): on a fresh top-to-bottom run `train_data` / `train_target` do not exist yet
# at this point — the `.copy()` assignments above are commented out and the `np.load` cell
# comes later — so this cell depends on leftover kernel state.
train_data.shape, train_target.shape

((421000, 512), (421000,))

In [7]:
# Reload the cached feature matrices and labels for both splits from the
# .npy files in the working directory.
train_data, train_target, val_data, val_target = map(
    np.load,
    (
        "train_raw512_data.npy",
        "train_raw512_target.npy",
        "val_raw512_data.npy",
        "val_raw512_target.npy",
    ),
)

In [8]:
import lightgbm as lgb

In [9]:
def quadratic_weighted_kappa(y_hat, y):
    """Return Cohen's kappa between `y_hat` and `y` using quadratic weights.

    Both arguments are forwarded unchanged to `cohen_kappa_score`.
    """
    kappa = cohen_kappa_score(y_hat, y, weights='quadratic')
    return kappa

def QWK(preds, dtrain):
    """Custom evaluation function: quadratic weighted kappa of rounded predictions.

    Args:
        preds: continuous predictions; rounded to the nearest integer before scoring.
        dtrain: dataset object exposing `get_label()` with the true labels.

    Returns:
        Tuple ``("QWK", score, True)``: metric name, value, and the
        higher-is-better flag expected by the training loop it is passed to.
    """
    rounded = np.rint(preds)
    return ("QWK", quadratic_weighted_kappa(rounded, dtrain.get_label()), True)

In [233]:
# Quick check of the training matrix dimensions before building the datasets.
train_data.shape

(421000, 512)

In [235]:
# Wrap the feature matrices for LightGBM. The validation set names the training
# set as its `reference`, as the LightGBM documentation asks for validation data,
# so it is also consistent when used outside `lgb.train`.
train_dataset = lgb.Dataset(train_data, label=train_target)
valid_dataset = lgb.Dataset(val_data, label=val_target, reference=train_dataset)

In [236]:
# LightGBM configuration: plain regression scored by RMSE, gradient-boosted
# trees, a small learning rate and a fixed seed.
params = dict(
    objective='regression',
    metric='rmse',
    seed=42,
    learning_rate=0.01,
    boosting="gbdt",
)

In [237]:
# Train the LightGBM regressor: up to 20000 rounds, early stopping after 2000
# rounds without improvement, metrics logged every 100 rounds. The custom QWK
# metric is evaluated alongside the configured one on both listed sets.
# NOTE(review): this rebinds `model`, which earlier in the notebook held the
# pretrained network.
model = lgb.train(
    params,
    train_dataset,
    num_boost_round=20000,
    valid_sets=[train_dataset, valid_dataset],
    feval=QWK,
    early_stopping_rounds=2000,
    verbose_eval=100,
)

Training until validation scores don't improve for 2000 rounds
[100]	training's rmse: 1.13963	training's QWK: 0.589671	valid_1's rmse: 1.13894	valid_1's QWK: 0.609197
[200]	training's rmse: 0.993364	training's QWK: 0.764304	valid_1's rmse: 0.980739	valid_1's QWK: 0.787545
[300]	training's rmse: 0.945727	training's QWK: 0.797465	valid_1's rmse: 0.938806	valid_1's QWK: 0.814201
[400]	training's rmse: 0.920672	training's QWK: 0.813302	valid_1's rmse: 0.922793	valid_1's QWK: 0.825779
[500]	training's rmse: 0.902415	training's QWK: 0.822338	valid_1's rmse: 0.91451	valid_1's QWK: 0.832676
[600]	training's rmse: 0.887646	training's QWK: 0.828793	valid_1's rmse: 0.910156	valid_1's QWK: 0.83823
[700]	training's rmse: 0.874647	training's QWK: 0.834527	valid_1's rmse: 0.906254	valid_1's QWK: 0.837652
[800]	training's rmse: 0.862997	training's QWK: 0.83922	valid_1's rmse: 0.903555	valid_1's QWK: 0.838964
[900]	training's rmse: 0.852342	training's QWK: 0.843279	valid_1's rmse: 0.901034	valid_1's QW

[7800]	training's rmse: 0.594292	training's QWK: 0.925076	valid_1's rmse: 0.863045	valid_1's QWK: 0.854462
[7900]	training's rmse: 0.592359	training's QWK: 0.925591	valid_1's rmse: 0.862836	valid_1's QWK: 0.853823
[8000]	training's rmse: 0.590368	training's QWK: 0.926112	valid_1's rmse: 0.862814	valid_1's QWK: 0.854192
[8100]	training's rmse: 0.588344	training's QWK: 0.926625	valid_1's rmse: 0.862598	valid_1's QWK: 0.854608
[8200]	training's rmse: 0.586403	training's QWK: 0.92711	valid_1's rmse: 0.862393	valid_1's QWK: 0.85478
[8300]	training's rmse: 0.584509	training's QWK: 0.927559	valid_1's rmse: 0.86218	valid_1's QWK: 0.85488
[8400]	training's rmse: 0.582577	training's QWK: 0.92804	valid_1's rmse: 0.862056	valid_1's QWK: 0.854952
[8500]	training's rmse: 0.580703	training's QWK: 0.928541	valid_1's rmse: 0.861901	valid_1's QWK: 0.854755
[8600]	training's rmse: 0.57875	training's QWK: 0.929003	valid_1's rmse: 0.861745	valid_1's QWK: 0.854682
[8700]	training's rmse: 0.576834	training's

In [249]:
# Predict on the validation features using the iteration recorded as best during training.
val_preds0 = model.predict(val_data, num_iteration=model.best_iteration)

In [250]:
# Score the LightGBM validation predictions after rounding to integers.
# Fix: this used `val_preds`, which no cell in the notebook defines; the
# predictions computed just above are stored in `val_preds0`.
quadratic_weighted_kappa(np.rint(val_preds0), val_target)

0.8582839049683022

In [10]:
import xgboost as xgb

In [23]:
# XGBoost regressor trained on the same features as the LightGBM model:
# squared-error objective, 300 trees of depth 6, 30% column sampling per tree.
xg_model0 = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    colsample_bytree=0.3,
    alpha=10,
)

In [24]:
# Fit the XGBoost regressor on the full training matrix.
# NOTE(review): the stored output of this cell reports learning_rate=0.2 while the
# constructor cell passes 0.1, so the saved output stems from a different run.
xg_model0.fit(train_data, train_target)

XGBRegressor(alpha=10, base_score=0.5, booster=None, colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.3, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints=None,
             learning_rate=0.2, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=300, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0, reg_alpha=10,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method=None,
             validate_parameters=False, verbosity=None)

In [26]:
# Continuous XGBoost predictions for the validation features.
xg_val_preds0 = xg_model0.predict(val_data)

In [27]:
# Quadratic weighted kappa of the rounded XGBoost validation predictions.
quadratic_weighted_kappa(np.rint(xg_val_preds0), val_target)

0.842433937385245

In [286]:
# Score a weighted blend of both regressors (0.01 XGBoost + 0.99 LightGBM),
# rounded to integers before computing the kappa.
quadratic_weighted_kappa(np.rint(0.01*xg_val_preds0 + 0.99*val_preds0), val_target)

0.8581618585211558

In [247]:
#torch.save(torch.from_numpy(train_data.mean(axis=0)), "../../train_mean_512x8x8.pth")

In [248]:
#torch.save(torch.from_numpy(val_data.mean(axis=0)), "../../val_mean_512x8x8.pth")