In [None]:
import os, sys
# sys.path = ['../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master', ] + sys.path

In [None]:
!pip install efficientnet_pytorch

In [None]:
#Basic Python and Machine learning libraries
import random, time, cv2
import pandas as pd
import numpy as np
from matplotlib.colors import from_levels_and_colors
import matplotlib.pyplot as plt
import skimage.io
from PIL import Image
from scipy import stats
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from IPython.display import display
from tqdm.notebook import tqdm

#Pytorch and Albumentations(Data Augmentation Library)
import torch
import albumentations
from albumentations.pytorch import ToTensorV2
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.functional import F 
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from efficientnet_pytorch import EfficientNet

In [None]:
class Config:
    """Single namespace holding every tunable setting used by this notebook."""

    # ---- run mode ----
    DEBUG = False
    SEED = 713

    # ---- filesystem layout ----
    pwd = '/kaggle/working/'
    data_dir = '../input/panda-dataset-medium-256-64-64/'
    train_img_dir = os.path.join(data_dir, 'train_images')
    train_mask_dir = os.path.join(data_dir, 'train_label_masks')
    test_img_dir = os.path.join(data_dir, 'test_images')
    orig_masks_dir = '../input/prostate-cancer-grade-assessment/train_label_masks'

    # ---- data geometry ----
    image_size = 64
    tile_size = 64
    n_tiles = 256          # tiles stored per slide
    n_images_to_plot = 16

    # ---- model ----
    backbone = 'efficientnet-b0'
    out_dim = 5            # number of output classes of the classifier

    # ---- optimisation (shortened when DEBUG is on) ----
    n_folds = 5 if not DEBUG else 2
    num_epochs = 4 if not DEBUG else 2
    batch_size = 512
    num_workers = 4
    lr = 1e-4
    t_0 = 2

    # Prefer the first CUDA device, otherwise fall back to the CPU.
    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    # Image-net standard per-channel mean and std.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # Discrete colour map over the boundaries 1..5; with extend='both' the two
    # open-ended ranges need their own colours, hence six colours in total.
    cmap, norm = from_levels_and_colors(
        [1, 2, 3, 4, 5],
        ['black', 'gray', 'green', 'yellow', 'orange', 'red'],
        extend='both',
    )

In [None]:
# Show which device (CUDA or CPU) Config selected for this session.
print(Config.device)

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU plus all CUDA
    devices), exports ``PYTHONHASHSEED`` and puts cuDNN into its
    reproducible configuration.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects Python interpreters started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN pick convolution algorithms by timing them,
    # which can vary between runs and defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs with the notebook-wide seed from Config.
seed_everything(Config.SEED)

In [None]:
# Per-slide metadata that ships with the tiled dataset.
train_df = pd.read_csv(os.path.join(Config.data_dir, 'train.csv'))

# Index the original mask files by the image id encoded before the first '_'.
masks = os.listdir(Config.orig_masks_dir)
masks_df = pd.DataFrame({
    'mask_file_name': masks,
    'image_id': [mask_name.split('_')[0] for mask_name in masks],
})

# Left join: keep exactly the slides listed in train.csv. An outer join would
# also add rows for masks that have no metadata (NaN grade/provider), and
# those rows would survive the "has a mask" filter below.
train_df = pd.merge(train_df, masks_df, on='image_id', how='left')
del masks_df

has_mask = train_df['mask_file_name'].notna()
print(f"There are {(~has_mask).sum()} images without a mask.")

## removing items where image mask is null
# .copy() gives an independent frame so later column assignments do not
# operate on a slice of the merged frame (SettingWithCopyWarning).
train_df = train_df[has_mask].copy()

print(len(train_df))
train_df.head()

In [None]:
# How many slides use each of the two spellings of a cancer-free Gleason score?
print(train_df['gleason_score'].eq('0+0').sum())
print(train_df['gleason_score'].eq('negative').sum())

In [None]:
# Fold the 'negative' spelling into '0+0' so both share a single label.
train_df['gleason_score'] = train_df['gleason_score'].replace("negative", "0+0")

In [None]:
# Re-count after the relabelling: the 'negative' count is expected to be 0 now.
print(train_df['gleason_score'].eq('0+0').sum())
print(train_df['gleason_score'].eq('negative').sum())

In [None]:
if not Config.DEBUG:
    # Rows graded ISUP 2 but scored 4+3 are removed.
    # NOTE(review): presumably a labelling inconsistency (4+3 normally maps to
    # ISUP 3) - confirm against the dataset notes.
    is_grade_2 = train_df['isup_grade'] == 2
    is_four_plus_three = train_df['gleason_score'] == '4+3'
    sample_to_drop = train_df.index[is_grade_2 & is_four_plus_three]
    train_df.drop(sample_to_drop, inplace=True)
    print(len(train_df))

In [None]:
# From the karolinska provider keep only the slides with ISUP grade 0.
# NOTE(review): presumably because its masks do not separate Gleason patterns,
# so they cannot supply tile-level pattern labels - confirm.
train_df.drop(
    train_df.loc[train_df.data_provider == 'karolinska']
            .query('isup_grade != 0')
            .index,
    inplace=True,
)
print(len(train_df))

In [None]:
# In DEBUG mode work on 6 randomly drawn slides; always renumber the rows 0..N-1.
if Config.DEBUG:
    train_df = train_df.sample(6)
train_df = train_df.reset_index(drop=True)
print(len(train_df))
train_df.head()

In [None]:
class PANDA_Dataset(Dataset):
    """Tile-level dataset: one item per (slide, tile) pair.

    Item ``index`` maps to slide ``index // n_tiles`` and tile
    ``index % n_tiles``. The tile image and its mask are read from
    ``<Config.train_img_dir>/<image_id>_<tile>.png`` and
    ``<Config.train_mask_dir>/<image_id>_mask_<tile>.png``.

    Args:
        df: DataFrame with at least ``image_id`` and ``data_provider`` columns.
        n_tiles: Number of tiles stored per slide.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=...)`` and returning a dict with key ``'image'``.
    """

    def __init__(self,
                 df,
                 n_tiles,
                 transform=None,
                ):

        self.df = df.reset_index(drop=True)
        self.n_tiles = n_tiles
        self.transform = transform

    def __len__(self):
        """Total number of tiles over all slides."""
        return self.df.shape[0] * self.n_tiles

    @staticmethod
    def _tile_label(mask_tile, data_provider):
        """Return the most frequent mask value of a tile as its class label.

        For 'radboud' slides every label above 1 is shifted down by one, so
        mask values 1 and 2 collapse into class 1.
        NOTE(review): presumably this merges two non-cancerous tissue classes -
        confirm against the mask specification.
        """
        values, counts = np.unique(mask_tile, return_counts=True)
        label = int(values[np.argmax(counts)])
        if data_provider == 'radboud' and label > 1:
            label -= 1
        return label

    def __getitem__(self, index):
        """Return ``(image, label)`` for one tile.

        Returns:
            image: float32 tensor in channel-first layout.
            label: scalar ``torch.long`` tensor holding the tile class.
        """
        slide_idx, tile_idx = divmod(index, self.n_tiles)
        image_id = self.df['image_id'].values[slide_idx]
        data_provider = self.df['data_provider'].values[slide_idx]

        img_tile_path = os.path.join(Config.train_img_dir, image_id) + '_'
        mask_tile_path = os.path.join(Config.train_mask_dir, image_id) + '_mask_'

        # Context managers release the file handles as soon as pixels are read.
        with Image.open(img_tile_path + str(tile_idx) + '.png') as img_file:
            img_tile = np.array(img_file)
        with Image.open(mask_tile_path + str(tile_idx) + '.png') as mask_file:
            mask_tile = np.array(mask_file)

        label = self._tile_label(mask_tile, data_provider)

        # Hand the transform the raw 0-255 tile. albumentations.Normalize
        # rescales by max_pixel_value (255 by default) itself, so dividing by
        # 255 beforehand would scale the image twice. The mask is not passed
        # because the label was already derived from it above.
        if self.transform is not None:
            img_tile = self.transform(image=img_tile)['image']

        if img_tile.dtype == np.uint8:
            # No normalisation happened (no transform, or geometric-only
            # transforms): bring the pixels into [0, 1].
            img_tile = img_tile.astype(np.float32) / 255
        else:
            img_tile = img_tile.astype(np.float32)

        # HWC -> CHW, the layout expected by the convolutional backbone.
        img_tile = img_tile.transpose(2, 0, 1)
        return torch.tensor(img_tile), torch.tensor(label, dtype=torch.long)

In [None]:
class PANDA_Dataset_test(Dataset):
    """Slide-level dataset: one item holds all tiles of one slide.

    Tiles are read from the same directories as the training tiles
    (``Config.train_img_dir`` / ``Config.train_mask_dir``).

    Args:
        df: DataFrame with at least ``image_id``, ``data_provider`` and
            ``isup_grade`` columns.
        n_tiles: Number of tiles stored per slide.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=...)`` and returning a dict with key ``'image'``.
    """

    def __init__(self,
                 df,
                 n_tiles,
                 transform=None,
                ):

        self.df = df.reset_index(drop=True)
        self.n_tiles = n_tiles
        self.transform = transform

    def __len__(self):
        """Number of slides."""
        return self.df.shape[0]

    @staticmethod
    def _tile_label(mask_tile, data_provider):
        """Return the most frequent mask value of a tile as its class label.

        For 'radboud' slides every label above 1 is shifted down by one, so
        mask values 1 and 2 collapse into class 1.
        NOTE(review): presumably this merges two non-cancerous tissue classes -
        confirm against the mask specification.
        """
        values, counts = np.unique(mask_tile, return_counts=True)
        label = int(values[np.argmax(counts)])
        if data_provider == 'radboud' and label > 1:
            label -= 1
        return label

    def __getitem__(self, index):
        """Return ``(images, tile_labels, isup_label)`` for one slide.

        Returns:
            images: float32 tensor, ``n_tiles`` tiles in channel-first layout.
            tile_labels: ``torch.long`` tensor with one class per tile.
            isup_label: the slide's ``isup_grade`` value from the DataFrame.
        """
        image_id = self.df['image_id'].values[index]
        data_provider = self.df['data_provider'].values[index]
        isup_label = self.df['isup_grade'].values[index]

        img_tile_path = os.path.join(Config.train_img_dir, image_id) + '_'
        mask_tile_path = os.path.join(Config.train_mask_dir, image_id) + '_mask_'

        tiles = []
        masks_label = np.zeros(self.n_tiles, dtype=np.uint8)
        for i in range(self.n_tiles):
            # Context managers release the file handles once pixels are read.
            with Image.open(img_tile_path + str(i) + '.png') as img_file:
                img_tile = np.array(img_file)
            with Image.open(mask_tile_path + str(i) + '.png') as mask_file:
                mask_tile = np.array(mask_file)

            masks_label[i] = self._tile_label(mask_tile, data_provider)

            if self.transform is not None:
                img_tile = self.transform(image=img_tile)['image']

            if img_tile.dtype == np.uint8:
                # Not normalised yet: bring the pixels into [0, 1].
                img_tile = img_tile.astype(np.float32) / 255
            else:
                # Already normalised by the transform; dividing by 255 again
                # would shrink the normalised values and no longer match the
                # scaling used for training tiles.
                img_tile = img_tile.astype(np.float32)
            tiles.append(img_tile)

        # Stack to (n_tiles, H, W, C) using the tiles' own size (no hard-coded
        # 64x64 buffer), then move channels first.
        imgs = np.stack(tiles).transpose(0, 3, 1, 2)

        return torch.tensor(imgs), torch.tensor(masks_label, dtype=torch.long), isup_label

In [None]:
# Make sure row labels are 0..N-1 before fold assignment indexes by position.
train_df = train_df.reset_index(drop=True)
train_df.head()

In [None]:
# Give every slide a validation-fold id in a new 'fold' column; the split is
# stratified on the ISUP grade and shuffled with the notebook seed.
skf = StratifiedKFold(n_splits=Config.n_folds, shuffle=True, random_state=Config.SEED)
train_df['fold'] = -1
fold_splits = skf.split(train_df, train_df['isup_grade'])
for fold_no, (train_rows, holdout_rows) in enumerate(fold_splits):
    # Only the held-out rows are needed: they define the fold membership.
    train_df.loc[holdout_rows, 'fold'] = fold_no
train_df.head()

In [None]:
# train_data = PANDA_Dataset(trunc_train_df, Config.n_tiles, None)

In [None]:
# Checkpoint paths keyed by backbone name. In this notebook the mapping is only
# referenced from commented-out load_state_dict calls.
pretrained_model = {
    'efficientnet-b0': '../input/model-4ep/efficientnet-b0_fold_1.pt'
}

In [None]:
class BasicModel(nn.Module):
    """EfficientNet classifier wrapper.

    Args:
        backbone: EfficientNet variant name passed to
            ``EfficientNet.from_pretrained`` (e.g. ``'efficientnet-b0'``).
        out_dim: Number of output classes of the classification head.
    """

    def __init__(self, backbone, out_dim=5):
        super().__init__()
        # Honour the requested backbone instead of always building b0.
        self.enet = EfficientNet.from_pretrained(backbone, num_classes=out_dim)

    def forward(self, x):
        """Return the backbone's class logits for a batch of images."""
        return self.enet(x)

In [None]:
def count_parameters(model):
    """Return how many scalar weights of ``model`` require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

def epoch_time(start_time, end_time):
    """Split the span between two timestamps (seconds) into whole minutes and seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated towards zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

# model = BasicModel(Config.backbone, Config.out_dim).to(Config.device)
# print(f'The model has {count_parameters(model):,} trainable parameters')

In [None]:
def train(model, iterator, optimizer, scheduler, epoch, criterion, device):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: Network to optimise; switched to training mode.
        iterator: Iterable of ``(data, target)`` batches supporting ``len()``.
        optimizer: Optimiser stepped once per batch.
        scheduler: Unused here; kept so existing callers keep working.
        epoch: Unused here; kept so existing callers keep working.
        criterion: Loss function called as ``criterion(prediction, target)``.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch losses as a Python float.
    """
    model.train()
    running_loss = 0.0
    for data, target in tqdm(iterator):
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        # .item() yields a Python float, so the sum is accumulated in double
        # precision rather than as numpy float32 scalars.
        running_loss += loss.item()

    return running_loss / len(iterator)

# TODO: finish this function
def _isup_from_tile_predictions(tile_pred):
    """Derive a slide-level ISUP grade from per-tile class predictions.

    The two most frequent predicted classes are taken as the primary and
    secondary pattern (ties go to the lower class id) and mapped to a grade.

    NOTE(review): assumes classes 0/1 are non-cancerous and 2/3/4 stand for
    Gleason patterns 3/4/5 - TODO confirm against the mask relabelling.
    NOTE(review): all classes take part in the ranking, so a slide dominated by
    class 0/1 tiles is graded 0 even if cancerous tiles exist - confirm intent.
    """
    classes, counts = np.unique(tile_pred, return_counts=True)
    if classes.size == 0:
        return 0

    # Rank by real frequency. Sorting the unique values by their count inside
    # the unique list gives every class a count of 1 and ranks nothing.
    order = np.argsort(-counts, kind='stable')
    primary = int(classes[order[0]])
    # A slide with a single predicted class uses it for both patterns.
    secondary = int(classes[order[1]]) if classes.size > 1 else primary

    if primary == 2:
        return {3: 2, 4: 4}.get(secondary, 1)
    if primary == 3:
        return {2: 3, 4: 5}.get(secondary, 4)
    if primary == 4:
        return 4 if secondary == 2 else 5
    # primary is 0 or 1
    return 0


def evaluate(model, iterator, criterion, device):
    """Validate on whole slides; return mean tile loss and quadratic kappa.

    Args:
        model: Network to evaluate; switched to eval mode.
        iterator: Iterable of ``(data, target, isup_target)`` supporting
            ``len()``, where each batch holds exactly one slide (leading
            dimension of size 1).
        criterion: Loss function called as ``criterion(logits, target)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, kappa)``: mean per-slide loss as a Python float, and the
        quadratic-weighted Cohen's kappa between derived and true ISUP grades.
    """
    epoch_loss = 0.0
    preds_list = []
    targets_list = []
    model.eval()

    with torch.no_grad():
        for data, target, isup_target in tqdm(iterator):
            # Drop only the batch dimension; a bare squeeze() would also
            # remove any other axis that happens to have size 1.
            data = data.to(device).squeeze(0)
            target = target.to(device).squeeze(0)

            logits = model(data)
            loss = criterion(logits, target)

            # One predicted class per tile of the slide.
            tile_pred = torch.argmax(logits, dim=1).cpu().numpy()

            preds_list.append(_isup_from_tile_predictions(tile_pred))
            # Store a plain int so the metric receives two flat integer lists.
            targets_list.append(int(isup_target))

            epoch_loss += loss.item()

    metric = metrics.cohen_kappa_score(preds_list, targets_list, weights='quadratic')

    return epoch_loss / len(iterator), metric


In [None]:
def fit_model(model, model_name, train_iterator, valid_iterator, optimizer, scheduler, loss_criterion, device, n_epochs, fold):
    """Train for ``n_epochs`` epochs, checkpointing the best model.

    After every epoch the model is validated and the scheduler stepped. The
    weights are written to ``{model_name}_fold_{fold}.pt`` after the first
    epoch and again whenever the validation metric improves.

    Args:
        model: Network to train.
        model_name: Prefix for the checkpoint file and statistics columns.
        train_iterator: Batches passed to ``train``.
        valid_iterator: Batches passed to ``evaluate``.
        optimizer: Optimiser used by ``train``.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        loss_criterion: Loss function shared by training and validation.
        device: Device the batches are moved to.
        n_epochs: Number of epochs to run.
        fold: Fold id used in the checkpoint name and column names.

    Returns:
        DataFrame with one row per epoch and the columns
        ``..._Training_Loss``, ``..._Validation_Loss``, ``..._Valid_Metric_Score``.
    """
    # Start below every possible score so any real metric counts as an improvement.
    best_valid_metric = float('-inf')

    train_losses = []
    valid_losses = []
    valid_metric_scores = []

    for epoch in range(n_epochs):

        start_time = time.time()

        print(f'Epoch: {epoch+1:02} | Training:')
        train_loss = train(model, train_iterator, optimizer, scheduler, epoch, loss_criterion, device)
        print(f'Epoch: {epoch+1:02} | Validating:')
        valid_loss, valid_metric_score = evaluate(model, valid_iterator, loss_criterion, device)

        scheduler.step()

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        valid_metric_scores.append(valid_metric_score)

        # Keep only the best-scoring weights. The comparison is False for a
        # NaN metric, so the first epoch is saved unconditionally to guarantee
        # that a checkpoint file exists after training.
        improved = valid_metric_score > best_valid_metric
        if improved:
            best_valid_metric = valid_metric_score
        if improved or epoch == 0:
            torch.save(model.state_dict(), f'{model_name}_fold_{fold}.pt')

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # Per-epoch summary
        print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f}')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Metric Score: {valid_metric_score:.3f}')

    return pd.DataFrame({f'{model_name}_fold_{fold}_Training_Loss':train_losses,
                        f'{model_name}_fold_{fold}_Validation_Loss':valid_losses,
                        f'{model_name}_fold_{fold}_Valid_Metric_Score':valid_metric_scores})

In [None]:
#Draw the per-epoch statistics table produced by fit_model
def plot_training_statistics(train_stats, model_name, fold):
    """Plot losses (top panel) and the validation metric (bottom panel).

    Args:
        train_stats: Table with the ``{model_name}_fold_{fold}_*`` columns.
        model_name: Name prefix used in the column names.
        fold: Fold id used in the column names.
    """
    loss_columns = [
        f'{model_name}_fold_{fold}_Training_Loss',
        f'{model_name}_fold_{fold}_Validation_Loss',
    ]
    metric_column = f'{model_name}_fold_{fold}_Valid_Metric_Score'

    fig, (loss_ax, metric_ax) = plt.subplots(2, figsize=(15,15))

    for column in loss_columns:
        loss_ax.plot(train_stats[column], label=column)
    metric_ax.plot(train_stats[metric_column], label=metric_column)

    loss_ax.set_xlabel("Number of Epochs")
    loss_ax.set_ylabel("Loss")
    metric_ax.set_xlabel("Number of Epochs")
    metric_ax.set_ylabel("Score on Metric")

    loss_ax.legend()
    metric_ax.legend()

In [None]:
# Training pipeline: random horizontal and vertical flips (each applied with
# probability 0.5) followed by normalisation with the mean/std from Config.
# NOTE(review): albumentations.Normalize defaults to max_pixel_value=255, i.e.
# it expects images still in the 0-255 range - confirm the datasets feed it
# unscaled tiles.
train_transforms = albumentations.Compose([
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    albumentations.Normalize(mean=Config.mean, std=Config.std, always_apply=True)
])
# Validation pipeline: the same normalisation, no random augmentation.
test_transforms = albumentations.Compose([
    albumentations.Normalize(mean=Config.mean, std=Config.std, always_apply=True)
])

In [None]:
# Train and validate one cross-validation fold.
fold = 1
print(f"Fitting on Fold {fold+1}")
#Make Train and Valid DataFrame from fold
train_df_fold = train_df[train_df['fold'] != fold]
valid_df_fold = train_df[train_df['fold'] == fold]

#Build and load Dataset
# Training iterates over single tiles; validation loads one whole slide per
# batch (batch_size=1) so a slide-level grade can be derived from its tiles.
train_data = PANDA_Dataset(train_df_fold, Config.n_tiles, train_transforms)
valid_data = PANDA_Dataset_test(valid_df_fold, Config.n_tiles, test_transforms)
train_iterator = DataLoader(train_data, shuffle=True, batch_size=Config.batch_size, num_workers=Config.num_workers)
valid_iterator = DataLoader(valid_data, batch_size=1, num_workers=Config.num_workers)

#Initialize model, loss and optimizer
model = BasicModel(Config.backbone, Config.out_dim).to(Config.device)
# model.load_state_dict(torch.load(pretrained_model[Config.backbone], map_location=Config.device))
loss_criterion = nn.CrossEntropyLoss().to(Config.device)
optimizer = optim.Adam(model.parameters(), lr=Config.lr)
# Cosine decay over the whole run; fit_model steps the scheduler once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, Config.num_epochs)

#Fit the model and visualize the training curves
# Name checkpoints and statistics after the configured backbone so they stay
# in sync with the model that was actually built.
train_stats = fit_model(model, Config.backbone, train_iterator, valid_iterator,
                        optimizer, scheduler, loss_criterion, Config.device, Config.num_epochs, fold)
plot_training_statistics(train_stats, Config.backbone, fold)

#Just making sure that the output looks neat
print('\n')
print('-------------------------------------------------------')
print('\n')

In [None]:
# NOTE(review): presumably a leftover marker showing that the run reached the last cell.
print('hello hell!')