In [1]:
import os
import random
from tqdm import tqdm
import pandas as pd
import numpy as np
from glob import glob
import gc
import time
from collections import defaultdict
import  matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import copy
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from torch.cuda import amp
import torch.optim as optim
import albumentations as A
import segmentation_models_pytorch as smp

from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CFG:
    seed          = 42
    debug         = False # set debug=False for Full Training
    exp_name      = 'baseline'
    comment       = 'unet-efficientnet_b1-512x512'
    output_dir    = './'
    model_name    = 'Unet'
    backbone      = 'efficientnet-b1'
    train_bs      = 16
    valid_bs      = 32
    img_size      = [512, 512]
    epochs        = 15
    n_accumulate  = max(1, 64//train_bs)
    lr            = 2e-3
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(2279/(train_bs*n_accumulate)*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    wd            = 1e-6
    n_fold        = 5
    num_classes   = 1
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    weigth        = 'weight/unit_baseline_kaagle.bin'
    gt_df         = "data/gt.csv"
    data_root     = "data"
    train_groups = ["kidney_1_dense"]
    valid_groups = ["kidney_3_dense"]
    loss_func     = "DiceLoss"

    data_transforms = {
        "train": A.Compose([
            A.Resize(*img_size, interpolation=cv2.INTER_NEAREST),
            A.HorizontalFlip(p=0.5),
        ], p=1.0),
        
        "valid": A.Compose([
            A.Resize(*img_size, interpolation=cv2.INTER_NEAREST),
        ], p=1.0)
    }

In [3]:
def set_seed(seed = 42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
set_seed(CFG.seed)

# DataLoader

In [4]:
def load_img(path):
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    img = np.tile(img[...,None], [1, 1, 3]) # gray to rgb
    img = img.astype('float32') # original is uint16
    mx = np.max(img)
    if mx:
        img/=mx # scale image to [0, 1]
    return img

def load_msk(path):
    msk = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    msk = msk.astype('float32')
    msk/=255.0
    return msk

In [5]:
class BuildDataset(torch.utils.data.Dataset):
    def __init__(self, img_paths, msk_paths=[], transforms=None):
        self.img_paths  = img_paths
        self.msk_paths  = msk_paths
        self.transforms = transforms
        
    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, index):
        img_path  = self.img_paths[index]
        img = load_img(img_path)
        
        if len(self.msk_paths)>0:
            msk_path = self.msk_paths[index]
            msk = load_msk(msk_path)
            if self.transforms:
                data = self.transforms(image=img, mask=msk)
                img  = data['image']
                msk  = data['mask']
            img = np.transpose(img, (2, 0, 1))
            return torch.tensor(img), torch.tensor(msk)
        else:
            orig_size = img.shape
            if self.transforms:
                data = self.transforms(image=img)
                img  = data['image']
            img = np.transpose(img, (2, 0, 1))
            return torch.tensor(img), torch.tensor(np.array([orig_size[0], orig_size[1]]))

In [14]:
valid_groups = CFG.valid_groups
gt_df = pd.read_csv(CFG.gt_df)
gt_df["img_path"] = gt_df["img_path"].apply(lambda x: os.path.join(CFG.data_root, x))
gt_df["msk_path"] = gt_df["msk_path"].apply(lambda x: os.path.join(CFG.data_root, x))
gt_df['img_path'] = gt_df['img_path'].map(lambda x: x.replace('blood-vessel-segmentation',''))
gt_df['msk_path'] = gt_df['msk_path'].map(lambda x: x.replace('blood-vessel-segmentation',''))
valid_df = gt_df.query("group in @valid_groups").reset_index(drop=True)
valid_img_paths = valid_df["img_path"].values.tolist()
valid_msk_paths = valid_df["msk_path"].values.tolist()
if CFG.debug:
    valid_img_paths = valid_img_paths[:CFG.valid_bs*3]
    valid_msk_paths = valid_msk_paths[:CFG.valid_bs*3]

# Model

In [6]:
def build_model(backbone, num_classes, device):
    model = smp.Unet(
        encoder_name=backbone,      # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
        encoder_weights="imagenet",     # use `imagenet` pre-trained weights for encoder initialization
        in_channels=3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
        classes=num_classes,        # model output channels (number of classes in your dataset)
        activation=None,
    )
    model.to(device)
    return model

In [7]:
model = build_model(CFG.backbone, CFG.num_classes, CFG.device)

In [9]:
checkpoint = torch.load('weight\state first_baseline_unet 9.pth')


model.load_state_dict(checkpoint['model_state'])
model.eval();

In [10]:
#model.load_state_dict(torch.load(CFG.weigth))
#model.load_state_dict(torch.load('model_weights.pth'))
#model.eval();

In [10]:
torch.cuda.is_available()

True

In [15]:
def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    rle = ' '.join(str(x) for x in runs)
    if rle=='':
        rle = '1 0'
    return rle

In [16]:
test_dataset = BuildDataset(valid_img_paths, [], transforms=CFG.data_transforms['valid'])
test_loader = DataLoader(test_dataset, batch_size=CFG.valid_bs, num_workers=0, shuffle=False, pin_memory=True)

In [24]:
test_dataset = BuildDataset(valid_img_paths, [], transforms=CFG.data_transforms['valid'])
test_loader = DataLoader(test_dataset, batch_size=CFG.valid_bs, num_workers=0, shuffle=False, pin_memory=True)
### Inference
rles = []
pbar = tqdm(enumerate(test_loader), total=len(test_loader), desc='Inference ')
for step, (images, shapes) in pbar:
    shapes = shapes.numpy()
    images = images.to(CFG.device, dtype=torch.float)
    with torch.no_grad():
        preds = model(images)
        preds = (nn.Sigmoid()(preds)>0.5).double()
    preds = preds.cpu().numpy().astype(np.uint8)

    for pred, shape in zip(preds, shapes):
        pred = cv2.resize(pred[0], (shape[1], shape[0]), cv2.INTER_NEAREST)
        rle = rle_encode(pred)
        rles.append(rle)

Inference :   0%|          | 0/16 [00:00<?, ?it/s]

Inference : 100%|██████████| 16/16 [02:49<00:00, 10.61s/it]


In [25]:
ids = [f'{p.split("/")[-3]}_{os.path.basename(p).split(".")[0]}' for p in valid_img_paths]
submission = pd.DataFrame.from_dict({
    "id": ids,
    "rle": rles
})
submission.head()

Unnamed: 0,id,rle
0,kidney_3_sparse_0496,276842 4 278351 6 279862 5 281371 6 282879 11 ...
1,kidney_3_sparse_0497,367649 3 369158 5 370668 5 372178 6 373685 11 ...
2,kidney_3_sparse_0498,367649 6 369158 8 370668 8 372178 8 373685 11 ...
3,kidney_3_sparse_0499,367649 3 369158 5 370668 5 372178 6 373688 8 3...
4,kidney_3_sparse_0500,363118 5 364628 6 366138 6 367648 7 369158 8 3...


In [26]:
_gt_df = pd.merge(gt_df, submission.loc[:, ["id"]], on="id").reset_index(drop=True)
_gt_df.head()

Unnamed: 0,id,rle,group,slice,height,width,img_path,msk_path
0,kidney_3_sparse_0496,332888 1 334398 1 352510 2 354019 3 355529 4 3...,kidney_3_sparse,496,1706,1510,data\/train/kidney_3_sparse/images/0496.tif,data\/train/kidney_3_sparse/labels/0496.tif
1,kidney_3_sparse_0497,331377 2 332887 2 352509 3 354018 4 355529 3 3...,kidney_3_sparse,497,1706,1510,data\/train/kidney_3_sparse/images/0497.tif,data\/train/kidney_3_sparse/labels/0497.tif
2,kidney_3_sparse_0498,350998 3 352508 4 354018 4 355528 4 361608 3 3...,kidney_3_sparse,498,1706,1510,data\/train/kidney_3_sparse/images/0498.tif,data\/train/kidney_3_sparse/labels/0498.tif
3,kidney_3_sparse_0499,350997 3 352507 4 354018 3 355528 3 360099 3 3...,kidney_3_sparse,499,1706,1510,data\/train/kidney_3_sparse/images/0499.tif,data\/train/kidney_3_sparse/labels/0499.tif
4,kidney_3_sparse_0500,349488 1 350997 3 352507 3 354017 3 355528 2 3...,kidney_3_sparse,500,1706,1510,data\/train/kidney_3_sparse/images/0500.tif,data\/train/kidney_3_sparse/labels/0500.tif


In [27]:
_gt_df.shape

(501, 8)

In [21]:
from Surface_Dice_Metric.metric import *

In [28]:
val_score = score(submission, _gt_df, "id", "rle", 0.0, "group", "slice")

In [31]:
print(val_score)

0.18812598854384951


In [29]:
print(val_score)

0.18812598854384951


In [27]:
print(val_score)

0.1291427943658296
