In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF

import torch.utils.data as data
import os
import random
import glob
from PIL import Image
import matplotlib.pyplot as plt

import math
import random
import numpy as np

## Dataset

In [2]:
def preprocess(image, mask, flip=False, scale=None, crop=None):
    """Jointly augment a PIL image/mask pair and convert both to tensors.

    Args:
        image: PIL image. It is normalized with three-channel mean/std
            values, so it is presumably RGB -- confirm against the caller.
        mask: PIL mask. Its first two channels are read as class indicators
            (assumes the mask has at least two channels -- TODO confirm).
        flip: when truthy, mirror both inputs left-right with probability 0.5.
        scale: optional ``(min, max)`` pair. A factor is drawn uniformly in
            log2 space between the two and both inputs are resized by it.
        crop: optional ``(height, width)``. Inputs are padded on the bottom
            and right up to this size when smaller, then one random window of
            exactly this size is cut from both.

    Returns:
        ``(image, mask)``: the normalized image tensor ``(C, H, W)`` and a
        long mask tensor ``(H, W)`` holding class ids capped at 2, with 255
        on pixels added by padding.
    """
    # Geometric augmentations run on the PIL objects so that image and mask
    # always receive exactly the same transform.
    if flip and random.random() < 0.5:
        mirror = Image.Transpose.FLIP_LEFT_RIGHT
        image = image.transpose(mirror)
        mask = mask.transpose(mirror)

    if scale:
        width, height = image.size
        log_lo = math.log(scale[0], 2)
        log_hi = math.log(scale[1], 2)
        factor = math.pow(2, log_lo + random.random() * (log_hi - log_lo))
        target_size = (int(round(width * factor)), int(round(height * factor)))
        image = image.resize(target_size, Image.Resampling.LANCZOS)
        # Nearest-neighbour keeps the mask free of interpolated colours.
        mask = mask.resize(target_size, Image.Resampling.NEAREST)

    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    image = to_normalized_tensor(image)

    # Collapse the colour mask into one label map (HACK): channel 0 counts as
    # 1, channel 1 counts as 2, and pixels set in both are capped at 2.
    channels = TF.to_tensor(mask).round().long()
    mask = (channels[0] + 2 * channels[1]).clamp(max=2)

    if crop:
        crop_h, crop_w = crop[0], crop[1]
        pad_bottom = max(0, crop_h - image.shape[1])
        pad_right = max(0, crop_w - image.shape[2])
        padding = (0, pad_right, 0, pad_bottom)
        image = F.pad(image, padding, mode='constant', value=0)
        # 255 marks padded pixels in the label map.
        mask = F.pad(mask, padding, mode='constant', value=255)

        top = random.randint(0, image.shape[1] - crop_h)
        left = random.randint(0, image.shape[2] - crop_w)
        image = image[:, top:top + crop_h, left:left + crop_w]
        mask = mask[top:top + crop_h, left:left + crop_w]

    return image, mask

In [3]:
# File extension used for each kind of sample file.
_DATA_FORMAT_MAP = {
    'image': 'jpeg',
    'label': 'jpeg',
}

# Suffix appended to the split name to form the folder holding each kind of file.
_DATA_FOLDER_SUFFIX = {
    'image': '',
    'label': '_gt'
}

# Crop used for validation samples when the caller passes no ``crop_size``.
_DEFAULT_EVAL_CROP = (1024, 1280)


class NeoPolyp(data.Dataset):
    """Image/mask dataset read from ``<root>/<split>/<split>`` and ``<root>/<split>_gt/<split>_gt``.

    ``<split>`` is ``'train'`` when ``train`` is truthy and ``'val'`` otherwise.
    Images and masks are paired by their position in the sorted file lists.

    Args:
        root: dataset root directory.
        train: selects the split and enables random flip/scale augmentation.
        transform: optional callable applied to the image tensor after preprocessing.
        target_transform: optional callable applied to the mask tensor after preprocessing.
        crop_size: ``(height, width)`` passed to ``preprocess``. For the training
            split ``None`` disables cropping; for validation ``None`` falls back
            to ``(1024, 1280)``.

    Raises:
        ValueError: if the number of image files and mask files differ.
    """

    def __init__(self, root, train=True, transform=None, target_transform=None, crop_size=None):
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        self.crop_size = crop_size

        dataset_split = 'train' if self.train else 'val'
        self.images = self._get_files('image', dataset_split)
        self.masks = self._get_files('label', dataset_split)

        # Pairing is positional, so a count mismatch would silently attach
        # masks to the wrong images (or fail much later with an IndexError).
        if len(self.images) != len(self.masks):
            raise ValueError(
                'Found {} images but {} masks for split {!r} under {!r}'.format(
                    len(self.images), len(self.masks), dataset_split, self.root))

    def __getitem__(self, index):
        """Load, preprocess and return the ``(image, mask)`` tensors at ``index``."""
        _img = Image.open(self.images[index]).convert('RGB')
        _target = Image.open(self.masks[index])

        _img, _target = preprocess(_img, _target,
                                   flip=bool(self.train),
                                   scale=(0.66, 1.5) if self.train else None,
                                   crop=self._crop_for_split())

        if self.transform is not None:
            _img = self.transform(_img)

        if self.target_transform is not None:
            _target = self.target_transform(_target)

        return _img, _target

    def _crop_for_split(self):
        """Return the crop handed to ``preprocess`` for this dataset's split."""
        # Honour an explicit crop_size on every split; only validation without
        # one falls back to the fixed default.
        if self.train or self.crop_size is not None:
            return self.crop_size
        return _DEFAULT_EVAL_CROP

    def _get_files(self, data, dataset_split):
        """Return the sorted file paths of kind ``data`` ('image' or 'label') for a split."""
        folder_name = dataset_split + _DATA_FOLDER_SUFFIX[data]
        pattern = '*.{}'.format(_DATA_FORMAT_MAP[data])
        # The folder name appears twice because the files sit in a nested
        # directory of the same name.
        search_files = os.path.join(
            self.root, folder_name, folder_name, pattern)
        return sorted(glob.glob(search_files))

    def __len__(self):
        return len(self.images)

In [4]:
# Training samples are randomly cropped to 640x800.
train_data = NeoPolyp(
    root="/kaggle/input/bkai-neopolyp/bkai-igh-neopolyp",
    train=True,
    crop_size=(640, 800),
)
# Validation split of the same dataset root.
val_data = NeoPolyp(
    root="/kaggle/input/bkai-neopolyp/bkai-igh-neopolyp",
    train=False,
    crop_size=(1024, 1280),
)

In [5]:
# Shuffle only the training split; validation keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

## Model (DeepLabV3)

In [6]:
from torchvision.models.segmentation import deeplabv3_resnet50

# Three output classes, matching the label ids 0, 1 and 2 produced by preprocess.
model = deeplabv3_resnet50(num_classes=3)
model = model.cuda()
model.train();

In [7]:
# Label 255 (the value preprocess writes on padded pixels) is excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=1e-4,
)

## Training and evaluation

In [8]:
from torchmetrics import Dice, JaccardIndex

# Shared metric configuration: three classes, with class 0 left out of the scores.
kwargs = dict(num_classes=3, ignore_index=0)

# Separate accumulators for training and validation so their states never mix.
train_dice = Dice(**kwargs).cuda()
eval_dice = Dice(**kwargs).cuda()
train_iou = JaccardIndex(**kwargs).cuda()
eval_iou = JaccardIndex(**kwargs).cuda()

In [9]:
def evaluate(model, epoch=None):
    """Run one pass over ``val_loader`` and print the accumulated Dice and IoU.

    Uses the notebook-level ``val_loader``, ``eval_dice`` and ``eval_iou``.
    The model is left in eval mode when the function returns.

    Args:
        model: network whose forward call returns a mapping with an ``'out'``
            entry of per-class scores; predictions are its argmax over dim 1.
        epoch: label printed in the report. When omitted, the notebook-level
            ``epoch`` variable is used if it exists, so ``evaluate(model)``
            called from the training loop prints the current epoch; outside a
            loop it prints ``n/a`` instead of raising ``NameError``.
    """
    if epoch is None:
        epoch = globals().get('epoch', 'n/a')

    # Start from a clean state in case an earlier evaluation was interrupted
    # before it could reset the accumulators.
    eval_dice.reset()
    eval_iou.reset()

    model.eval()
    with torch.no_grad():
        for inputs, target in val_loader:
            inputs, target = inputs.cuda(), target.cuda()
            outputs = model(inputs)['out']

            eval_pred = outputs.argmax(dim=1)
            # Labels above 2 (e.g. the 255 padding label) are folded into class 0.
            target = torch.where(target > 2, 0, target)
            # Only the accumulated value is reported, so update the state
            # without computing a per-batch score.
            eval_dice.update(eval_pred, target)
            eval_iou.update(eval_pred, target)

        eval_dice_metric, eval_iou_metric = eval_dice.compute(), eval_iou.compute()
        print(f"Val metric on epoch {epoch}: {eval_dice_metric.item():.4f}, {eval_iou_metric.item():.4f}")

    eval_dice.reset()
    eval_iou.reset()

In [None]:
start_epoch = 0
epochs = 10

for epoch in range(start_epoch, epochs):
    # evaluate() switches the model to eval mode, so re-enable training mode each epoch.
    model.train()
    for i, (inputs, target) in enumerate(train_loader):
        inputs, target = inputs.cuda(), target.cuda()
        outputs = model(inputs)['out']
        loss = criterion(outputs, target)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        train_pred = outputs.argmax(dim=1)
        # Labels above 2 (e.g. the 255 padding label) are folded into class 0 for the metrics.
        target = torch.where(target > 2, 0, target)
        # Calling the metric returns the score of this batch and also adds the
        # batch to the running epoch state.
        train_dice_metric, train_iou_metric = train_dice(train_pred, target), train_iou(train_pred, target)
        if (i+1) % 20 == 0:
            print('epoch: {0}\t'
                  'iter: {1}/{2}\t'
                  'loss: {3:.4f})'.format(epoch, i+1, len(train_loader), loss.item()))
            print(f"Train metrics on batch {i+1}: {train_dice_metric.item():.4f}, {train_iou_metric.item():.4f}")

    train_dice_metric, train_iou_metric = train_dice.compute(), train_iou.compute()
    print(f"Train metrics on epoch {epoch}: {train_dice_metric.item():.4f}, {train_iou_metric.item():.4f}")

    # Clear the accumulated state so the next epoch's figures cover only that
    # epoch (.eval() does not reset a metric).
    train_dice.reset()
    train_iou.reset()
    evaluate(model)
    print("-------------------")
    # Checkpoint after every 10th epoch.
    if epoch % 10 == 9:
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, "deeplab.pth")

## Generate output

In [None]:
import numpy as np
import pandas as pd
import cv2
import os

def rle_to_string(runs):
    """Join run-length values into one space-separated string."""
    return ' '.join(str(x) for x in runs)

def rle_encode_one_mask(mask):
    """Run-length encode the foreground of ``mask``.

    The mask is flattened in row-major order and every value greater than
    zero counts as foreground. The input is not modified.

    Args:
        mask: array-like of any shape and numeric or boolean dtype.

    Returns:
        A string of space-separated ``start length`` pairs with 1-based
        starts, or ``''`` when the mask is empty or has no foreground.
    """
    foreground = np.asarray(mask).flatten() > 0
    # A background sentinel on each side guarantees that every run has both an
    # opening and a closing boundary, including runs touching either end and
    # the empty-input case.
    padded = np.concatenate(([False], foreground, [False]))
    # Index i of the comparison marks a change between padded[i] and
    # padded[i + 1], i.e. at 1-based position i + 1 of the flattened mask.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Boundaries alternate run start / one-past-run-end; turn each end into a length.
    boundaries[1::2] -= boundaries[::2]
    return rle_to_string(boundaries)

def rle2mask(mask_rle, shape=(3,3)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: run-length string formatted as "start length" pairs (1-based starts)
    shape: (width, height) of the array to return
    Returns a numpy uint8 array of shape (height, width): 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even positions hold the starts (shifted to 0-based), odd positions the lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(starts, ends):
        flat[begin:end] = 1
    # Fill as (width, height), then transpose to (height, width).
    return flat.reshape(shape).T

In [None]:
@torch.no_grad()
def mask2string(dir):
    """Segment every image in ``dir`` and run-length encode the predictions.

    Uses the notebook-level ``model`` (switched to eval mode here) and
    ``rle_encode_one_mask``. Each image is fed at its original size and yields
    two entries, one for predicted class 1 and one for predicted class 2.

    Args:
        dir: directory whose entries are all treated as image files.

    Returns:
        dict with two parallel lists: ``'ids'`` holds ``'<name>_0'`` and
        ``'<name>_1'`` for every file, where ``<name>`` is the file name up to
        its first dot, and ``'strings'`` holds the matching RLE strings.
    """
    model.eval()
    data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    ids = []
    strings = []
    # Sorted so the output order does not depend on the file system.
    for file_name in sorted(os.listdir(dir)):
        image_name = file_name.split('.')[0]
        path = os.path.join(dir, file_name)

        # Convert to RGB like the training pipeline does; the normalization
        # above expects exactly three channels.
        with Image.open(path) as raw_image:
            img = data_transforms(raw_image.convert('RGB')).unsqueeze(0).cuda()

        output = model(img)['out']
        pred = output.argmax(dim=1).cpu().numpy()

        # Entry suffix 0 encodes predicted class 1, suffix 1 encodes class 2.
        for mask in range(2):
            ids.append(f'{image_name}_{mask}')
            strings.append(rle_encode_one_mask(pred == mask + 1))

    return {
        'ids': ids,
        'strings': strings,
    }

In [None]:
# Folder of test images to segment (despite the name, it does not hold masks).
MASK_DIR_PATH = '../input/bkai-neopolyp/bkai-igh-neopolyp/test/test'

model.eval()
res = mask2string(MASK_DIR_PATH)

# One row per (image, class) entry: the id and its run-length encoded mask.
df = pd.DataFrame({
    'Id': res['ids'],
    'Expected': res['strings'],
})
df.to_csv(r'output.csv', index=False)