In [None]:
!pip install /kaggle/input/humapsubmit/timm-0.4.5-py3-none-any.whl
!pip install /kaggle/input/humapsubmit/coolname-1.1.0-py2.py3-none-any.whl
!pip install /kaggle/input/humapsubmit/runx-0.0.10-py3-none-any.whl

In [None]:
# Put the attached Kaggle dataset directories at the front of the import path
# so the project-local packages imported in later cells can be resolved.
# Each insert goes to position 0, so the last directory listed is searched first.
import sys
sys.path.insert(0, "/kaggle/input/humapcode/")
sys.path.insert(0, "/kaggle/input/humapsubmit/")
sys.path.insert(0, "/kaggle/input/humapcode/semantic_segmentation2")

In [None]:
import numpy as np, pandas as pd, os, random, cv2, gc
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torch.cuda.amp import autocast, GradScaler
from pathlib import Path
import albumentations as A
from matplotlib import pyplot as plt

In [None]:
# Per-channel mean and standard deviation applied by HuMapTestDataset.normalize
# after pixel values have been divided by 255.
# NOTE(review): channel order presumably matches the band order read from the
# TIFF (bands 1, 2, 3) — confirm against the training pipeline.
IMAGE_MEAN = [0.640, 0.473, 0.684]
IMAGE_STD = [0.160, 0.227, 0.143]



def seed_torchv2(seed: int = 42) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, the current CUDA device
    and all CUDA devices), exports ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic, non-benchmarking mode.

    Args:
        seed: value handed to every seeding call.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



## kaggle help function
# https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of
    "start length" pairs; starts are 1-based positions in the
    transposed (column-major) flattening of img.
    '''
    flat = img.T.flatten()
    # Zero sentinels on both ends so runs touching a border still open/close.
    padded = np.concatenate([[0], flat, [0]])
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends -> turn ends into lengths.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))


def rle2mask(mask_rle, shape=(1600, 256)):
    '''
    Decode a run-length string back into a binary mask.

    mask_rle: run-length as string formated (start length), starts are 1-based
    shape: (width,height) of array to return
    Returns numpy array, 1 - mask, 0 - background

    '''
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int)
    lengths = np.asarray(tokens[1::2], dtype=int)
    starts -= 1  # RLE positions are 1-based
    stops = starts + lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(starts, stops):
        flat[begin:stop] = 1
    # The encoding is column-major, so reshape first and transpose afterwards.
    return flat.reshape(shape).T

# https://www.kaggle.com/bguberfain/memory-aware-rle-encoding
# with transposed mask
def rle_encode_less_memory(img):
    """Run-length encode a binary mask without building a padded pixel copy.

    The mask is flattened in column-major order (``img.T.flatten()``) and
    encoded as space-separated "start length" pairs with 1-based starts, the
    same format produced by ``mask2rle``.

    Unlike the original shortcut, foreground in the very first or very last
    pixel is preserved: the boundaries are handled by adding sentinel change
    points to the (small) run array instead of zeroing those pixels.

    Args:
        img: 2-D numpy array, non-zero = mask, 0 = background. Not modified.

    Returns:
        The RLE string; an empty string for an all-background or empty mask.
    """
    pixels = img.T.flatten()
    if pixels.size == 0:
        return ''

    # 1-based position of the first pixel of every new run.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2

    # A run that is already open at pixel 1 needs an explicit start ...
    if pixels[0]:
        runs = np.concatenate([[1], runs])
    # ... and a run still open at the last pixel needs an explicit end.
    if pixels[-1]:
        runs = np.concatenate([runs, [pixels.size + 1]])

    # Even slots are starts, odd slots are ends -> convert ends to lengths.
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)




def _grid_axis_bounds(length, window, min_overlap):
    """Return (starts, stops) of overlapping windows covering ``[0, length)`` on one axis."""
    stride = window - min_overlap
    if stride <= 0:
        raise ValueError(
            f"window ({window}) must be larger than min_overlap ({min_overlap})")
    count = length // stride + 1
    starts = np.linspace(0, length, num=count, endpoint=False, dtype=np.int64)
    # Anchor the last window to the far edge; clamp at 0 so an axis shorter
    # than one window yields a single partial tile instead of a negative start.
    starts[-1] = max(length - window, 0)
    stops = (starts + window).clip(0, length)
    return starts, stops


def make_grid(shape, window=256, min_overlap=32):
    """
        Build the tile grid for an image.

        shape: (x, y) extent of the image along the two tiled axes
        window: edge length of a tile
        min_overlap: minimum overlap between neighbouring tiles

        Return Array of size (N,4), where N - number of tiles,
        2nd axis represente slices: x1,x2,y1,y2.
        Rows are ordered with x as the outer and y as the inner loop.
        Tiles are ``window`` wide except when an axis is shorter than
        ``window``; then the single tile on that axis spans the whole axis.
    """
    x, y = shape
    x1, x2 = _grid_axis_bounds(x, window, min_overlap)
    y1, y2 = _grid_axis_bounds(y, window, min_overlap)
    nx, ny = len(x1), len(y1)

    slices = np.zeros((nx, ny, 4), dtype=np.int64)
    # Broadcast the per-axis bounds over the (nx, ny) grid.
    slices[..., 0] = x1[:, None]
    slices[..., 1] = x2[:, None]
    slices[..., 2] = y1[None, :]
    slices[..., 3] = y2[None, :]
    return slices.reshape(nx * ny, 4)


In [None]:
##  model
from unet import Smp_Unet, Smp_Unetplusplus
from semantic_segmentation2.network.deepv3 import DeepV3PlusSRNX50, DeepV3PlusSRNX101


import rasterio
from rasterio.windows import Window


# config

In [None]:
os.listdir("/kaggle/input/")

In [None]:
class Config:
    """Static inference settings read by the dataset, the dataloader factory and submit()."""
    # Not referenced by the inference code visible in this notebook.
    seed = 42
    folds = 5

    # Edge length of the square tiles cut from each TIFF, and the minimum
    # overlap between neighbouring tiles (both are passed to make_grid).
    WINDOW = 1536
    WINDOW_OVERLAP = WINDOW // 4

    # Empty-tile filter used in HuMapTestDataset.grab: a tile is skipped when at
    # most P_TH pixels have saturation above S_TH, or when its pixel sum is at
    # most P_TH. P_TH grows with the tile area relative to a 384-pixel tile.
    S_TH = 40
    P_TH = 1000 * (WINDOW // 384) ** 2


    # Directory whose *.tiff files are segmented by submit().
    root_dir = Path("/kaggle/input/hubmap-kidney-segmentation/test/")


    # Per-phase settings consumed by get_dataloader(); only "test" is defined.
    dataset_params = {
        "test": {
            # Empty pipeline: no albumentations transforms are listed.
            "transformers": A.Compose([

            ]),
            "batch_size": 16,
            "shuffle": False,
            "pin_memory": False,
            "num_workers": 0,
        },

    }


In [None]:
class HuMapTestDataset(Dataset):
    """Tiled, lazily-read view over one TIFF image for inference.

    The image is split into overlapping square tiles by ``make_grid``; every
    item is one normalized tile together with its slice coordinates and a flag
    telling whether the tile passed the saturation/intensity filter.

    Files that do not expose exactly three bands are read through their
    subdatasets, one opened handle per channel. Call ``close()`` when done to
    release the rasterio handles.
    """

    def __init__(self, name, phase, transformers=None):
        """Open ``name`` with rasterio and precompute the tile grid.

        Args:
            name: path of the TIFF file.
            phase: stored on the instance; not used by the dataset itself.
            transformers: optional callable invoked as ``transformers(image=...)``
                returning a mapping with an ``"image"`` entry.
        """
        self.name = name
        self.data = rasterio.open(name)
        # Only populated when the channels live in separate subdatasets.
        self.layers = []
        if self.data.count != 3:
            self.layers = [rasterio.open(subdataset) for subdataset in self.data.subdatasets]

        self.phase = phase
        self.transformers = transformers
        self.window = Config.WINDOW
        self.window_overlap = Config.WINDOW_OVERLAP
        # make_grid expects (x, y) while data.shape is indexed [0], [1] the
        # other way round (see get_data_hw).
        self.slices = make_grid((self.data.shape[1], self.data.shape[0]), window=Config.WINDOW, min_overlap=Config.WINDOW_OVERLAP)
        self.S_TH = Config.S_TH
        self.P_TH = Config.P_TH

    def close(self):
        """Close every rasterio handle opened by this dataset."""
        for handle in (*self.layers, self.data):
            handle.close()

    def normalize(self, image):
        """Scale to [0, 1], standardize with IMAGE_MEAN/IMAGE_STD and return channels-first float32."""
        image = image.astype(np.float32)
        image = image / 255.
        # In-place on purpose: keeps the array float32.
        image -= IMAGE_MEAN
        image /= IMAGE_STD
        return np.transpose(image, axes=[2, 0, 1])

    def get_data_hw(self):
        """Return ``(data.shape[0], data.shape[1])`` of the opened image."""
        return self.data.shape[0], self.data.shape[1]

    def __len__(self):
        return len(self.slices)

    def grab(self, slice):
        """Read one tile and decide whether it is worth predicting on.

        Args:
            slice: ``(x1, x2, y1, y2)`` bounds as produced by ``make_grid``.

        Returns:
            ``(image, valid_image)``: the tile as a channels-last array,
            zero-padded at the bottom/right to ``window`` x ``window``, and a
            bool that is False for tiles rejected by the empty-tile filter.
        """
        x1, x2, y1, y2 = slice
        read_window = Window.from_slices((y1, y2), (x1, x2))
        if self.data.count == 3:
            image = np.moveaxis(self.data.read([1, 2, 3], window=read_window), 0, -1)
        else:
            image = np.stack([layer.read(1, window=read_window) for layer in self.layers], axis=-1)

        pad_rows = self.window - image.shape[0]
        pad_cols = self.window - image.shape[1]
        if pad_rows != 0 or pad_cols != 0:
            # A short tile is only expected at the far image border.
            assert y2 == self.data.shape[0] or x2 == self.data.shape[1], f"{y2} {self.data.shape[0]} {x2} {self.data.shape[1]}"
            image = np.pad(image, ((0, pad_rows), (0, pad_cols), (0, 0)), mode="constant", constant_values=0)

        assert image.shape[0] == image.shape[1] == Config.WINDOW and image.shape[
            2] == 3, f"{image.shape[0]} {image.shape[1]} {image.shape[2]}"

        # Only the saturation channel is used below.
        # NOTE(review): saturation should not depend on the channel order, so the
        # BGR flag is presumably harmless for these band-ordered tiles — confirm.
        saturation = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)[..., 1]

        too_few_saturated = (saturation > self.S_TH).sum() <= self.P_TH
        too_dark = image.sum() <= self.P_TH
        valid_image = not (too_few_saturated or too_dark)

        return image, valid_image

    def __getitem__(self, item):
        """Return ``{"image", "slice", "valid_image"}`` for tile ``item``."""
        sliceh = self.slices[item]
        image, valid_image = self.grab(sliceh)

        if self.transformers is not None:
            image = self.transformers(image=image)["image"]

        return {
            "image": self.normalize(image),
            "slice": sliceh,
            "valid_image": valid_image
        }


def collect_fn(batches):
    """Collate dataset samples into one batch dictionary.

    Every key of the first sample becomes a list with one entry per sample.
    The ``"image"`` arrays are converted to tensors and stacked along a new
    leading batch dimension; all other values are kept as plain Python lists.
    """
    collated = {key: [] for key in batches[0].keys()}

    for sample in batches:
        for key, value in sample.items():
            entry = torch.from_numpy(value) if key == "image" else value
            collated[key].append(entry)

    if "image" in collated:
        collated["image"] = torch.stack(collated["image"], dim=0)

    return collated



def get_dataloader(name, phase):
    """Create the tile dataset for one image and a DataLoader over it.

    Args:
        name: path of the TIFF file handed to ``HuMapTestDataset``.
        phase: key into ``Config.dataset_params`` selecting the loader settings.

    Returns:
        ``(dataset, dataloader)``; batches are assembled by ``collect_fn`` and
        the last, possibly smaller, batch is kept.
    """
    params = Config.dataset_params[phase]
    dataset = HuMapTestDataset(name, phase, transformers=params["transformers"])

    loader_options = {
        option: params[option]
        for option in ("batch_size", "shuffle", "pin_memory", "num_workers")
    }
    loader = DataLoader(dataset, drop_last=False, collate_fn=collect_fn, **loader_options)
    return dataset, loader
    

In [None]:
## classification model
class Flatten(nn.Module):
    """Collapse every dimension after the batch dimension into one."""

    def forward(self, input):
        batch_size = input.size(0)
        return input.view(batch_size, -1)

class AdaptiveConcatPool2d(nn.Module):
    """Adaptive max pooling and adaptive average pooling, concatenated on the channel axis.

    The output therefore has twice as many channels as the input (max part
    first, average part second).
    """

    def __init__(self, sz=None):
        super().__init__()
        output_size = sz if sz else (1, 1)
        self.ap = nn.AdaptiveAvgPool2d(output_size)
        self.mp = nn.AdaptiveMaxPool2d(output_size)

    def forward(self, x):
        max_pooled = self.mp(x)
        avg_pooled = self.ap(x)
        return torch.cat((max_pooled, avg_pooled), dim=1)

def gem(x, p=3, eps=1e-6):
    """Generalized-mean pooling over the full spatial extent of ``x``.

    Values are clamped to at least ``eps``, raised to the power ``p``,
    averaged over the last two dimensions and taken to the power ``1/p``.
    The two spatial dimensions are reduced to size 1.
    """
    height, width = x.size(-2), x.size(-1)
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, (height, width))
    return pooled.pow(1. / p)


from torch.nn import Parameter
class GeM(nn.Module):
    """Generalized-mean pooling layer whose exponent ``p`` is a learnable parameter."""

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

from pytorch_image_models.timm.models.resnet import resnet18, resnet34, resnet50, resnet50d, resnext50_32x4d,\
    ssl_resnext50_32x4d, ssl_resnext101_32x4d, ssl_resnet18
from pytorch_image_models.timm.models.senet import legacy_seresnext50_32x4d, legacy_seresnext101_32x4d, legacy_senet154

from pytorch_image_models.timm.models.densenet import densenet121, densenet169
from pytorch_image_models.timm.models.efficientnet import *
from pytorch_image_models.timm.models.resnest import resnest50d, resnest50d_4s2x40d, resnest50d_1s4x24d, resnest101e
from pytorch_image_models.timm.models.resnet import *
from pytorch_image_models.timm.models.resnet import ecaresnet50d, ecaresnet50t
from pytorch_image_models.timm.models.resnet import seresnext50_32x4d
from pytorch_image_models.timm.models.res2net import *
from pytorch_image_models.timm.models.res2net import res2net50_26w_4s

from pytorch_image_models.timm.models.nfnet import *

class ResNest50OrgBackbone(nn.Module):
    """Feature extractor around a ResNeSt model fetched through ``torch.hub``.

    Only the convolutional trunk of the hub model is run; both ``forward`` and
    ``forward_features`` return the output of ``layer4``.
    """

    # Sub-modules of the hub model, in execution order.
    _TRUNK_STAGES = (
        "conv1", "bn1", "relu", "maxpool",
        "layer1", "layer2", "layer3", "layer4",
    )

    def __init__(self, name, pretrained=True):
        super().__init__()
        print(torch.hub.list("zhanghang1989/ResNeSt"))
        self.net = torch.hub.load("zhanghang1989/ResNeSt", name, pretrained=pretrained)
        self.num_features = 2048

    def _run_trunk(self, x):
        """Apply every trunk stage of the wrapped network in order."""
        for stage_name in self._TRUNK_STAGES:
            x = getattr(self.net, stage_name)(x)
        return x

    def forward(self, x):
        return self._run_trunk(x)

    def forward_features(self, x):
        return self._run_trunk(x)


class Net(nn.Module):
    """Binary tile classifier: backbone features -> global pooling -> single logit.

    Args:
        backbone: name of the backbone. The three ``resnest*`` hub names are
            wrapped with ``ResNest50OrgBackbone``; any other name is resolved
            to a model constructor available in this module's namespace.
        multidrop: if True, the head averages the logits obtained from five
            independent dropout masks (multi-sample dropout).
        pool_type: ``"avg"``, ``"max"``, ``"gem"``, ``"none"`` (no pooling);
            any other value selects concatenated max+avg pooling.
        pretrained: forwarded to the backbone constructor.

    The backbone must provide ``num_features`` and ``forward_features``.
    """

    def __init__(self, backbone, multidrop=False, pool_type="avg", pretrained=False):
        super().__init__()
        if backbone in ("resnest50", "resnest101", "resnest50_fast_1s1x64d"):
            self.model = ResNest50OrgBackbone(backbone, pretrained=pretrained)
        else:
            # NOTE(security): eval() executes `backbone` as Python code. That is
            # acceptable only because the name comes from this notebook's own
            # configuration; never pass externally supplied strings here.
            self.model = eval(f"{backbone}(pretrained=pretrained)")

        self.model_type_name = type(self.model).__name__

        # Channels entering the linear head, relative to the backbone width.
        head_width_factor = 1
        if pool_type == "avg":
            self.avgpool = nn.AdaptiveAvgPool2d(1)
        elif pool_type == "max":
            self.avgpool = nn.AdaptiveMaxPool2d(1)
        elif pool_type == "gem":
            self.avgpool = GeM()
        elif pool_type == "none":
            # No pooling: the head only matches if the backbone already emits
            # a 1x1 feature map.
            self.avgpool = nn.Identity()
        else:
            # Concat pooling stacks max and avg results, doubling the channels;
            # the head must be sized accordingly or forward() fails.
            self.avgpool = AdaptiveConcatPool2d(1)
            head_width_factor = 2

        head_in_features = self.model.num_features * head_width_factor

        self.multi_drop = multidrop
        if self.multi_drop:
            self.flat = Flatten()
            self.dropouts = nn.ModuleList([
                nn.Dropout(0.5) for _ in range(5)
            ])
            self.logits = nn.Linear(head_in_features, 1)
        else:
            self.logits = nn.Sequential(
                Flatten(),
                nn.Dropout(0.5),
                nn.Linear(head_in_features, 1)
            )

    def forward(self, x):
        """Return one logit per sample, shape ``(batch, 1)``."""
        x = self.model.forward_features(x)
        x = self.avgpool(x)

        if not self.multi_drop:
            return self.logits(x)

        x = self.flat(x)
        # Average the head output over all dropout masks (identical in eval mode).
        out = None
        for dropout in self.dropouts:
            logit = self.logits(dropout(x))
            out = logit if out is None else out + logit
        return out / len(self.dropouts)


In [None]:
def get_trans(img, I):
    """Apply test-time-augmentation variant ``I`` to a batch of images.

    ``I >= 4`` first swaps the two spatial axes (dims 2 and 3); ``I % 4`` then
    selects the flips: 0 none, 1 dim 2, 2 dim 3, 3 both.
    """
    if I >= 4:
        img = img.transpose(2, 3)

    flip_axes = {0: (), 1: (2,), 2: (3,), 3: (2, 3)}.get(I % 4)
    if flip_axes is None:
        # No variant matches (non-integer I): nothing is returned.
        return None
    return img.flip(flip_axes) if flip_axes else img

    
def get_trans_invert(img, I):
    """Undo the augmentation applied by ``get_trans`` for the same index ``I``.

    The flips selected by ``I % 4`` are reverted first, then the spatial axes
    (dims 2 and 3) are swapped back when ``I >= 4``.
    """
    variant = I % 4
    if variant in (1, 3):
        img = img.flip(2)
    if variant in (2, 3):
        img = img.flip(3)

    return img.transpose(2, 3) if I >= 4 else img

def model_predict_wrapper(model, inputs, model_mode, n_tta=1):
    """Run ``model`` under test-time augmentation and average the outputs.

    Args:
        model: callable applied to each augmented batch.
        inputs: image batch; dims 2 and 3 are treated as the spatial axes.
        model_mode: ``"cls"`` (outputs are averaged as they are) or ``"seg"``
            (each output is mapped back with ``get_trans_invert`` first).
        n_tta: number of augmentation variants, using indices ``0..n_tta-1``
            of ``get_trans``. Eight distinct variants exist; larger values
            repeat some of them.

    Returns:
        The mean of the ``n_tta`` (re-aligned) model outputs.

    Raises:
        ValueError: if ``n_tta`` is smaller than 1.
    """
    assert model_mode in ["cls", "seg"]
    if n_tta < 1:
        raise ValueError(f"n_tta must be at least 1, got {n_tta}")

    # Keep a running sum so only one extra output is alive at a time instead
    # of all n_tta outputs.
    total = None
    for I in range(n_tta):
        oup = model(get_trans(inputs, I))
        if model_mode == "seg":
            oup = get_trans_invert(oup, I)
        total = oup if total is None else total + oup
    return total / n_tta

    
    

# CONFIG

In [None]:
# Tiles are kept for segmentation only when the averaged classifier
# probability in submit() is strictly greater than this value.
CLS_THRESHOLD = 0.42
# A pixel becomes foreground when the averaged segmentation probability in
# submit() is greater than or equal to this value.
MASK_THRESHOLD = 0.5
print("window is {}  overlap is {}  mask_threshold is {}".format(Config.WINDOW, Config.WINDOW_OVERLAP, MASK_THRESHOLD))

# Number of test-time-augmentation variants passed to model_predict_wrapper;
# 8 covers every combination handled by get_trans (4 flip states x optional transpose).
TTA = 8

In [None]:
def _load_fold_models(specs, build_model, checkpoint_name, label, folds=(0, 1, 2, 3, 4)):
    """Build one model per (spec, fold), load its checkpoint and put it on the GPU in eval mode."""
    loaded = []
    for log_dir, model_name, size in specs:
        for fold in folds:
            model = build_model(model_name)
            ckpt_path = log_dir / checkpoint_name(log_dir, fold)
            print(f"{label} ckpt path : ", ckpt_path)
            model.load_state_dict(torch.load(str(ckpt_path)))
            model.cuda()
            model.eval()
            loaded.append([model, size])
    return loaded


def _ensemble_probabilities(models, images, model_mode):
    """Average the sigmoid outputs of every (model, input size) pair on one batch of tiles."""
    total = None
    for model, size in models:
        if size == Config.WINDOW:
            # The original notebook refused this case as well; only down-scaled
            # model inputs are supported.
            raise NotImplementedError("models running at the full tile size are not supported")
        inputs = F.interpolate(images, size=(size, size), mode="area")
        output = model_predict_wrapper(model, inputs, model_mode, TTA)
        if model_mode == "seg":
            # Bring the logit map back to tile resolution before the sigmoid.
            output = F.interpolate(output, size=(Config.WINDOW, Config.WINDOW),
                                   mode="bilinear", align_corners=False)
        probability = torch.sigmoid(output)
        total = probability if total is None else total + probability
    return total / len(models)


def _predict_image_mask(name, cls_models, seg_models):
    """Segment one TIFF tile by tile and return the merged uint8 mask at image resolution."""
    test_dataset, test_dl = get_dataloader(name, "test")
    preds = np.zeros(test_dataset.get_data_hw(), dtype=np.uint8)

    try:
        for batch in tqdm(test_dl):
            # Drop tiles rejected by the dataset's empty-tile filter.
            valid = np.array(batch["valid_image"], dtype=bool)
            images = batch["image"][torch.from_numpy(valid)]
            slices = np.array(batch["slice"], dtype=np.int64)[valid]

            assert images.size(2) == images.size(
                3) == Config.WINDOW, f"{images.size(2)} {images.size(3)} {Config.WINDOW}"
            if images.size(0) == 0:
                continue

            images = images.cuda()

            # Optional classifier stage: only segment tiles it flags as positive.
            if cls_models:
                keep = (_ensemble_probabilities(cls_models, images, "cls") > CLS_THRESHOLD).squeeze(1)
                images = images[keep]
                slices = slices[keep.cpu().numpy()]
                assert images.shape[0] == slices.shape[0], f"{images.shape[0]} - {slices.shape[0]}"
                if images.size(0) == 0:
                    continue

            probabilities = _ensemble_probabilities(seg_models, images, "seg")
            masks = (probabilities >= MASK_THRESHOLD).cpu().numpy().astype(np.uint8)

            assert len(masks) == len(slices)
            for mask, (x1, x2, y1, y2) in zip(masks, slices):
                try:
                    # Union with what overlapping tiles already predicted; the
                    # crop discards the zero padding of border tiles.
                    preds[y1:y2, x1:x2] |= mask[0][:(y2 - y1), :(x2 - x1)]
                except Exception:
                    print(x1, x2, y1, y2)
                    print(mask[0].shape)
                    raise
    finally:
        # Release the rasterio handles of this image before moving on.
        for handle in (*test_dataset.layers, test_dataset.data):
            handle.close()

    return preds


def submit():
    """Run the full inference pipeline and write ``submission.csv``.

    Loads five folds for every configured classifier and segmentation model,
    predicts a mask for each ``*.tiff`` in ``Config.root_dir`` (tiled, with
    TTA and model averaging), run-length encodes the masks and writes one
    ``id, predicted`` row per image. Requires a CUDA device.
    """
    print()
    print("your window is {}  overlap-window is {}".format(Config.WINDOW, Config.WINDOW_OVERLAP))
    print("cls threshold is ", CLS_THRESHOLD)
    print("mask threshold is ", MASK_THRESHOLD)
    print()

    # Each entry: [checkpoint directory, backbone name, model input size].
    # The classifier stage is disabled while this list is empty.
    cls_log_dirs_model_name = [
#         [Path("/kaggle/input/humapsubmit3/eca_nfnet_l1_focal_384_1536_384_multidrop_avg/"), "eca_nfnet_l1", 384],
#         [Path("/kaggle/input/humapsubmit3/resnest50d_focal_384_1536_384_multidrop_avg/"), "resnest50d", 384],
#         [Path("/kaggle/input/humapsubmit3/tf_efficientnet_b3_ns_focal_384_1536_384_multidrop_avg/"), "tf_efficientnet_b3_ns", 384],
    ]

    seg_log_dirs_model_name = [
        [Path("/kaggle/input/humapsubmitpseudo/eca_nfnet_l1_bce_384_1536_384"), "eca_nfnet_l1", 384],
        [Path("/kaggle/input/humapsubmitpseudo/resnest50d_bce+lovasz_384_1536_384"), "resnest50d", 384],
        [Path("/kaggle/input/humapsubmitpseudo/resnest50d_bce01lovasz_768_1536_384"), "resnest50d", 768],
    ]

    # Inference only: do not rely on another cell having disabled autograd.
    with torch.no_grad():
        print("cls model loading!!!!")
        cls_models = _load_fold_models(
            cls_log_dirs_model_name,
            build_model=lambda model_name: Net(
                backbone=model_name, pretrained=False, multidrop=True, pool_type="avg"),
            checkpoint_name=lambda log_dir, fold: f"model_{fold}_swa.pth",
            label="cls",
        )

        print("seg model loading!!!!!")
        seg_models = _load_fold_models(
            seg_log_dirs_model_name,
            build_model=lambda model_name: Smp_Unet(
                backbone=model_name, pretrained=False, deepsup=False, center=False, kaggle=True),
            # Directories with "768" in their path hold plain checkpoints,
            # the others hold "_swa" ones.
            checkpoint_name=lambda log_dir, fold: (
                f"model_{fold}.pth" if "768" in str(log_dir) else f"model_{fold}_swa.pth"),
            label="seg",
        )

        records = []
        names = list(Config.root_dir.glob("*.tiff"))
        print("the number of test samples is ", len(names))
        for name in names[::-1]:
            image_id = name.stem
            print("image_id is ", image_id)

            preds = _predict_image_mask(name, cls_models, seg_models)
            records.append({"id": image_id, "predicted": rle_encode_less_memory(preds)})

            # Full-resolution masks are large; free them before the next image.
            del preds
            gc.collect()

    submission = pd.DataFrame(records, columns=["id", "predicted"])
    submission.to_csv('submission.csv', index=False)



In [None]:
# Fix all RNG seeds before running inference.
seed_torchv2(42)

# Inference only: switch autograd off globally for the rest of the session.
torch.set_grad_enabled(False)


# Runs the whole pipeline and writes submission.csv to the working directory.
submit()


In [None]:
# sub = pd.read_csv("/kaggle/input/humapresume/submission_934.csv")

# sub.to_csv("submission.csv", index=False)