In [None]:
import os
import gc
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tifffile as tif
import cv2
import imagecodecs
from sklearn import metrics as skmetrics
import torch
import torch.nn as nn

import torch.optim as optim
import torch.nn.functional as F
import pytorch_lightning as pl

from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger

import seaborn as sns

from logging import basicConfig, getLogger, INFO
basicConfig(level=INFO, format='%(asctime)s %(levelname)s :%(message)s')
logger = getLogger(__name__)

# Set DEBUG to True for a quick smoke run (fewer epochs, fewer images).
DEBUG = False

# No DataLoader worker processes on Windows, two everywhere else.
NUM_WORKERS = 0 if os.name == 'nt' else 2
BATCH_SIZE = 2
EPOCHS = 2 if DEBUG else 100
PATIENCE = 4  # early-stopping patience, in validation checks

# Upper bound on how many images are read from disk.
n_train = 2 if DEBUG else 15
# UNet
IN_CHS = 3
OUT_CHS = 1
UNET_DEPTH = 4

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

IMG_SHAPE = (2400, 2400)  # (width, height)

import multiprocessing as mp
# force=True makes this cell safe to re-run: without it a second call raises
# "RuntimeError: context has already been set".
mp.set_start_method('spawn', force=True)

#for dirname, _, filenames in os.walk('hubmap-kidney-segmentation'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

#import numpy as np # linear algebra
#import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root directory of the competition data set and its training-image folder.
BASE_PATH = '/kaggle/input/hubmap-kidney-segmentation/'
TRAIN_PATH = os.path.join(BASE_PATH, 'train')

In [None]:
# Training table; the cells below read its "id" and "encoding" columns.
# The path is derived from BASE_PATH so the data location is defined once.
df_train = pd.read_csv(os.path.join(BASE_PATH, "train.csv"))
df_train

In [None]:
# Plain Python list of the training image ids, in train.csv row order.
tr_ids = df_train["id"]
tr_ids_l = list(tr_ids)

print(tr_ids_l)

# RLE functions

In [None]:
def rle2mask(mask_rle, shape=IMG_SHAPE):
    '''
    Decode a run-length-encoded string into a binary mask.

    mask_rle: whitespace-separated "start length start length ..." pairs,
              where starts are 1-based indices into the flattened mask
    shape: (width, height) of the encoded image
    Returns a uint8 numpy array of shape (height, width): 1 - mask, 0 - background
    Source: https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
    '''
    tokens = mask_rle.split()
    # Even positions hold run starts (converted to 0-based), odd positions run lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, count in zip(starts, lengths):
        flat[begin:begin + count] = 1
    # The flat buffer is laid out as (width, height); transpose to (height, width).
    return flat.reshape(shape).T

def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns a string of space-separated "start length" pairs with 1-based
    starts; the mask is transposed before flattening.
    Source: https://www.kaggle.com/xhlulu/efficient-mask2rle
    '''
    # Pad one zero on each side so runs touching either end still produce
    # both a rising and a falling edge.
    padded = np.pad(img.T.flatten(), 1)
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Edges alternate run-start / run-end; turn each end into a length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

# Data loading

In [None]:
# Read up to n_train training images together with their decoded masks and
# resize both to IMG_SHAPE.
predictors = []
target = []

for image_id in tr_ids_l[:n_train]:
    image = tif.imread(os.path.join(TRAIN_PATH, f"{image_id}.tiff"))
    print(image_id, image.shape)
    # Drop singleton axes (some TIFFs load as 5-D arrays), then move a leading
    # channel axis to the end so every image is (height, width, channels).
    image = image.squeeze()
    if image.shape[0] == 3:
        image = image.transpose(1, 2, 0)
    encoding = df_train.loc[df_train["id"] == image_id, "encoding"].values[0]
    # rle2mask expects (width, height).
    mask = rle2mask(encoding, (image.shape[1], image.shape[0]))
    image = cv2.resize(image, IMG_SHAPE)
    mask = cv2.resize(mask, IMG_SHAPE)
    predictors.append(image)
    target.append(mask)

logger.info('Finish dataload')

del image, mask
gc.collect()

In [None]:
# Stack the per-image lists into single arrays.
predictors = np.array(predictors)
target = np.array(target)

# Cast before scaling so the division runs in float32; dividing the integer
# array first would materialise a float64 copy twice the size of the result.
predictors = predictors.astype(np.float32) / 255
target = target.astype(np.float32)

# Images: (N, H, W, C) -> (N, C, H, W); masks: (N, H, W) -> (N, 1, H, W).
predictors = np.transpose(predictors, (0, 3, 1, 2))
target = np.expand_dims(target, axis=1)

In [None]:
# Hold out the last third of the loaded images (at least one) for validation.
# With 15 images this is the same 10 / 5 split as the previous fixed slices,
# but it also leaves a non-empty validation set when fewer images are loaded
# (for example in DEBUG mode).
if len(predictors) < 2:
    raise ValueError("need at least 2 images to build a train/validation split")
n_val = max(1, len(predictors) // 3)
split = len(predictors) - n_val

pred = predictors[:split]
tgt = target[:split]
val_pred = predictors[split:]
val_tgt = target[split:]

del predictors, target
gc.collect()

# Model

In [None]:
class ConvBNReLU(nn.Module):
    """2-D convolution followed by batch normalisation and an in-place ReLU."""

    def __init__(self, in_chs, out_chs, kernel_size, padding):
        super().__init__()
        conv = nn.Conv2d(in_chs, out_chs, kernel_size=kernel_size, padding=padding)
        norm = nn.BatchNorm2d(out_chs)
        act = nn.ReLU(inplace=True)
        # Sub-module order fixes the state-dict keys: block.0 / block.1 / block.2.
        self.block = nn.Sequential(conv, norm, act)

    def forward(self, x):
        out = self.block(x)
        return out


class Encoder(nn.Module):
    """Two stacked ConvBNReLU layers: in_chs -> mid_chs -> out_chs.

    ``out_chs`` is kept as an attribute so other modules can read this
    stage's output width.
    """

    def __init__(self, in_chs: int, mid_chs: int, out_chs: int,
                 kernel_size: int, padding: int):
        super().__init__()
        self.out_chs = out_chs
        stages = [
            ConvBNReLU(in_chs, mid_chs, kernel_size, padding),
            ConvBNReLU(mid_chs, out_chs, kernel_size, padding),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        features = self.block(x)
        return features


class Decoder(nn.Module):
    """Upsample ``x1``, concatenate it with ``x2`` on the channel axis, then
    apply two ConvBNReLU layers and, optionally, dropout.

    Args:
        in_chs: channel count of the concatenated tensor fed to the convs.
        out_chs: channel count produced by this stage.
        kernel_size, padding: forwarded to both ConvBNReLU layers.
        scale_factor: bilinear upsampling factor applied to ``x1``.
        apply_dropout: when true, a Dropout(p=0.25) follows the convs.
    """

    def __init__(self,
                 in_chs,
                 out_chs,
                 kernel_size,
                 padding,
                 scale_factor: int,
                 apply_dropout=False):
        super().__init__()
        self.out_chs = out_chs
        # The intermediate width sits halfway between input and output widths.
        hidden_chs = (in_chs + out_chs) // 2
        self.up = nn.Upsample(scale_factor=scale_factor,
                              mode='bilinear',
                              align_corners=True)
        self.block = nn.Sequential(
            ConvBNReLU(in_chs, hidden_chs, kernel_size, padding),
            ConvBNReLU(hidden_chs, out_chs, kernel_size, padding),
        )
        self.dropout = nn.Dropout(.25) if apply_dropout else None

    def forward(self, x1, x2):
        merged = torch.cat([self.up(x1), x2], dim=1)
        out = self.block(merged)
        if not self.dropout:
            return out
        return self.dropout(out)


class UNet(nn.Module):
    """U-Net style encoder/decoder built from Encoder and Decoder stages."""

    def __init__(self,
                 in_chs,
                 out_chs,
                 depth,
                 ini_chs=8,
                 kernel_size=3,
                 padding=1,
                 scale_factor=2):
        '''
        Args:
            in_chs (int): channels of the input image
            out_chs (int): channels of the output map
            depth (int): number of encoder stages; depth - 1 pooling layers
                and depth - 1 decoder stages are created
            ini_chs (int): intermediate width of the first encoder stage;
                every encoder stage outputs twice its intermediate width
            kernel_size, padding: forwarded to every ConvBNReLU
            scale_factor (int): pooling size and decoder upsampling factor
        '''
        super().__init__()
        self.depth = depth
        self.encs = nn.ModuleList()
        self.decs = nn.ModuleList()
        self.pools = nn.ModuleList()

        # Contracting path: the last stage is not followed by a pooling layer.
        width = ini_chs
        stage_in = in_chs
        for level in range(depth):
            self.encs.append(
                Encoder(stage_in, width, 2 * width, kernel_size, padding))
            width *= 2
            stage_in = width
            if level != depth - 1:
                self.pools.append(nn.MaxPool2d(scale_factor))

        # Expanding path, deepest first: each decoder merges the stage below
        # with the encoder output one level up.
        for level in range(depth - 1):
            below_chs = self.encs[-level - 1].out_chs
            skip_chs = self.encs[-level - 2].out_chs
            self.decs.append(
                Decoder(skip_chs + below_chs,
                        skip_chs,
                        kernel_size,
                        padding,
                        scale_factor,
                        apply_dropout=level < depth // 2))

        # 1x1 convolution mapping the last decoder's channels to out_chs.
        self.output_layer = nn.Conv2d(self.decs[-1].out_chs,
                                      out_chs,
                                      kernel_size=1,
                                      padding=0)

    def forward(self, x):
        skips = []
        last_level = self.depth - 1
        for level in range(self.depth):
            x = self.encs[level](x)
            if level < last_level:
                skips.append(x)
                x = self.pools[level](x)

        # Consume the stored encoder outputs from deepest to shallowest.
        for dec, skip in zip(self.decs, reversed(skips)):
            x = dec(x, skip)

        return self.output_layer(x)
    
#summary(UNet(IN_CHS, OUT_CHS, UNET_DEPTH), (3, IMG_SHAPE[1], IMG_SHAPE[0]),
        #device='cpu')

In [None]:
class LitUNet(pl.LightningModule):
    """Lightning wrapper that trains a UNet with binary cross-entropy on logits."""

    def __init__(self, in_chs, out_chs, depth):
        super().__init__()
        self.model = UNet(in_chs, out_chs, depth)

    def forward(self, x):
        return self.model(x)

    def _bce_loss(self, batch):
        """Run the network on an (input, target) batch and return the BCE-with-logits loss."""
        inputs, targets = batch
        logits = self.model(inputs)
        return F.binary_cross_entropy_with_logits(logits, targets)

    def training_step(self, batch, batch_idx):
        loss = self._bce_loss(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        # Run to check generalization performance.
        loss = self._bce_loss(batch)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

# Train

In [None]:
# Keep both data sets on the CPU. The Lightning Trainer moves each batch to
# the training device itself, and CUDA tensors should not be handed to
# DataLoader worker processes (NUM_WORKERS may be > 0).
data = torch.from_numpy(pred)
labels = torch.from_numpy(tgt).float()
train_dataset = torch.utils.data.TensorDataset(data, labels)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=NUM_WORKERS)

val_data = torch.from_numpy(val_pred)
val_labels = torch.from_numpy(val_tgt).float()
val_dataset = torch.utils.data.TensorDataset(val_data, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=BATCH_SIZE,
                                         shuffle=False,
                                         num_workers=NUM_WORKERS)

del pred, tgt, data, labels
gc.collect()

In [None]:
# Stop once 'val_loss' has not improved for PATIENCE validation checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss', patience=PATIENCE, verbose=False, mode='min')

# TensorBoard event files are written under train_logs/hubk_segmentation.
LOGGER = TensorBoardLogger('train_logs', name='hubk_segmentation')

# One GPU when CUDA is available, otherwise none; metrics are logged once
# per pass over train_loader.
trainer = pl.Trainer(
    gpus=int(torch.cuda.is_available()),
    max_epochs=EPOCHS,
    logger=LOGGER,
    log_every_n_steps=len(train_loader),
    callbacks=[early_stop_callback],
)

In [None]:
model = LitUNet(IN_CHS, OUT_CHS, UNET_DEPTH)

logger.info('Start training')
try:
    trainer.fit(model, train_loader, val_loader)
except Exception:
    # Best effort: the notebook keeps running after a training failure, but
    # the full traceback is logged instead of a bare "error". Catching
    # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
    logger.exception('Training failed')
logger.info('Finish training')

# Test data load

In [None]:
# Submission template; its "id" column lists the test images to predict.
df_sub = pd.read_csv(os.path.join(BASE_PATH, "sample_submission.csv"))
df_sub

In [None]:
# Plain Python list of the test image ids, in sample_submission.csv row order.
test_ids = df_sub["id"]
test_ids_l = list(test_ids)

print(test_ids_l)

In [None]:
# Read the test images and resize them to IMG_SHAPE (no masks exist for them).
# NOTE(review): the id list is capped with n_train, a training-side constant;
# confirm this cap is intended for the test set as well.
predictors = []

for image_id in test_ids_l[:n_train]:
    image = tif.imread(os.path.join(BASE_PATH, f"test/{image_id}.tiff"))
    print(image_id, image.shape)
    # Drop singleton axes (some TIFFs load as 5-D arrays), then move a leading
    # channel axis to the end so every image is (height, width, channels).
    image = image.squeeze()
    if image.shape[0] == 3:
        image = image.transpose(1, 2, 0)
    image = cv2.resize(image, IMG_SHAPE)
    predictors.append(image)

print(len(predictors))
print("end")

del image
gc.collect()

In [None]:
# Stack the per-image list into a single array.
predictors = np.array(predictors)

# Cast before scaling so the division runs in float32; dividing the integer
# array first would materialise a float64 copy twice the size of the result.
predictors = predictors.astype(np.float32) / 255

# (N, H, W, C) -> (N, C, H, W)
predictors = np.transpose(predictors, (0, 3, 1, 2))

# Inference

In [None]:
pred_l = []

# Switch to inference behaviour: Dropout is disabled and BatchNorm uses its
# running statistics instead of per-batch statistics.
model.eval()

for i, dt in enumerate(predictors):
    dt = np.expand_dims(dt, axis=0)  # add the batch axis: (1, C, H, W)

    with torch.no_grad():
        # Build the input on whichever device the model currently lives on.
        batch = torch.as_tensor(dt, dtype=torch.float32, device=model.device)
        prediction = torch.sigmoid(model(batch))
        prediction = prediction.cpu().numpy().squeeze()
    pred_bin = (prediction > .5).astype(np.uint8)
    pred_l.append(pred_bin)

    # One figure per image: probability histogram next to the mask overlay.
    fig, (ax_hist, ax_img) = plt.subplots(1, 2, figsize=(16, 8))
    # Flatten first: hist() treats each column of a 2-D array as its own dataset.
    ax_hist.hist(prediction.ravel(), bins=20)
    ax_hist.set(title=f"Predicted probabilities (image {i})",
                xlabel="probability",
                ylabel="pixel count")
    ax_img.imshow(np.transpose(dt.squeeze(), (1, 2, 0)))
    ax_img.imshow(pred_bin, cmap="hot", alpha=0.5)
    ax_img.set_title(f"Predicted mask overlay (image {i})")
    ax_img.axis("off")
    plt.show()
    plt.close(fig)  # release the figure so memory does not grow per image

    if DEBUG and i == 1:
        break

del dt, pred_bin, predictors, prediction
gc.collect()

# Result

In [None]:
# Original (width, height) of every test image, keyed by image id, so the
# predicted masks can be resized back to full resolution.
shape_dic = {}

for image_id in test_ids_l[:n_train]:
    image = tif.imread(os.path.join(BASE_PATH, f"test/{image_id}.tiff"))
    print(image_id, image.shape)
    # Drop singleton axes first: otherwise a 5-D TIFF such as (1, 1, 3, H, W)
    # would take the else-branch below and record (1, 1) as its size.
    dims = image.squeeze().shape
    if dims[0] == 3:
        # channels-first: (3, height, width)
        shape_dic[image_id] = (dims[2], dims[1])
    else:
        # channels-last: (height, width, 3)
        shape_dic[image_id] = (dims[1], dims[0])
    del image  # only the shape is needed; let the pixel data be freed

# shape_dic values are (width, height)
print(shape_dic)

In [None]:
# Resize each predicted mask back to its image's original (width, height).
# pred_l is matched to shape_dic by position.
mask_l = []
try:
    for i, index in enumerate(shape_dic):
        # Only the recorded size is needed here, so the full TIFF is not re-read.
        mask = cv2.resize(pred_l[i], shape_dic[index],
                          interpolation=cv2.INTER_LINEAR)
        mask_l.append(mask)
except Exception:
    # Best effort: keep the masks produced so far, but log the full traceback.
    # Catching Exception (not a bare except) lets KeyboardInterrupt through.
    logger.exception('Resizing predicted masks failed after %d mask(s)',
                     len(mask_l))

# Mask to RLE

In [None]:
# Run-length encode every full-resolution mask. A comprehension leaves no
# loop variable behind, so an empty mask_l no longer triggers a NameError
# from a trailing `del`.
mask_rle_l = [mask2rle(msk) for msk in mask_l]

gc.collect()

In [None]:
# Align the encodings with the first len(mask_rle_l) rows of the template.
# A plain list assignment raises ValueError when fewer masks than rows were
# produced (DEBUG mode, or an earlier best-effort step that stopped early);
# rows without a prediction are left as NaN instead.
df_sub["predicted"] = pd.Series(mask_rle_l,
                                index=df_sub.index[:len(mask_rle_l)],
                                dtype=object)

df_sub

# Submission

In [None]:
# If a submission file exists at this input path, it replaces the frame
# built above.
filepath = "/kaggle/input/submissioncsv/submission.csv"
if os.path.exists(filepath):
    df_sub = pd.read_csv(filepath)

df_sub

In [None]:
# Write the submission table to submission.csv in the working directory,
# without the index column.
df_sub.to_csv('submission.csv', index=False)