In [1]:
import os
from kaggle_secrets import UserSecretsClient

# Kaggle secrets client; also reused later in the notebook for the Neptune token.
secrets = UserSecretsClient()

# Install the private `angionet` package from GitHub only when it is not
# already importable in this kernel.
try:
    import angionet
except ImportError:
    # Only needed on the install path, so imported locally.
    import subprocess
    import sys

    GITHUB_TOKEN = secrets.get_secret("github-token")
    USERNAME = secrets.get_secret("github-username")
    URL = f"https://{USERNAME}:{GITHUB_TOKEN}@github.com/{USERNAME}/sennet-segmentation.git"

    # Argument-list form: no shell is involved, so characters inside the
    # secrets cannot be interpreted as shell syntax, and `sys.executable -m pip`
    # guarantees the package lands in the environment of the running kernel.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", f"git+{URL}"],
        check=False,
    )
    if result.returncode != 0:
        # Deliberately do not echo the command: it contains the access token.
        raise RuntimeError(
            f"pip failed to install angionet from GitHub (exit code {result.returncode})"
        )

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.7.0 requires botocore<1.31.65,>=1.31.16, but you have botocore 1.34.7 which is incompatible.[0m[31m
[0m

In [2]:
from functools import partial
from pathlib import Path
import gc

import albumentations as A
import albumentations.pytorch as AP
import numpy as np
import pandas as pd
import segmentation_models_pytorch as smp
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import cv2

from angionet.core import evaluate, train, predict
from angionet.datasets import TrainDataset, InferenceDataset
from angionet.losses import DiceLoss, GenSurfLoss
from angionet.metrics import dice, summary
from angionet.utils import set_seed, visualize
from angionet.functional import standardize, rescale, decode, colorize
from angionet.postprocessing import fill_holes, apply_threshold


from albumentations.core.transforms_interface import ImageOnlyTransform

class Rescale(ImageOnlyTransform):
    """Min-max rescale an image to the [0, 1] range and cast it to float32.

    The transform is image-only and is constructed with ``p = 1``.
    """

    def __init__(self, **kwargs):
        # NOTE: keyword arguments are accepted but intentionally not forwarded;
        # the transform is always built with always_apply = False and p = 1.
        super().__init__(always_apply = False, p = 1)

    def apply(self, image, **kwargs):
        """Return ``(image - min) / (max - min)`` as a float32 array.

        A constant image (max == min) has no dynamic range; it is mapped to
        all zeros instead of producing NaNs from a 0 / 0 division.
        """
        lowest = image.min()
        value_range = image.max() - lowest
        if value_range == 0:
            return np.zeros_like(image, dtype = 'float32')

        image = (image - lowest) / value_range
        return np.asarray(image, dtype = 'float32')



In [3]:
import torch

def min_max_normalization(x):
    """Min-max normalise every sample of a batch independently.

    Args:
        x: tensor of shape ``(batch, f1, ...)``. Inputs with more than two
            dimensions are flattened to ``(batch, -1)`` for the reduction, so
            each sample is scaled by its own minimum and maximum. Integer and
            boolean tensors are converted to floating point first.

    Returns:
        A floating point tensor with the same shape as ``x`` whose samples lie
        in ``[0, 1]``. Input that is already normalised (every per-sample
        minimum is 0 and every maximum is 1) is returned without rescaling.
    """
    shape = x.shape
    # Integer tensors support neither a fractional result nor the reductions
    # below in a meaningful way, so promote them up front.
    if not torch.is_floating_point(x):
        x = x.float()
    if x.ndim > 2:
        x = x.reshape(x.shape[0], -1)

    min_ = x.min(dim=-1, keepdim=True)[0]
    max_ = x.max(dim=-1, keepdim=True)[0]
    # Check every sample, not the mean over samples: minima of -1 and 1
    # average to 0 although neither sample is normalised.
    if bool((min_ == 0).all()) and bool((max_ == 1).all()):
        return x.reshape(shape)

    # The epsilon keeps constant samples (max == min) from dividing by zero.
    x = (x - min_) / (max_ - min_ + 1e-9)
    return x.reshape(shape)
import numpy as np
# Scratch check of NumPy reduction shapes: a random integer array with a
# singleton second axis, reduced over that axis.
x = np.random.randint(0, 255, (5, 1, 10, 10))

# Last expression of the cell, so the resulting shape is displayed.
x.min(axis = 1).shape

(5, 10, 10)

In [4]:
class config:
    """Experiment configuration used as a plain namespace of class attributes.

    ``to_dict()`` exports the attributes for experiment logging.
    """

    seed           = 42
    # Root of the competition dataset.
    root           = "/kaggle/input/blood-vessel-segmentation"
    # One pre-sliced dataset per axis; the order must match the
    # ["HxW", "DxH", "DxW"] axis names used when building the path prefixes.
    data           = [
                        "/kaggle/input/sennet-slicing-hxw",
                        "/kaggle/input/sennet-slicing-dxh",
                        "/kaggle/input/sennet-slicing-dxw",
                     ]
    batch_size     = 16
    epochs         = 10
    # dim / stride / padding are forwarded to predict() for inference.
    dim            = 512
    stride         = 412
    padding        = 'reflect'
    # Unpacked into apply_threshold() after prediction.
    thresholds     = (0.2, 0.5)
    backbone       = "tu-seresnext50_32x4d"
    # Group names selecting the training and the evaluation volumes.
    train          = ['kidney_1_dense']
    test           = ['kidney_3_dense']
    split          = (10, 1) # stride
    # Enters the T_max computation together with batch_size; presumably the
    # number of gradient-accumulation steps used by train() -- TODO confirm.
    accumulate     = 3
    learning_rate  = 5e-4
    weight_decay   = 1e-2
    # Not read in this notebook; presumably consumed by train() through the
    # config object -- TODO confirm.
    clipnorm       = 6.0

    # NOTE(review): every training augmentation uses p = 1, so both flips are
    # applied to each sample -- confirm this is intended.
    transforms = {
        "train": A.Compose([
            A.HorizontalFlip(p = 1),
            A.VerticalFlip(p = 1),
            A.RandomRotate90(p = 1),
            A.RandomBrightnessContrast(p = 1),
            Rescale(),
            AP.ToTensorV2()
        ]),

        "test": A.Compose([
            Rescale(),
            AP.ToTensorV2(),
        ])
    }

    @staticmethod
    def to_dict():
        """Return the public, non-callable class attributes as a dict.

        Dunder entries, functions and method descriptors are skipped.
        ``staticmethod`` / ``classmethod`` objects are excluded explicitly
        because they only became callable in Python 3.10; relying on
        ``callable`` alone would leak ``to_dict`` into the result on older
        interpreters.
        """
        return {
            key: value
            for key, value in vars(config).items()
            if not key.startswith('__')
            and not isinstance(value, (staticmethod, classmethod))
            and not callable(value)
        }
    
# Fix the random seed before any data sampling or model initialisation
# (set_seed comes from angionet.utils; presumably it seeds the Python, NumPy
# and torch generators -- TODO confirm).
set_seed(seed = config.seed)

In [5]:
# Add train data: one patch index per slicing dataset, concatenated.
train_data = pd.concat(
    [pd.read_csv(Path(data, "patches-data.csv")) for data in config.data],
    axis=0,
)

# Split into train/test: keep the training groups only. Every step returns a
# new frame, so no column is ever assigned on a filtered slice.
train_data = (
    train_data
    .loc[lambda frame: frame["group"].isin(config.train)]
    .assign(stage="train")
    .sort_values(["group", "image", "axis"])
)

# Add prefix path: axis name -> dataset directory (order follows config.data).
dirs = dict(zip(["HxW", "DxH", "DxW"], config.data))

# Subsample data: select each k-th unique id and drop patches that contain
# 50 vessel pixels or fewer.
ids = train_data["id"].drop_duplicates().iloc[::config.split[0]]
keep = train_data["id"].isin(ids) & (train_data["vessels_pixels"] > 50)
train_data = train_data.loc[keep].drop(columns=["kidney_pixels"])

# Add test data: full-image RLE tables, restricted to the test groups and to
# the HxW axis.
test_data = pd.concat(
    [pd.read_csv(Path(path, "images/train_rles.csv")) for path in config.data]
)
test_data = (
    test_data
    .loc[lambda frame: frame["group"].isin(config.test) & (frame["axis"] == "HxW")]
    .assign(
        stage="test",
        # Every second token of the RLE string is a run length; their sum is
        # the number of vessel pixels in the image.
        vessels_pixels=lambda frame: frame["vessels"].apply(
            lambda rle: sum(int(length) for length in rle.split()[1::2])
        ),
    )
    .sort_values(["group", "image"])
)
# Align the columns with the training frame and keep the RLE string.
test_data = test_data[train_data.columns.tolist() + ["vessels"]]

# Merge: prefix each path with its dataset directory; training rows have no
# RLE string, so the missing values are filled with '-'.
df = pd.concat((train_data, test_data))
df["path"] = df.apply(lambda row: f"{dirs[row['axis']]}/{row['path']}", axis=1)
df = df.fillna('-')
df = df.reset_index(drop=True)

print("Samples:", f"  - Train: {len(train_data)}", f"  - Test : {len(test_data)}", sep = "\n")

display(df.sample(5))

Samples:
  - Train: 3050
  - Test : 456


Unnamed: 0,path,vessels_pixels,group,image,id,axis,stage,vessels
3272,/kaggle/input/sennet-slicing-hxw/images/kidney...,6939,kidney_3_dense,718,kidney_3_dense_0718,HxW,test,121405 2 122916 1 130465 3 131975 3 138017 1 1...
315,/kaggle/input/sennet-slicing-dxw/kidney_1_dens...,231,kidney_1_dense,260,kidney_1_dense_0260,DxW,train,-
2179,/kaggle/input/sennet-slicing-dxw/kidney_1_dens...,828,kidney_1_dense,940,kidney_1_dense_0940,DxW,train,-
3359,/kaggle/input/sennet-slicing-hxw/images/kidney...,5401,kidney_3_dense,805,kidney_3_dense_0805,HxW,test,172733 1 174242 2 175751 2 177261 2 178772 1 1...
430,/kaggle/input/sennet-slicing-dxw/kidney_1_dens...,1121,kidney_1_dense,300,kidney_1_dense_0300,DxW,train,-


In [6]:
# File paths of each stage, taken from the merged dataframe.
train_paths = df.loc[df.stage == 'train', 'path'].values
test_paths = df.loc[df.stage == 'test', 'path'].values

# Training dataset with the augmentation pipeline.
ds_train = TrainDataset(
    train_paths,
    transforms=config.transforms['train'],
    class_index=[0],
    normalization=None,
    dtms=False,
)

# Inference dataset over the test images, used for per-epoch evaluation.
ds_test = InferenceDataset(test_paths, transforms=config.transforms['test'])

# Shuffled training loader; incomplete final batches are dropped.
loader_options = dict(
    shuffle=True,
    batch_size=config.batch_size,
    num_workers=torch.get_num_threads() * 2,
    drop_last=True,
    pin_memory=True,
)
dl_train = DataLoader(ds_train, **loader_options)

In [7]:
# Scheduler horizon: samples divided by the effective batch size
# (batch_size * accumulate), times the number of epochs.
# NOTE(review): assumes train() steps the scheduler once per accumulated
# update -- confirm against angionet.core.train.
updates_per_epoch = len(ds_train) / (config.batch_size * config.accumulate)
T_max = int(updates_per_epoch * config.epochs)

# Use the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Single-channel input, single-class output, raw logits (no activation).
model = smp.Unet(
    encoder_name=config.backbone,
    in_channels=1,
    classes=1,
    activation=None,
)
model = model.to(device)

criterion = smp.losses.DiceLoss(mode='binary')
metric = dice

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config.learning_rate,
    weight_decay=config.weight_decay,
)

# Cosine decay from the initial learning rate down to eta_min over T_max steps.
scheduler = CosineAnnealingLR(optimizer, T_max=T_max, eta_min=1e-5)

Downloading model.safetensors:   0%|          | 0.00/111M [00:00<?, ?B/s]

In [8]:
from neptune_pytorch import NeptuneLogger
import neptune
from neptune.utils import stringify_unsupported
from neptune.types import File
from angionet.utils import prettify_transforms

# Start a Neptune run tagged with the backbone name.
NEPTUNE_TOKEN = secrets.get_secret('neptune-token')
run = neptune.init_run(
    project="segteam/sennet",
    api_token=NEPTUNE_TOKEN,
    tags=[config.backbone],
    capture_hardware_metrics=True,
)

# Description of the training components, looked up from the live objects.
criterion_state = vars(criterion)
components = {
    "model": type(model).__name__,
    "criterion": type(criterion).__name__,
    "region-loss": type(criterion_state['_modules'].get("region_loss")).__name__,
    "class-weights": criterion_state.get('class_weights'),
    "scoring": metric.__name__,
    "optimizer": type(optimizer).__name__,
    "scheduler": type(scheduler).__name__,
}

# Config values are logged as they are, except the transforms, which are
# replaced by their prettified description.
config_fields = {
    key: value
    for key, value in config.to_dict().items()
    if key not in ['transforms']
}
runtime = {**components, **config_fields, **prettify_transforms(config.transforms)}

run["configuration"] = stringify_unsupported(runtime)

# Upload the train / test tables as HTML for later inspection.
train_table = df.query("stage == 'train'")
test_table = df.query("stage == 'test'")
run['data/train'].upload(File.as_html(train_table))
run['data/test'].upload(File.as_html(test_table))

logger = NeptuneLogger(run=run, model=model, log_gradients=True)


The following monitoring options are disabled by default in interactive sessions: 'capture_stdout', 'capture_stderr', 'capture_traceback', and 'capture_hardware_metrics'. To enable them, set each parameter to 'True' when initializing the run. The monitoring will continue until you call run.stop() or the kernel stops. Also note: Your source files can only be tracked if you pass the path(s) to the 'source_code' argument. For help, see the Neptune docs: https://docs.neptune.ai/logging/source_code/



https://app.neptune.ai/segteam/sennet/e/ANG-82


In [9]:
class EarlyStopping:
    """Track the best score seen so far and flag when training should stop.

    Call the instance once per epoch with the current score (higher is
    better). The call returns ``True`` when the score beats the previous best.
    ``sigterm`` becomes ``True`` once the count of consecutive calls without
    improvement equals ``patience``.
    """

    def __init__(self, patience = 3):
        self.msg = "Objective improved {:.5f} -> {:.5f} at epoch {}"
        self.patience = patience
        self.best = -np.inf   # best score observed so far
        self.epoch = 0        # number of calls made so far
        self.iter = 0         # consecutive calls without improvement
        self.sigterm = False  # set once patience is exhausted

    def __call__(self, current):
        if not current > self.best:
            # No progress: extend the stagnation streak.
            self.iter += 1
            improvements = False
        else:
            # New best: report it and reset the streak.
            print(self.msg.format(self.best, current, self.epoch))
            self.best = current
            self.iter = 0
            improvements = True

        self.epoch += 1
        if self.iter == self.patience:
            self.sigterm = True

        return improvements

In [10]:
from angionet.functional import decode
# Decode the ground-truth RLE strings of the test images into one mask stack,
# using the spatial size of the first test sample for every mask.
test_rles = df.loc[df.stage == 'test', 'vessels']
H, W = ds_test[0][0].shape
masks = np.stack([decode(rle, (H, W)) for rle in test_rles])

In [11]:
# Train for at most config.epochs epochs. After every epoch the test volume is
# predicted and scored; a checkpoint and a few visualisations are uploaded
# whenever the surface-dice score improves. Training stops early after three
# consecutive epochs without improvement.
es = EarlyStopping(patience = 3)
try:
    for epoch in range(config.epochs):
        if es.sigterm:
            break
        train_loss, train_score = train(
            model = model,
            loader = dl_train,
            optimizer = optimizer,
            criterion = criterion,
            scoring = metric,
            device = device,
            scheduler = scheduler,
            config = config
        )

        output = predict(
            model = model,
            dataset = ds_test,
            dim = config.dim,
            stride = config.stride,
            padding = config.padding,
            batch_size = 4,
            device = device
        )
        # Post-processing: thresholding with the two configured thresholds,
        # followed by hole filling.
        output = apply_threshold(output, *config.thresholds)
        output = fill_holes(output)
        scores = summary(torch.from_numpy(output), torch.from_numpy(masks))

        run['train'].append({'loss': train_loss, 'score': train_score})
        run['test'].append(scores)
        if es(scores['surface-dice']):
            # NOTE(review): this pickles the whole model object; loading the
            # checkpoint requires the same class definitions to be importable.
            filepath = f"checkpoint-{epoch}.pt"
            torch.save(model, filepath)
            run[f'models/checkpoint-{epoch}'].upload(filepath)
            # Sampling without replacement cannot draw more items than exist,
            # so cap the number of previews at the dataset size.
            n_previews = min(16, len(ds_test))
            indices = np.random.choice(len(ds_test), size = n_previews, replace = False)
            for index in indices:
                masked = colorize(ds_test[index][0].numpy(), masks[index], output[index])
                run['test/predictions'].append(File.as_image(masked / 255.0))

    run['test/highest-score'] = es.best
finally:
    # Always close the Neptune run, even when an epoch raises, so that queued
    # operations are synchronised and the run is not left open.
    run.stop()

Train: 100%|██████████| 190/190 [03:02<00:00,  1.04it/s, loss=0.468, score=0.868]
Processing: 100%|██████████| 114/114 [02:07<00:00,  1.12s/it]


Objective improved -inf -> 0.45732 at epoch 0


Train: 100%|██████████| 190/190 [03:12<00:00,  1.02s/it, loss=0.124, score=0.926]
Processing: 100%|██████████| 114/114 [02:07<00:00,  1.12s/it]


Objective improved 0.45732 -> 0.75003 at epoch 1


Train: 100%|██████████| 190/190 [03:15<00:00,  1.03s/it, loss=0.0904, score=0.9]
Processing: 100%|██████████| 114/114 [02:05<00:00,  1.10s/it]


Objective improved 0.75003 -> 0.75448 at epoch 2


Train: 100%|██████████| 190/190 [03:21<00:00,  1.06s/it, loss=0.102, score=0.865]
Processing: 100%|██████████| 114/114 [02:06<00:00,  1.11s/it]
Train: 100%|██████████| 190/190 [02:52<00:00,  1.10it/s, loss=0.0419, score=0.912]
Processing: 100%|██████████| 114/114 [02:08<00:00,  1.12s/it]
Train: 100%|██████████| 190/190 [02:52<00:00,  1.10it/s, loss=0.0818, score=0.909]
Processing: 100%|██████████| 114/114 [02:08<00:00,  1.12s/it]


Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/segteam/sennet/e/ANG-82/metadata
