In [None]:
# Mount Google Drive when running on Colab. Outside Colab the import fails and
# the cell is a no-op, so the notebook still runs on Kaggle or locally.
try:
  from google.colab import drive
  drive.mount('/content/drive')
except ImportError:
  # google.colab is not importable here, so there is nothing to mount.
  pass
except Exception as err:
  # Stay best-effort, but report why the mount failed instead of hiding it
  # (a bare `except:` would also swallow KeyboardInterrupt).
  print(f"Could not mount Google Drive: {err}")

In [None]:
# Recreate the Kaggle directory layout and link the shared Drive folders into it.
# `mkdir -p` and `ln -sf` make the cell safe to re-run (Restart & Run All):
# plain `mkdir` / `ln -s` fail once the directories / links already exist.
!mkdir -p /kaggle/input /kaggle/working
!ln -sf /content/drive/Shareddrives/"Kaggle data"/vesuvius-challenge-ink-detection /kaggle/input/
!ln -sf /content/drive/Shareddrives/"Kaggle data"/resnet50-pretrained /kaggle/input/
# NOTE(review): /kaggle/working already exists as a directory at this point, so
# this link is created *inside* it (as /kaggle/working/working) - confirm that
# this is the intended layout.
!ln -sf /content/drive/Shareddrives/"Kaggle data"/working /kaggle/working

* https://github.com/qubvel/segmentation_models.pytorch

Imports

In [None]:
import torch
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from PIL import Image
from tqdm.auto import tqdm
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import numpy as np
from sklearn.metrics import fbeta_score, precision_score, recall_score
from scipy.ndimage.filters import gaussian_filter1d
import matplotlib.pyplot as plt
import datetime

from collections import defaultdict

import pandas as pd

import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

Download the pretrained model. This was run once to create the `resnet50-pretrained` Kaggle dataset.

In [None]:
def download_and_save_resnet50(output_path):
    """Download the pretrained FCN-ResNet50 and save it to ``output_path``.

    The whole model object is pickled with ``torch.save`` (not just a
    ``state_dict``), so the file has to be read back with ``torch.load``.

    Args:
        output_path: Destination file path for the serialized model.
    """
    # `weights` must be passed by keyword: torchvision deprecated passing it
    # positionally when the weights-enum API was introduced.
    model = fcn_resnet50(weights=FCN_ResNet50_Weights.DEFAULT)
    torch.save(model, output_path)

Constants & config

In [None]:
# --- Paths -------------------------------------------------------------------
MODEL_PATH = '/kaggle/input/resnet50-pretrained/model'

# --- Input geometry ----------------------------------------------------------
YX_DIM = 32  # Side length of the square patches (one value used for both y and x).
# Surface-volume layers that are loaded: Z_START, Z_START + Z_STEP, ...
Z_START, Z_END, Z_STEP = 26, 32, 2
Z_DIM = (Z_END - Z_START) // Z_STEP
# Presumably required because the pretrained first convolution expects
# 3 input channels - confirm before changing the Z_* values.
assert Z_DIM == 3

# --- Runtime -----------------------------------------------------------------
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

TQDM_OFF = False  # True disables the progress bars.

# --- Optimisation ------------------------------------------------------------
BSIZE = 256  # Patches per batch.
LR = 0.003   # Learning rate.

MAX_STEPS = 10000  # Training steps per fragment.
PLOT_EVERY = 1000  # Steps between progress plots.

# --- Post-processing ---------------------------------------------------------
TH = 0.5  # Probability threshold used to binarise predictions.

Dataset definition

In [None]:
def load_mask(fragment_name, split_name):
    """Load a fragment's ``mask.png`` as a NumPy array.

    Args:
        fragment_name: Name of the fragment directory (e.g. ``'1'``).
        split_name: Name of the split directory (``'train'`` or ``'test'``).

    Returns:
        The decoded image as an ``np.ndarray``.
    """
    print(f"Loading mask {split_name}/{fragment_name}")
    path = f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/mask.png"
    # Image.open keeps the file open until the image object is collected; the
    # context manager releases the handle as soon as the pixels are copied.
    with Image.open(path) as img:
        return np.array(img)

def load_inklabels(fragment_name, split_name):
    """Load a fragment's ``inklabels.png`` as a float32 NumPy array.

    Args:
        fragment_name: Name of the fragment directory (e.g. ``'1'``).
        split_name: Name of the split directory (``'train'`` or ``'test'``).

    Returns:
        The label image as a float32 ``np.ndarray``, or ``None`` for the
        ``'test'`` split, where no label file is read.
    """
    if split_name == 'test':
        return None
    print(f"Loading inklabels {split_name}/{fragment_name}")
    path = f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/inklabels.png"
    # Close the file handle deterministically instead of waiting for GC.
    with Image.open(path) as img:
        return np.array(img).astype('float32')

def load_surface(fragment_name, split_name):
    """Load the selected surface-volume layers of a fragment.

    Reads ``Z_DIM`` layers, starting at ``Z_START`` and stepping by ``Z_STEP``,
    and scales each one by ``1 / 2**16``.

    Args:
        fragment_name: Name of the fragment directory (e.g. ``'1'``).
        split_name: Name of the split directory (``'train'`` or ``'test'``).

    Returns:
        float32 array of shape ``(Z_DIM, height, width)``, or ``None`` when
        ``Z_DIM`` is 0 (no layer is read).
    """
    print("Loading surface")
    surface = None
    for i in tqdm(range(Z_DIM), disable=TQDM_OFF):
        layer = Z_START + i * Z_STEP
        path = f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/surface_volume/{layer:02}.tif"
        # Release each file handle as soon as its pixels have been copied.
        with Image.open(path) as img:
            sslice = np.array(img)
        if surface is None:
            # Allocate lazily: the image size is only known after the first read.
            surface = np.zeros([Z_DIM, *sslice.shape], dtype='float32')
        # Cast to float32 *before* scaling so no float64 copy of the full slice
        # is materialised; dividing by a power of two is exact, so the values
        # are the same as scaling in float64 and casting afterwards.
        surface[i] = sslice.astype('float32') / 2**16
    return surface

class SlicedDataset(torch.utils.data.Dataset):
    """Dense sliding-window patch dataset over a single fragment.

    Every integer index maps to one ``YX_DIM x YX_DIM`` window whose top-left
    corner walks the image in row-major order with stride 1.

    Attributes:
        fragment_name: Fragment directory name this dataset was built from.
        split_name: Split directory name (``'train'`` or ``'test'``).
        surface: Array returned by ``load_surface``, indexed as
            ``[layer, y, x]``.
        mask: Array returned by ``load_mask``.
        inklabels: Array returned by ``load_inklabels``; all zeros with the
            mask's shape when the split has no labels.
    """

    def __init__(self, fragment_name, split_name):
        """Load surface, mask and labels for ``split_name/fragment_name``."""
        self.fragment_name = fragment_name
        self.split_name = split_name
        self.surface = load_surface(fragment_name, split_name)
        self.mask = load_mask(fragment_name, split_name)
        self.inklabels = load_inklabels(fragment_name, split_name)
        if self.inklabels is None:
            # No ground truth available: use zeros with the same dtype as real
            # labels (float32) so both kinds of dataset yield the same types.
            self.inklabels = np.zeros(self.mask.shape, dtype='float32')

    def _grid(self):
        """Return ``(rows, cols)``: the number of valid patch origins per axis."""
        # A patch starting at y fits iff y + YX_DIM <= height, i.e. there are
        # height - YX_DIM + 1 origins (0 when the image is smaller than a patch).
        rows = max(self.surface.shape[1] - YX_DIM + 1, 0)
        cols = max(self.surface.shape[2] - YX_DIM + 1, 0)
        return rows, cols

    def __len__(self):
        """Number of distinct patch positions."""
        rows, cols = self._grid()
        return rows * cols

    def getitem(self, y, x):
        """Return the patch whose top-left corner is at ``(y, x)``.

        Returns:
            ``(surface, labels, (y, x))`` where ``surface`` is the
            ``[:, y:y+YX_DIM, x:x+YX_DIM]`` window and ``labels`` is the
            matching label window reshaped to ``(1, YX_DIM, YX_DIM)``.
        """
        surface = self.surface[:, y:y + YX_DIM, x:x + YX_DIM]
        labels = self.inklabels[y:y + YX_DIM, x:x + YX_DIM].reshape((1, YX_DIM, YX_DIM))
        return surface, labels, (y, x)

    def __getitem__(self, idx):
        """Return the patch for flat index ``idx`` (row-major over origins).

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        rows, cols = self._grid()
        total = rows * cols
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            # Also lets plain `for item in dataset` terminate cleanly.
            raise IndexError(f"patch index {idx} out of range for {total} patches")
        y, x = divmod(idx, cols)
        return self.getitem(y, x)

In [None]:
# Build the dataset for train fragment 1 (loads its surface, mask and labels).
# NOTE(review): the global `x` does not appear to be used by any later cell
# shown here, and train_on_fragment('1') builds its own dataset - confirm
# whether this cell is still needed.
x = SlicedDataset('1', 'train')

Model definition

In [None]:
# Load the pickled FCN-ResNet50 (written by download_and_save_resnet50).
# NOTE(security): this file is a fully pickled nn.Module and unpickling can
# execute arbitrary code - only load it from a trusted source.
# * map_location lets the checkpoint load on a CPU-only machine even if it was
#   saved from a GPU.
# * weights_only=False is required on recent PyTorch, whose default
#   (weights_only=True) rejects full-model pickles.
fcn_model = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=False)

# The first convolution is kept as-is (Z_DIM is asserted to be 3). To feed a
# different number of layers, replace it, e.g.:
# fcn_model.backbone.conv1 = torch.nn.Conv2d(
#     Z_DIM, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Replace the final 1x1 convolution so the head emits a single channel.
fcn_model.classifier[4] = torch.nn.Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))

# Move everything (including the freshly created head) to the target device.
fcn_model = fcn_model.to(DEVICE)

Freeze the backbone, then unfreeze `backbone.conv1` (currently disabled: the code below is commented out, so the whole model is trained)

In [None]:
# for param in fcn_model.backbone.parameters():
#   param.requires_grad = False

# for param in fcn_model.backbone.conv1.parameters():
#   param.requires_grad = True

Train on fragment 2, then on fragment 1

In [None]:
def train_on_fragment(fragment_name):
    """Fine-tune the global ``fcn_model`` on one training fragment.

    Builds a shuffled patch loader over the fragment, runs at most
    ``MAX_STEPS`` Adam steps with a class-weighted binary cross-entropy, and
    tracks loss, precision, recall, F0.5, gradient norm and prediction norm
    per step. A progress figure is saved under ``/kaggle/working`` and shown
    every ``PLOT_EVERY`` steps and once more at the end.

    Args:
        fragment_name: Name of the train fragment directory (e.g. ``'1'``).
    """
    ds = SlicedDataset(fragment_name, 'train')
    dl = torch.utils.data.DataLoader(ds, batch_size=BSIZE, shuffle=True)

    def weighted_bce(pred, actual):
        """BCE on probabilities; pixels labelled 1 count three times as much."""
        weights = torch.ones_like(actual)
        weights[actual == 1.] = 3.
        return torch.nn.functional.binary_cross_entropy(pred, actual, weight=weights)

    optimizer = torch.optim.Adam(fcn_model.parameters(), lr=LR)
    # Make sure BatchNorm/Dropout are in training mode even if an earlier cell
    # left the model in eval mode.
    fcn_model.train()

    losses = []
    fbetas = []
    precisions = []
    recalls = []
    gradnorms = []
    predsnorms = []
    # Canvas holding the most recent prediction written at each patch location.
    livegen = np.zeros(ds.surface.shape[1:])

    def gradnorm():
        """L2 norm over all parameter gradients that are currently set."""
        grads = [
            param.grad.detach().flatten()
            for param in fcn_model.parameters()
            if param.grad is not None
        ]
        return torch.cat(grads).norm().item()

    def plot():
        """Save and show the smoothed training curves plus the live canvas."""
        fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 3))
        fig.suptitle(f'Training on {fragment_name}')
        axs[0].plot(gaussian_filter1d(losses, sigma=25), label='loss')
        axs[0].plot(gaussian_filter1d(precisions, sigma=25), label='precisions')
        axs[0].plot(gaussian_filter1d(recalls, sigma=25), label='recalls')
        axs[0].plot(gaussian_filter1d(fbetas, sigma=25), label='fbetas')
        axs[1].plot(gaussian_filter1d(gradnorms, sigma=25), label='gradnorms')
        axs[2].plot(gaussian_filter1d(predsnorms, sigma=25), label='predsnorms')
        axs[3].imshow(livegen)
        axs[0].legend()
        axs[1].legend()
        axs[2].legend()
        plt.savefig(f'/kaggle/working/training_{fragment_name}_{datetime.datetime.now().strftime("%d-%m-%H:%M:%S")}.png')
        plt.show()
        # Release the figure; otherwise repeated plots accumulate in memory.
        plt.close(fig)

    for i, (surface, inklabels, (ys, xs)) in enumerate(pbar := tqdm(dl, disable=TQDM_OFF)):
        # `>=` so that exactly MAX_STEPS optimisation steps are taken.
        if i >= MAX_STEPS:
            break
        optimizer.zero_grad()

        preds = torch.sigmoid(fcn_model(surface.to(DEVICE))['out'])
        loss = weighted_bce(preds, inklabels.to(DEVICE))
        loss.backward()
        optimizer.step()

        # One device->host transfer, reused for thresholding and the canvas.
        preds_cpu = preds.detach().cpu()
        pred_ink = preds_cpu.gt(TH).int()
        preds_np = preds_cpu.numpy()

        # TODO: vectorize
        # Iterate over the actual batch length: the final batch of an epoch can
        # hold fewer than BSIZE patches.
        for j in range(len(ys)):
            y, x = int(ys[j]), int(xs[j])
            livegen[y:y + YX_DIM, x:x + YX_DIM] = preds_np[j, 0]

        y_true = inklabels.view(-1).numpy()
        y_pred = pred_ink.view(-1).numpy()
        fbeta = fbeta_score(y_true, y_pred, beta=0.5)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)

        # Store plain floats so no tensors (or autograd graphs) are kept alive.
        fbetas.append(fbeta)
        losses.append(loss.item())
        precisions.append(precision)
        recalls.append(recall)
        gradnorms.append(gradnorm())
        predsnorms.append(preds.detach().norm().item())

        pbar.set_postfix({
            "loss": losses[-1],
            "prec": precision,
            "rec": recall,
            "fbeta": fbeta,
            "gradnorm": gradnorms[-1],
            "predsnorm": predsnorms[-1]})

        if i % PLOT_EVERY == PLOT_EVERY - 1:
            plot()

    plot()

# Fine-tune the shared global model sequentially: fragment 2 first, then
# fragment 1. Each call creates its own optimizer, so optimizer state is not
# carried over between fragments.
train_on_fragment('2')
train_on_fragment('1')

Define a function to generate the image

In [None]:
def _tile_origins(size, tile):
    """Return start offsets of ``tile``-long windows that cover ``[0, size)``.

    Windows are laid out back to back; if they do not end exactly at ``size``
    one extra window flush with the end is added, so every position is covered.
    Returns an empty list when ``size < tile``.
    """
    if size < tile:
        return []
    origins = list(range(0, size - tile + 1, tile))
    if origins[-1] + tile < size:
        origins.append(size - tile)
    return origins


def generate_prediction(dataset):
    """Predict a binary ink image for a whole fragment with the global model.

    The fragment is tiled with ``YX_DIM x YX_DIM`` patches (the last row and
    column of tiles are aligned to the image border, overlapping their
    neighbours where necessary), each patch is run through ``fcn_model``, and
    the stitched probabilities are masked and thresholded at ``TH``.

    Args:
        dataset: Object exposing ``surface`` (indexed ``[layer, y, x]``),
            ``mask`` and ``getitem(y, x)``, e.g. a ``SlicedDataset``.

    Returns:
        Array with the shape of ``dataset.surface.shape[1:]`` containing 0.0
        and 1.0.
    """
    out = np.zeros(dataset.surface.shape[1:])
    ys = _tile_origins(dataset.surface.shape[1], YX_DIM)
    xs = _tile_origins(dataset.surface.shape[2], YX_DIM)

    # Inference must run in eval mode (BatchNorm uses its running statistics,
    # Dropout is disabled) and without autograd bookkeeping. The previous mode
    # is restored afterwards so that later training is unaffected.
    was_training = fcn_model.training
    fcn_model.eval()
    try:
        with torch.no_grad():
            for y in tqdm(ys, disable=TQDM_OFF):
                for x in xs:
                    surface, _, _ = dataset.getitem(y, x)
                    batch = torch.as_tensor(
                        np.expand_dims(surface, axis=0), dtype=torch.float32).to(DEVICE)
                    probs = torch.sigmoid(fcn_model(batch)['out'])
                    out[y:y + YX_DIM, x:x + YX_DIM] = probs[0, 0].cpu().numpy()
    finally:
        fcn_model.train(was_training)

    # Zero everything outside the fragment mask, then binarise.
    out *= dataset.mask
    return (out >= TH).astype(out.dtype)

Validate on fragment 3. Display predicted image.

In [None]:
# Fragment 3 is not passed to train_on_fragment above, so it serves as the
# hold-out set. `ds` and `out` are read by the metrics cell that follows.
ds = SlicedDataset('3', 'train')

out = generate_prediction(ds)

In [None]:
%%time
fbeta = fbeta_score(ds.inklabels.flatten().astype(int), out.flatten().astype(int), beta=0.5)
precision = precision_score(ds.inklabels.flatten().astype(int), out.flatten().astype(int))
recall = recall_score(ds.inklabels.flatten().astype(int), out.flatten().astype(int))

print(f"fbeta: {fbeta} precision: {precision} recall: {recall}")

Test output serialization on train data

In [None]:
def rle(preds):
    """Run-length encode a binary image.

    The image is flattened in row-major order and every maximal run of
    positive values is reported as a ``(start, length)`` pair, where ``start``
    is the 1-based index of the run's first pixel.

    Args:
        preds: Array-like of any shape; values ``> 0`` count as foreground.

    Returns:
        Integer array of shape ``(n_runs, 2)``; shape ``(0, 2)`` if there is
        no foreground.
    """
    flat = np.asarray(preds).ravel() > 0
    # Pad with a background pixel on both sides so that runs touching the
    # first or last pixel still produce a rising and a falling edge. int8
    # keeps the differences signed (bool/uint8 subtraction would fail or wrap).
    padded = np.zeros(flat.size + 2, dtype=np.int8)
    padded[1:-1] = flat
    steps = np.diff(padded)
    # steps[k] compares flat[k] with flat[k - 1], so an edge at k refers to
    # 0-based pixel k, i.e. 1-based pixel k + 1.
    starts = np.flatnonzero(steps == 1) + 1
    ends = np.flatnonzero(steps == -1) + 1
    return np.stack([starts, ends - starts], axis=1)

def serialize_rle(rle):
    """Join run-length pairs into one space-separated string.

    Args:
        rle: Iterable of rows; the first two items of each row are written
            as ``"<row[0]> <row[1]>"``. (Inside this function the parameter
            shadows the module-level ``rle`` function.)

    Returns:
        All rows joined by single spaces; an empty string for no rows.
    """
    tokens = []
    for run in rle:
        tokens.append(f"{run[0]} {run[1]}")
    return ' '.join(tokens)

In [None]:
ds = SlicedDataset('3', 'train')
print(serialize_rle(rle(ds.inklabels)))
!cat /kaggle/input/vesuvius-challenge-ink-detection/train/3/inklabels_rle.csv

Generate test predictions

In [None]:
# Predict every test fragment and collect one (Id, Predicted) row per fragment.
submission = defaultdict(list)

test_root = '/kaggle/input/vesuvius-challenge-ink-detection/test/'
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the submission file reproducible between runs.
for fragment_name in sorted(os.listdir(test_root)):
    if not os.path.isdir(os.path.join(test_root, fragment_name)):
        # Only directories are fragments; skip any stray files.
        continue
    ds = SlicedDataset(fragment_name, 'test')
    out = generate_prediction(ds)

    submission["Id"].append(fragment_name)
    submission["Predicted"].append(serialize_rle(rle(out)))

pd.DataFrame.from_dict(submission).to_csv("/kaggle/working/submission.csv", index=False)

In [None]:
# Show the collected submission rows as a table (the bare last expression is
# rendered as the cell output).
pd.DataFrame.from_dict(submission)

In [None]:
# Print only the first 100 characters of each line of the written CSV.
!cut -c-100 /kaggle/working/submission.csv