In [1]:
# TODO: Mount Google Drive data if necessary

* https://github.com/qubvel/segmentation_models.pytorch

Imports

In [2]:
import torch
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from PIL import Image
from tqdm.auto import tqdm
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import numpy as np
from sklearn.metrics import fbeta_score, precision_score, recall_score
from scipy.ndimage.filters import gaussian_filter1d
import matplotlib as plt

import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

Download the model. Ran once and created the Kaggle dataset

In [3]:
def download_and_save_resnet50(output_path):
    model = fcn_resnet50(FCN_ResNet50_Weights.DEFAULT)
    torch.save(model, output_path)

Constants & config

In [4]:
MODEL_PATH = '/kaggle/input/resnet50-pretrained/model'

YX_DIM = 32  # Specify only one dimension and use square patches.
Z_START = 26
Z_END = 36
Z_STEP = 2
Z_DIM = (Z_END - Z_START) // Z_STEP

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

BSIZE = 32

TQDM_OFF = False

Dataset definition

In [5]:
def load_mask(fragment_name, split_name):
    print(f"Loading mask {split_name}/{fragment_name}")
    return np.array(Image.open(f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/mask.png"))

def load_inklabels(fragment_name, split_name):
    print(f"Loading inklabels {split_name}/{fragment_name}")
    return np.array(Image.open(f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/inklabels.png")).astype('float32')

def load_surface(fragment_name, split_name):
    print(f"Loading surface")
    surface = None
    for i in tqdm(range(Z_DIM), disable=TQDM_OFF):
        l = Z_START + i * Z_STEP
        sslice = np.array(Image.open(f"/kaggle/input/vesuvius-challenge-ink-detection/{split_name}/{fragment_name}/surface_volume/{l:02}.tif"))
        sslice = (sslice / 2**16).astype('float32')
        surface = surface if surface is not None else np.zeros([Z_DIM, *sslice.shape], dtype='float32')
        surface[i] = sslice
    return surface

class SlicedDataset(torch.utils.data.Dataset):
    def __init__(self, fragment_name, split_name):
        self.fragment_name = fragment_name
        self.split_name = split_name
        self.surface = load_surface(fragment_name, split_name)
        self.mask = load_mask(fragment_name, split_name)
        self.inklabels = load_inklabels(fragment_name, split_name)

    def __len__(self):
        return (self.surface.shape[1] - YX_DIM) * (self.surface.shape[2] - YX_DIM)

    def __getitem__(self, idx):
        y = idx // (self.surface.shape[2] - YX_DIM)
        x = idx % (self.surface.shape[2] - YX_DIM)
        surface = self.surface[:, y:y + YX_DIM, x:x + YX_DIM]
        labels = self.inklabels[y:y + YX_DIM, x:x + YX_DIM].reshape((1, YX_DIM, YX_DIM))
        return surface, labels

In [6]:
x = SlicedDataset('1', 'train')

Loading surface


  0%|          | 0/5 [00:00<?, ?it/s]

Loading mask train/1
Loading inklabels train/1


Model definition

In [7]:
fcn_model = torch.load(MODEL_PATH)
fcn_model = fcn_model
fcn_model.backbone.conv1 = torch.nn.Conv2d(
    Z_DIM, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
fcn_model.classifier[4] = torch.nn.Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))

fcn_model = fcn_model.to(DEVICE)

In [8]:
def train_on_fragment(fragment_name):
    ds = SlicedDataset(fragment_name, 'train')
    dl = torch.utils.data.DataLoader(ds, batch_size=BSIZE, shuffle=True)
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(fcn_model.parameters())
    
    losses = []
    fbetas = []
    precisions = []
    recalls = []
    
    for surface, inklabels in (pbar := tqdm(dl, disable=TQDM_OFF)):
        preds = torch.sigmoid(fcn_model(surface.to(DEVICE))['out'])
        pred_ink = preds.detach().gt(0.5).cpu().int()
        loss = criterion(preds, inklabels.to(DEVICE))
        loss.backward()
        optimizer.step()
        
        fbeta = fbeta_score(inklabels.view(-1).numpy(), pred_ink.view(-1).numpy(), beta=0.5)
        precision = precision_score(inklabels.view(-1).numpy(), pred_ink.view(-1).numpy())
        recall = recall_score(inklabels.view(-1).numpy(), pred_ink.view(-1).numpy())
        
        pbar.set_postfix({"loss": loss, "prec": precision, "rec": recall, "fbeta": fbeta})
        
        fbetas.append(fbeta)
        losses.append(loss.detach().cpu().float())
        precisions.append(precision)
        recalls.append(recall)

    plt.plot(gaussian_filter1d(losses, sigma=25), label='loss')
    plt.plot(gaussian_filter1d(precisions, sigma=25), label='precisions')
    plt.plot(gaussian_filter1d(recalls, sigma=25), label='recalls')
    plt.plot(gaussian_filter1d(fbetas, sigma=25), label='fbetas')

train_on_fragment('1')
train_on_fragment('2')

Loading surface


  0%|          | 0/5 [00:00<?, ?it/s]

Loading mask train/1
Loading inklabels train/1


  0%|          | 0/1603826 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
%%time
print(fcn_model(torch.zeros(BSIZE, Z_DIM, 64, 64).to(DEVICE))['out'].shape)

In [None]:
ds = SlicedDataset('1', 'train')

In [None]:
print(ds.inklabels.sum() / (ds.inklabels.shape[0] * ds.inklabels.shape[1]))

Train on fragments 1 and 2

Validate on fragment 3. Dipslay predicted image for different thresholds.

Pick the best threshold

Test output serialization on train data

Generate test predictions