Imports and constants

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
from pathlib import Path
from tqdm import tqdm

import torch
import torch.utils.data as thd
import torch.nn as nn

from sklearn.metrics import fbeta_score

from scipy.ndimage.filters import gaussian_filter1d

import matplotlib.pyplot as plt

from collections import defaultdict
import os

# --- Data / patch geometry -------------------------------------------------
BATCH_SIZE = 256
# Half-width of a patch along both image axes (x and y). The effective patch
# shape is [Z_DIM, 2 * BUFFER + 1, 2 * BUFFER + 1].
BUFFER = 10
# NOTE(review): SLICES is not referenced by any cell visible here.
SLICES = 65
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
LEARNING_RATE = 3e-3
# Half-open range [Z_START, Z_END) of surface-volume slices that are loaded.
Z_START, Z_END = 25, 40
Z_DIM = Z_END - Z_START

# --- Loop limits -----------------------------------------------------------
MAX_TRAIN_STEPS = 30_000_000
MAX_VAL_STEPS = 1_000
PRINT_EVERY = 20_000_000

First, let's see if we can fit one full fragment into memory at once.

In [None]:
def pad_array(array):
    """Pad a 2-D array with BUFFER entries on every side.

    np.pad is called with its default mode, so the added border uses
    np.pad's default fill. The result is BUFFER * 2 larger along each axis.
    """
    # Same (before, after) amount for both axes.
    return np.pad(array, pad_width=[(BUFFER, BUFFER)] * 2)

def load_fragment_surface(fragment, split='train'):
    """Load the selected surface-volume slices of one fragment.

    The *.tif files found under the fragment's surface_volume directory are
    sorted, restricted to the [Z_START:Z_END] range, opened with PIL,
    padded with pad_array and stacked into a single array whose first axis
    indexes the slices.
    """
    print("Loading fragment %s surface" % fragment)
    surface_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/surface_volume" % (split, fragment))
    slice_files = sorted(surface_path.rglob("*.tif"))[Z_START:Z_END]

    layers = []
    for tif_file in tqdm(slice_files):
        layer = np.array(Image.open(tif_file))
        layers.append(pad_array(layer))
    return np.array(layers)

def load_mask(fragment, split='train'):
    """Read mask.png of the given fragment/split and return it padded by pad_array."""
    mask_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/mask.png" % (split, fragment))
    mask_image = Image.open(mask_path)
    mask_pixels = np.array(mask_image)
    return pad_array(mask_pixels)

def load_inklabels(fragment, split='train'):
    """Read inklabels.png of the given fragment/split and return it padded by pad_array."""
    inklabels_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/inklabels.png" % (split, fragment))
    inklabels_image = Image.open(inklabels_path)
    inklabels_pixels = np.array(inklabels_image)
    return pad_array(inklabels_pixels)

In [None]:
class SingleFragmentDataset(thd.Dataset):
    """Per-pixel patch dataset over a single fragment.

    One sample exists for every position where the (padded) mask equals 1.
    A sample is the float32 surface patch of shape
    [slices, 2 * BUFFER + 1, 2 * BUFFER + 1] centred on that position,
    paired with either the ink label of the centre pixel (training) or the
    sample's own dataset index (test).
    """

    def __init__(self, fragment, is_test=False):
        """Load surface, mask and (unless is_test) ink labels for `fragment`."""
        self.is_test = is_test
        split = 'train'
        if is_test:
            split = 'test'
        self.surface = load_fragment_surface(fragment, split)
        self.mask = load_mask(fragment, split)
        # Ink labels are only loaded for the training split.
        self.inklabels = None if is_test else load_inklabels(fragment, split)
        # (N, 2) array holding the axis-0 / axis-1 coordinates of every
        # mask == 1 position in the padded mask.
        self.pixels = np.stack(np.where(self.mask == 1), axis=1)

    def __len__(self):
        """Number of mask == 1 positions."""
        return len(self.pixels)

    def __getitem__(self, index):
        """Return (patch, label) in training mode or (patch, index) in test mode."""
        row, col = self.pixels[index]
        # Window of BUFFER pixels on each side of the centre, centre included.
        row_window = slice(row - BUFFER, row + BUFFER + 1)
        col_window = slice(col - BUFFER, col + BUFFER + 1)
        surface = self.surface[:, row_window, col_window].astype(np.float32)

        if self.is_test:
            # No labels at test time; hand back the sample index instead.
            return surface, index

        # Label of the centre pixel only, as a length-1 float32 vector.
        labels = self.inklabels[row, col].reshape((1, )).astype(np.float32)
        return surface, labels

In [None]:
%%time
# Build the training dataset from fragment 1 (training split by default).
train_dataset = SingleFragmentDataset(fragment=1)

In [None]:
# Shuffled mini-batches of BATCH_SIZE patches drawn from the training fragment.
train_loader = thd.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

Define the model

In [None]:
# Patch classifier. Four 3x3 'same'-padded convolutions reduce the Z_DIM input
# channels to a single map, which is flattened and passed through a small MLP
# ending in a sigmoid, i.e. one value in [0, 1] per input patch.
# Stride and dilation are left at their Conv2d defaults (1).
convnet = nn.Sequential(
    # Conv stage 1: Z_DIM -> 32 channels.
    nn.Conv2d(in_channels=Z_DIM, out_channels=32, kernel_size=3, padding="same"),
    nn.Dropout(0.2),
    nn.ReLU(),
    nn.BatchNorm2d(num_features=32),
    # Conv stage 2: 32 -> 16 channels.
    nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3, padding="same"),
    nn.Dropout(0.2),
    nn.ReLU(),
    nn.BatchNorm2d(num_features=16),
    # Conv stage 3: 16 -> 8 channels.
    nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding="same"),
    nn.Dropout(0.2),
    nn.ReLU(),
    nn.BatchNorm2d(num_features=8),
    # Final conv collapses to one channel; 'same' padding keeps the spatial
    # size, so the flattened length is the number of pixels in a patch.
    nn.Conv2d(in_channels=8, out_channels=1, kernel_size=3, padding="same"),
    nn.ReLU(),
    nn.Flatten(),
    # MLP head: patch pixels -> 128 -> 64 -> 1.
    nn.Linear(in_features=(2 * BUFFER + 1) ** 2, out_features=128),
    nn.Dropout(0.2),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=64),
    nn.Dropout(0.2),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=1),
    nn.Sigmoid(),
).to(DEVICE)

Train the model

In [None]:
%%time
# Per-step training history, stored as plain Python floats so the lists can be
# smoothed and plotted directly.
losses = []
accs = []
fbetas = []

convnet.train()
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(convnet.parameters(), lr=LEARNING_RATE)
for i, (xs, ys) in enumerate(pbar := tqdm(train_loader)):
    # `>=` stops after exactly MAX_TRAIN_STEPS optimisation steps
    # (a strict `>` would run one extra step).
    if i >= MAX_TRAIN_STEPS:
        break

    # Forward / backward / update.
    optimizer.zero_grad()
    outputs = convnet(xs.to(DEVICE))
    loss = criterion(outputs, ys.to(DEVICE))
    loss.backward()
    optimizer.step()

    # Batch metrics, computed on the CPU from detached predictions.
    # A pixel is predicted as ink when the sigmoid output exceeds 0.4.
    pred_ink = outputs.detach().gt(0.4).cpu().int()
    accuracy = (pred_ink == ys).sum().item() / ys.size(0)
    fbeta = fbeta_score(ys.view(-1).numpy(), pred_ink.view(-1).numpy(), beta=0.5)
    # .item() yields a plain float, so neither the progress bar nor the
    # history lists keep references to tensors.
    loss_value = loss.item()
    pbar.set_postfix({"loss": loss_value, "acc": accuracy, "fbeta": fbeta})

    losses.append(loss_value)
    accs.append(accuracy)
    fbetas.append(fbeta)

    # Periodic report. Runs after the histories were extended so they are
    # never empty here and include the current step.
    if i % PRINT_EVERY == PRINT_EVERY - 1:
        print(outputs)
        plt.plot(gaussian_filter1d(losses, sigma=10), label='loss')
        plt.plot(gaussian_filter1d(accs, sigma=10), label='accs')
        plt.plot(gaussian_filter1d(fbetas, sigma=10), label='fbetas')
        plt.legend()
        # Render now so successive reports do not pile up on one figure.
        plt.show()

In [None]:
# Gaussian-smoothed (sigma=10) training curves, one line per recorded metric.
for _curve_label, _history in (("loss", losses), ("accs", accs), ("fbetas", fbetas)):
    plt.plot(gaussian_filter1d(_history, sigma=10), label=_curve_label)
plt.legend()

Validate the model

In [None]:
# Drop the training data before loading the validation fragment.
del train_loader
del train_dataset

# Switch Dropout / BatchNorm layers to inference behaviour.
convnet.eval()
val_dataset = SingleFragmentDataset(2)
# Shuffling makes the first MAX_VAL_STEPS batches a random sample of the fragment.
val_loader = thd.DataLoader(val_dataset, BATCH_SIZE, shuffle=True)

# Per-batch validation metrics as plain Python floats.
losses = []
accs = []
fbetas = []

# No gradients are needed for evaluation; skipping autograd bookkeeping saves
# memory and time.
with torch.no_grad():
    for i, (xs, ys) in enumerate(pbar := tqdm(val_loader)):
        # `>=` evaluates exactly MAX_VAL_STEPS batches (a strict `>` would
        # evaluate one extra).
        if i >= MAX_VAL_STEPS:
            break
        outputs = convnet(xs.to(DEVICE))
        # Reuses the `criterion` created in the training cell.
        loss_value = criterion(outputs, ys.to(DEVICE)).item()
        # A pixel is predicted as ink when the sigmoid output exceeds 0.4.
        pred_ink = outputs.gt(0.4).cpu().int()
        accuracy = (pred_ink == ys).sum().item() / ys.size(0)
        fbeta = fbeta_score(ys.view(-1).numpy(), pred_ink.view(-1).numpy(), beta=0.5)
        pbar.set_postfix({"loss": loss_value, "acc": accuracy, "fbeta": fbeta})

        fbetas.append(fbeta)
        losses.append(loss_value)
        accs.append(accuracy)

print("Mean loss: ", np.mean(losses))
print("Mean acc: ", np.mean(accs))
print("Mean fbetas: ", np.mean(fbetas))

Generate test predictions

In [None]:
# Drop the validation data before loading the test fragments.
del val_loader
del val_dataset

submission = defaultdict(list)

# Make sure Dropout / BatchNorm are in inference mode even if this cell is run
# without the validation cell having switched the model.
convnet.eval()

for fragment in ['a', 'b']:
    print(f"Generating predictions for fragment {fragment}")
    test_dataset = SingleFragmentDataset(fragment, is_test=True)
    # Results are sorted afterwards, so there is no reason to shuffle.
    test_loader = thd.DataLoader(test_dataset, BATCH_SIZE, shuffle=False)

    # Dataset indices (rows of test_dataset.pixels) of samples predicted as ink.
    ink_sample_indices = []
    with torch.no_grad():
        # In test mode the dataset yields (patch, sample index) pairs.
        for xs, sample_indices in tqdm(test_loader):
            output = convnet(xs.to(DEVICE))
            # Threshold the predictions of THIS batch (the previous code read
            # the stale `outputs` tensor left over from the validation loop).
            pred_ink = output.gt(0.4).flatten().cpu()
            ink_sample_indices.append(sample_indices[pred_ink].numpy())

    if ink_sample_indices:
        ink_sample_indices = np.concatenate(ink_sample_indices)
    else:
        ink_sample_indices = np.empty(0, dtype=np.int64)

    # Translate sample indices into pixel positions of the original image:
    # look up the padded (row, col), remove the BUFFER offset added by
    # pad_array, then flatten.
    # NOTE(review): assumes the submission numbers pixels row-major and
    # 1-based over the unpadded image - confirm against the competition's
    # evaluation page.
    ink_rows, ink_cols = test_dataset.pixels[ink_sample_indices].T
    image_width = test_dataset.mask.shape[1] - 2 * BUFFER
    flat_positions = (ink_rows - BUFFER) * image_width + (ink_cols - BUFFER) + 1
    pixels_with_ink = np.sort(flat_positions).tolist()

    submission["Id"].append(fragment)
    # Every ink pixel is emitted as its own "<start> 1" run.
    submission["Predicted"].append(' '.join("%d 1" % p for p in pixels_with_ink))

pd.DataFrame.from_dict(submission).to_csv("/kaggle/working/submission.csv", index=False)

In [None]:
# Display the submission table (one row per test fragment) as the cell output.
pd.DataFrame.from_dict(submission, orient="columns")