Imports and constants

In [54]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
from pathlib import Path
from tqdm import tqdm

import torch
import torch.utils.data as thd
import torch.nn as nn

from sklearn.metrics import fbeta_score, precision_score, recall_score
from scipy.ndimage.filters import gaussian_filter1d

import matplotlib.pyplot as plt
from collections import defaultdict
import os
import gc
from operator import itemgetter
from pympler import tracker

# Ignore SKLearn warnings
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

# --- Batching / patch geometry ---
BATCH_SIZE = 256
# Half-width of the spatial window around each pixel, in both y and x.
# A patch is cut as surface[:, y-BUFFER:y+BUFFER+1, x-BUFFER:x+BUFFER+1],
# so its shape is (Z_DIM, BUFFER * 2 + 1, BUFFER * 2 + 1).
BUFFER = 10
SLICES = 65  # NOTE(review): not referenced in the cells shown; presumably the slice count per fragment - confirm.
DEVICE = "cpu"
LEARNING_RATE = 0.3  # Not referenced in the cells shown.

# --- Slice selection: the sorted slice files are indexed as files[Z_START:Z_END:Z_STEP] ---
Z_START = 24
Z_END = 34
Z_STEP = 1
# Number of slices that selection yields (assuming at least Z_END files exist).
# len(range(...)) follows Python slicing for any step, whereas
# (Z_END - Z_START) // Z_STEP under-counts when Z_STEP does not divide the span.
Z_DIM = len(range(Z_START, Z_END, Z_STEP))

# --- Fragment selection ---
TRAIN_ON_FRAGMENTS = [1, 2]  # Not referenced in the cells shown.
VAL_FRAGMENT = 3  # Not referenced in the cells shown.
DISABLE_TQDM = True  # Passed as `disable=` to every tqdm progress bar.

# --- Loop limits ---
MAX_TRAIN_STEPS = 20000  # Not referenced in the cells shown.
MAX_VAL_STEPS = 1000  # Caps how many batches evaluate() consumes per call.
PRINT_EVERY = 20000000  # Not referenced in the cells shown.

First, let's see if we can fit one full fragment into memory at once.

In [2]:
def pad_array(array):
    """Zero-pad a 2-D array by BUFFER cells on each of its four sides.

    The border lets a (2 * BUFFER + 1)-wide window be cut around any cell of
    the original array without running past the edge.

    Args:
        array: A 2-D NumPy array (np.pad rejects other ranks for this pad spec).

    Returns:
        A new array whose two dimensions are each 2 * BUFFER larger.
    """
    border = (BUFFER, BUFFER)
    return np.pad(array, pad_width=(border, border))

def load_fragment_surface(fragment, split='train'):
    """Load the selected slices of a fragment's surface volume into one float16 array.

    The `*.tif` files found under the fragment's `surface_volume` directory are
    sorted by path and indexed with [Z_START:Z_END:Z_STEP]. Each slice is padded
    with pad_array, divided by 2 ** 16 and stored as float16.

    Args:
        fragment: Fragment identifier, interpolated into the dataset path.
        split: Dataset sub-directory to read from (defaults to 'train').

    Returns:
        A float16 array stacking the padded slices along axis 0, i.e.
        (n_slices, height + 2 * BUFFER, width + 2 * BUFFER).

    Raises:
        FileNotFoundError: If the slice selection is empty (wrong path or
            z-range). Previously this silently produced an empty array that
            only failed later, when the dataset tried to index it.
    """
    print("Loading fragment %s surface" % fragment)
    surface_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/surface_volume" % (split, fragment))
    slice_files = sorted(surface_path.rglob("*.tif"))[Z_START:Z_END:Z_STEP]
    if not slice_files:
        raise FileNotFoundError(
            "No surface slices found under %s for z-range [%s:%s:%s]" % (surface_path, Z_START, Z_END, Z_STEP)
        )

    def _read_slice(path):
        # Copy the pixels out while the file is open; the context manager then
        # closes it explicitly instead of relying on Pillow's implicit cleanup.
        with Image.open(path) as image:
            raw = np.array(image)
        # Dividing by 2 ** 16 presumably maps 16-bit intensities into [0, 1) - confirm the TIFF bit depth.
        return (pad_array(raw) / (2 ** 16)).astype('float16')

    return np.array([_read_slice(f) for f in tqdm(slice_files, disable=DISABLE_TQDM)])

def load_mask(fragment, split='train'):
    """Load a fragment's mask image and pad it by BUFFER on every side.

    Args:
        fragment: Fragment identifier, interpolated into the dataset path.
        split: Dataset sub-directory to read from (defaults to 'train').

    Returns:
        The mask as a NumPy array, padded via pad_array.
    """
    print("Loading fragment %s mask" % fragment)
    mask_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/mask.png" % (split, fragment))
    # Copy the pixels out while the file is open, then close it explicitly.
    with Image.open(mask_path) as image:
        mask = np.array(image)
    return pad_array(mask)

def load_inklabels(fragment, split='train'):
    """Load a fragment's ink-label image and pad it by BUFFER on every side.

    Args:
        fragment: Fragment identifier, interpolated into the dataset path.
        split: Dataset sub-directory to read from (defaults to 'train').

    Returns:
        The label image as a NumPy array, padded via pad_array.
    """
    print("Loading fragment %s labels" % fragment)
    inklabels_path = Path("/kaggle/input/vesuvius-challenge-ink-detection/%s/%s/inklabels.png" % (split, fragment))
    # Copy the pixels out while the file is open, then close it explicitly.
    with Image.open(inklabels_path) as image:
        inklabels = np.array(image)
    return pad_array(inklabels)

In [None]:
class SingleFragmentDataset(thd.Dataset):
    """Per-pixel patch dataset over a single fragment.

    Every pixel where the padded mask equals 1 is one sample. A sample is the
    block of surface values covering all loaded slices and a
    (2 * BUFFER + 1) x (2 * BUFFER + 1) window centred on that pixel.

    NOTE(review): a class with this same name is defined in another cell of
    this notebook; whichever cell runs last wins, so keep a single definition.
    """

    def __init__(self, fragment, is_test=False):
        """Load surface, mask and (unless is_test) ink labels for `fragment`.

        Args:
            fragment: Fragment identifier, forwarded to the loader functions.
            is_test: When True, read the 'test' split and skip the labels.
        """
        self.is_test = is_test
        split = 'test' if is_test else 'train'
        self.surface = load_fragment_surface(fragment, split)
        print(self.surface.dtype)
        self.mask = load_mask(fragment, split)
        self.inklabels = load_inklabels(fragment, split) if not is_test else None
        # (N, 2) array of (y, x) coordinates, in padded-array space, where mask == 1.
        self.pixels = np.stack(np.where(self.mask == 1), axis=1)

    def __len__(self):
        """Return the number of samples, i.e. of masked-in pixels."""
        return self.pixels.shape[0]

    def get_pixel_number(self, y, x):
        """Return the 1-based row-major index of (y, x) within the padded surface.

        NOTE(review): both the coordinates handed in by __getitem__ and the row
        width (surface.shape[2]) include the BUFFER padding - confirm that
        whatever consumes these numbers expects padded coordinates.
        """
        return 1 + y * self.surface.shape[2] + x

    def __getitem__(self, index):
        """Return the sample for the index-th masked-in pixel.

        Returns:
            (patch, label) for training data, where label is a float32 array of
            shape (1,) holding the ink label at the pixel; (patch, pixel_number)
            for test data. `patch` is a float32 array of shape
            (n_slices, 2 * BUFFER + 1, 2 * BUFFER + 1).
        """
        y, x = self.pixels[index]
        # The zero padding guarantees this window stays inside the array for
        # every masked-in pixel (padding cells are 0, hence never selected).
        surface = self.surface[
            :, y - BUFFER:y + BUFFER + 1, x - BUFFER:x + BUFFER + 1
        ].astype(np.float32)
        if self.is_test:
            return surface, self.get_pixel_number(y, x)
        labels = self.inklabels[y, x].reshape((1, )).astype(np.float32)
        return surface, labels

Define the random baseline predictor: each sample is labelled as ink with a fixed probability

In [39]:
def predict(prob, inputs):
    """Random baseline: emit 1 for each sample with probability `prob`.

    Only the batch size (inputs.shape[0]) is read; the input values themselves
    are ignored.

    Args:
        prob: Probability of emitting 1 for a sample (0 never fires).
        inputs: Any object whose `.shape[0]` is the batch size.

    Returns:
        A float64 array of shape (batch, 1) holding 0.0 or 1.0.
    """
    batch = inputs.shape[0]
    draws = np.random.rand(batch, 1)
    # A uniform draw in [0, 1) exceeds 1 - prob with probability prob.
    return (draws > 1. - prob).astype(np.float64)

In [40]:
# Sanity check: with prob=0 the comparison inside predict() is `rand > 1.0`,
# which never holds, so the displayed result is a (4, 1) column of zeros.
a = np.zeros((4, 5))

predict(0, a)

array([[0.],
       [0.],
       [0.],
       [0.]])

Load train fragment 1 for validation, because it's the smallest

In [41]:
class SingleFragmentDataset(thd.Dataset):
    """Per-pixel patch dataset over a single fragment.

    Every pixel where the padded mask equals 1 is one sample. A sample is the
    block of surface values covering all loaded slices and a
    (2 * BUFFER + 1) x (2 * BUFFER + 1) window centred on that pixel.

    NOTE(review): a class with this same name is defined in another cell of
    this notebook; whichever cell runs last wins, so keep a single definition.
    """

    def __init__(self, fragment, is_test=False):
        """Load surface, mask and (unless is_test) ink labels for `fragment`.

        Args:
            fragment: Fragment identifier, forwarded to the loader functions.
            is_test: When True, read the 'test' split and skip the labels.
        """
        self.is_test = is_test
        split = 'test' if is_test else 'train'
        self.surface = load_fragment_surface(fragment, split)
        print(self.surface.dtype)
        self.mask = load_mask(fragment, split)
        self.inklabels = load_inklabels(fragment, split) if not is_test else None
        # (N, 2) array of (y, x) coordinates, in padded-array space, where mask == 1.
        self.pixels = np.stack(np.where(self.mask == 1), axis=1)

    def __len__(self):
        """Return the number of samples, i.e. of masked-in pixels."""
        return self.pixels.shape[0]

    def get_pixel_number(self, y, x):
        """Return the 1-based row-major index of (y, x) within the padded surface.

        NOTE(review): both the coordinates handed in by __getitem__ and the row
        width (surface.shape[2]) include the BUFFER padding - confirm that
        whatever consumes these numbers expects padded coordinates.
        """
        return 1 + y * self.surface.shape[2] + x

    def __getitem__(self, index):
        """Return the sample for the index-th masked-in pixel.

        Returns:
            (patch, label) for training data, where label is a float32 array of
            shape (1,) holding the ink label at the pixel; (patch, pixel_number)
            for test data. `patch` is a float32 array of shape
            (n_slices, 2 * BUFFER + 1, 2 * BUFFER + 1).
        """
        y, x = self.pixels[index]
        # The zero padding guarantees this window stays inside the array for
        # every masked-in pixel (padding cells are 0, hence never selected).
        surface = self.surface[
            :, y - BUFFER:y + BUFFER + 1, x - BUFFER:x + BUFFER + 1
        ].astype(np.float32)
        if self.is_test:
            return surface, self.get_pixel_number(y, x)
        labels = self.inklabels[y, x].reshape((1, )).astype(np.float32)
        return surface, labels

In [23]:
# Build the dataset from train fragment 1 (is_test defaults to False, so labels are loaded).
# NOTE(review): the fragment id is hard-coded here; the VAL_FRAGMENT constant is not used.
val_dataset = SingleFragmentDataset(1)
print(val_dataset.surface.shape)
# shuffle=True draws batches in random order, so an evaluation that stops after a
# fixed number of batches still samples pixels from across the whole fragment.
val_loader = thd.DataLoader(val_dataset, BATCH_SIZE, shuffle=True)

Loading fragment 1 surface
float16
Loading fragment 1 mask
Loading fragment 1 labels
(10, 8201, 6350)


Define the validation function

In [52]:
def evaluate(val_loader, threshold):
    """Score the random baseline on at most MAX_VAL_STEPS batches of `val_loader`.

    For every batch, predict() is called with `threshold` as its probability
    argument and the result is scored against the batch labels.

    Args:
        val_loader: Iterable of (inputs, labels) batches; labels must be CPU
            torch tensors (they are flattened with .view(-1).numpy()).
        threshold: Value forwarded to predict() as the probability of emitting 1.

    Returns:
        (mean F0.5, mean precision, mean recall) over the scored batches.
        NOTE(review): these are unweighted means of per-batch scores, which in
        general differ from the scores computed over all pixels at once. With
        an empty loader np.mean of an empty list yields nan.
    """
    print("Evaluating for threshold %f" % threshold)
    fbetas = []
    precisions = []
    recalls = []

    for i, (xs, ys) in enumerate(pbar := tqdm(val_loader, disable=DISABLE_TQDM)):
        # `>=` so that exactly MAX_VAL_STEPS batches (indices 0 .. MAX_VAL_STEPS - 1)
        # are scored; the previous `>` let one extra batch through.
        if i >= MAX_VAL_STEPS:
            break
        # Flatten both sides once instead of once per metric.
        pred_ink = predict(threshold, xs.to(DEVICE)).flatten()
        true_ink = ys.view(-1).numpy()
        fbeta = fbeta_score(true_ink, pred_ink, beta=0.5)
        precision = precision_score(true_ink, pred_ink)
        recall = recall_score(true_ink, pred_ink)
        pbar.set_postfix({"precision": precision, "recall": recall, "fbeta": fbeta})

        fbetas.append(fbeta)
        precisions.append(precision)
        recalls.append(recall)

    return np.mean(fbetas), np.mean(precisions), np.mean(recalls)

Run validation

In [None]:
%%time

def evaluate_for_thresholds(loader=None, thresholds=None):
    """Sweep thresholds through evaluate(), plot the scores and return the best one.

    Args:
        loader: Batches to evaluate on. Defaults to the notebook-global
            `val_loader`, which keeps the original zero-argument call working.
        thresholds: Values to try. Defaults to 50 evenly spaced points in [0, 1].

    Returns:
        The first threshold achieving the highest mean F0.5, or 0 if no
        threshold scores above 0.
    """
    if loader is None:
        loader = val_loader
    if thresholds is None:
        thresholds = np.linspace(0, 1, 50)

    best_fbeta = 0.
    best_threshold = 0

    fbetas = []
    precisions = []
    recalls = []

    for threshold in thresholds:
        fbeta, precision, recall = evaluate(loader, threshold)
        # Strict `>` keeps the earliest threshold on ties; nan scores never win.
        if fbeta > best_fbeta:
            best_fbeta = fbeta
            best_threshold = threshold
        fbetas.append(fbeta)
        precisions.append(precision)
        recalls.append(recall)

    plt.plot(thresholds, fbetas, label="fbeta")
    plt.plot(thresholds, precisions, label="precision")
    plt.plot(thresholds, recalls, label="recall")
    # Label the axes so the figure can be read on its own.
    plt.xlabel("threshold")
    plt.ylabel("mean per-batch score")
    plt.legend()

    return best_threshold

# Run the sweep on the validation loader and report the threshold with the highest mean F0.5.
best_threshold = evaluate_for_thresholds()
print("Best threshold:", best_threshold)

Evaluating for threshold 0.000000


  1%|          | 1001/113840 [00:30<58:00, 32.42it/s, precision=0, recall=0, fbeta=0] 


Evaluating for threshold 0.020408


  1%|          | 1001/113840 [00:30<57:40, 32.61it/s, precision=0, recall=0, fbeta=0]               


Evaluating for threshold 0.040816


  1%|          | 1001/113840 [00:30<57:21, 32.78it/s, precision=0, recall=0, fbeta=0]               


Evaluating for threshold 0.061224


  1%|          | 1001/113840 [00:30<57:37, 32.64it/s, precision=0.182, recall=0.0909, fbeta=0.152]   


Evaluating for threshold 0.081633


  1%|          | 1001/113840 [00:30<57:44, 32.57it/s, precision=0.2, recall=0.08, fbeta=0.154]      


Evaluating for threshold 0.102041


  1%|          | 1001/113840 [00:30<58:07, 32.36it/s, precision=0.238, recall=0.104, fbeta=0.189]    


Evaluating for threshold 0.122449


  0%|          | 81/113840 [00:06<50:38, 37.43it/s, precision=0.333, recall=0.175, fbeta=0.282]      