In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so use whichever spelling this install ships.
plt.style.use(
    "seaborn-white" if "seaborn-white" in plt.style.available else "seaborn-v0_8-white"
)
import seaborn as sns

sns.set_style("white")

from sklearn.model_selection import train_test_split

from torch import nn

from tqdm import tqdm
from torch.nn import Sequential

In [None]:
from image_processing import upsample, downsample
from data import prepare_data, test_images_path, load_images_as_arrays, TGSSaltDataset
from visualisation import (
    plot_coverage_and_coverage_class,
    scatter_coverage_and_coverage_class,
    plot_depth_distributions,
    plot_predictions,
    plot_images,
)
from model import model_path, save_checkpoint, update_state
from metrics import iou_metric_batch, my_iou_metric
from toolz import compose
from data import rle_encode
import datetime
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch

from torch.utils import data

from resnetlike import UNetResNet
from training import train, test
from collections import defaultdict
import logging
import random
from utils import create_optimizer, tboard_log_path
import uuid
import itertools as it
from operator import itemgetter
import shutil
from losses import lovasz_hinge

In [None]:
# Timestamp of this run; reused below for the notebook id, the start-up log
# line and the submission file name.
now = datetime.datetime.now()

In [None]:
# --- Data / loader settings ---
img_size_target = 101  # side length used when reshaping the test images
batch_size = 128  # batch size of the test DataLoader
num_workers = 0  # DataLoader worker processes
seed = 42  # shared seed for torch / numpy / random

# --- Model / training settings ---
base_channels = 32  # second argument passed to UNetResNet
epochs = 70
# NOTE(review): learning_rate (0.1) differs from optim_config["base_lr"] (0.01)
# and is not read by any cell visible here — confirm which value is intended.
learning_rate = 0.1
# Using Cosine Annealing with warm restarts, the number of times to oscillate
num_cycles = 6

# Unique identifier for this run: date plus a random UUID.
notebook_id = f"{now:%d%b%Y}_{uuid.uuid4()}"

optim_config = dict(
    optimizer="sgd",
    base_lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
    epochs=epochs,
    scheduler="cosine",
    lr_min=0,
)

In [None]:
# Turn on cuDNN's benchmark mode for this session.
torch.backends.cudnn.benchmark = True

# INFO-level logging through a module-scoped logger; record when the run began.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info(f"Started {now}")

In [None]:
# Seed PyTorch, NumPy and the stdlib RNG (in that order) with the shared seed.
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(seed)

In [None]:
# Build the network. NOTE(review): the first argument (1) is presumably the
# number of input channels — confirm against resnetlike.UNetResNet.
model = UNetResNet(1, base_channels)

In [None]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Wrap in DataParallel before loading weights: the checkpoint is loaded into
# this wrapped model in the next cell.
# NOTE(review): presumably the saved keys carry DataParallel's "module." prefix — confirm.
model = nn.DataParallel(model)

# Module.to returns the module itself; rebinding keeps the cell from echoing
# the full model repr as its output.
model = model.to(device)

In [None]:
# Restore the saved weights into the wrapped model.
filename = os.path.join(model_path(), "model_lovasz_best_state.pth")
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU can still be opened on another device or on CPU.
# NOTE(security): torch.load unpickles the file — only load trusted checkpoints.
checkpoint = torch.load(filename, map_location=device)
model.load_state_dict(checkpoint["state_dict"])

In [None]:
def predict_tta(model, image):
    """Predict with flip test-time augmentation.

    Runs ``model`` on ``image`` and on a copy of it reversed along axis 3,
    reverses the second output back along axis 3, and returns the mean of the
    two outputs.

    Args:
        model: Callable network. Inputs are moved to the device of its first
            parameter; if it has no parameters, ``cuda:0`` is used when CUDA
            is available and the CPU otherwise.
        image: Tensor with at least four dimensions (axis 3 is the one that
            is reversed). It is cast to float before being fed to the model.
            NOTE(review): presumably laid out as (batch, channel, height,
            width) — confirm against the data loader.

    Returns:
        numpy.ndarray holding the averaged model output.
    """
    # Follow the model's own device instead of hard-coding CUDA device 0.
    try:
        device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        image = image.type(torch.float).to(device)
        image_reflect = torch.flip(image, dims=[3])

        outputs = model(image)
        outputs_reflect = model(image_reflect)

        # Undo the reflection on the second prediction before averaging.
        averaged = (outputs + torch.flip(outputs_reflect, dims=[3])) / 2

    return averaged.cpu().numpy()

In [None]:
# Switch the network to evaluation mode, then run flip-TTA on every validation batch.
model.eval()
predictions = [
    predict_tta(model, batch_images) for batch_images, _ in tqdm(val_data_loader)
]

# Stack the per-batch outputs and drop singleton axes.
preds_valid = np.squeeze(np.concatenate(predictions, axis=0))

# NOTE(review): downsample(128, 101) presumably builds a 128 -> 101 pixel
# resizer — confirm in image_processing.
downsample_to = downsample(128, 101)
preds_valid = np.array([downsample_to(pred) for pred in preds_valid])

plot_predictions(
    train_df, preds_valid, ids_valid, figsize=(16, 10), grid_width=5, max_images=15
)

In [None]:
## Scoring for last model, choose threshold using validation data
# Candidate thresholds are laid out as 31 probabilities in [0.3, 0.7] and then
# mapped through the logit, log(p / (1 - p)), before being compared with
# preds_valid (so preds_valid is presumably raw logits — confirm).
prob_grid = np.linspace(0.3, 0.7, 31)
thresholds = np.log(prob_grid / (1 - prob_grid))

# Bring the ground-truth masks to the same size as the predictions.
y_valid_down = np.array([downsample_to(mask) for mask in y_valid.squeeze()])

ious = [
    iou_metric_batch(y_valid_down, np.int32(preds_valid > th))
    for th in tqdm(thresholds)
]

# Keep the threshold with the highest score (first one on ties).
threshold_best_index = np.argmax(ious)
threshold_best = thresholds[threshold_best_index]
iou_best = ious[threshold_best_index]

In [None]:
# IoU as a function of the candidate threshold, with the best one marked.
plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, "xr", label="Best threshold")
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
# The marker carries a label, so the legend has to be drawn on this figure.
plt.legend();

In [None]:
# Binarise every validation prediction at the selected threshold.
preds_thresh_iter = map(
    lambda pred: np.array(np.round(pred > threshold_best), dtype=np.float32),
    preds_valid,
)

# Materialise preds_thresh BEFORE plotting: it must exist when
# plot_predictions is called, otherwise a fresh kernel raises NameError.
# NOTE(review): preds_valid was already passed through downsample_to when it
# was built — confirm this second pass is intended.
preds_thresh = np.array(list(map(downsample_to, preds_thresh_iter)))

plot_predictions(
    train_df, preds_thresh, ids_valid, max_images=15, grid_width=5, figsize=(16, 10)
)

In [None]:
# Load the test images in test_df.index order.
x_test = load_images_as_arrays(test_df.index, test_images_path())

# NOTE(review): upsample_to is not defined in any cell visible here (only
# downsample_to is) — confirm it is created before this cell runs.
x_test = [upsample_to(img) for img in x_test]

# Add a singleton channel axis.
# NOTE(review): the reshape uses img_size_target (101); if upsample_to enlarges
# the images (e.g. to 128) this size will not match — confirm.
x_test = np.array(x_test).reshape(-1, 1, img_size_target, img_size_target)

In [None]:
# Wrap the test images in the project's dataset class.
# NOTE(review): is_test=True presumably means no masks are attached — confirm in data.py.
dataset_test = TGSSaltDataset(x_test, is_test=True)

In [None]:
# shuffle=False keeps batches in dataset order: predictions are later matched
# to test_df.index by position. drop_last=False keeps the final partial batch.
test_loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
    drop_last=False,
    pin_memory=True,
)
test_data_loader = data.DataLoader(dataset_test, **test_loader_options)

In [None]:
# Evaluation mode, then flip-TTA predictions for every test batch.
# NOTE(review): the validation loop unpacks (image, _) from each batch while
# this loop passes the batch through whole — confirm what TGSSaltDataset
# yields when is_test=True.
model.eval()
predictions = [
    predict_tta(model, test_batch) for test_batch in tqdm(test_data_loader)
]

In [None]:
# Stack the per-batch outputs along the batch axis and drop singleton axes.
preds_test = np.concatenate(predictions, axis=0).squeeze()

In [None]:
# Post-processing pipeline for one test prediction. toolz.compose applies its
# arguments right-to-left, so the order is:
#   threshold at threshold_best -> downsample_to -> np.round -> rle_encode
# threshold_best is looked up when transform is called, not when it is built.
transform = compose(rle_encode, np.round, downsample_to, lambda x: x > threshold_best)

In [None]:
# Map every test id to its encoded mask; ids and predictions are paired by position.
test_ids = test_df.index.values
pred_dict = {
    test_ids[i]: transform(preds_test[i]) for i in tqdm(range(len(test_ids)))
}

In [None]:
# One row per test id, with the encoded mask in a single "rle_mask" column.
sub = pd.DataFrame.from_dict(pred_dict, orient="index")
sub.index.name = "id"
sub.columns = ["rle_mask"]

# The file name carries the run's date and hour; the CSV is written next to the model files.
filename = os.path.join(model_path(), f"submission_{now:%d%b%Y_%H}.csv")
sub.to_csv(filename)