In [None]:
# Number of images collected per batch by the image generator (its default).
BATCH_SIZE = 16

# Directory of this run. Both names refer to the same location. The trailing
# slash matters: other cells append file names to `output_path` directly.
output_path = weight_save_path = 'output_201902132137/'

In [None]:
import pandas as pd
import numpy as np
from skimage.io import imread
import matplotlib.pyplot as plt
% matplotlib inline
import os

# Limit CUDA to device index 1. This is set before the model library is
# imported further down the notebook.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

# Dataset layout: <ship_dir>/train_v2 and <ship_dir>/test_v2.
ship_dir = '../ship_detection/data'
train_image_dir, test_image_dir = (
    os.path.join(ship_dir, split_name) for split_name in ('train_v2', 'test_v2')
)

In [None]:
def make_image_gen(in_df, batch_size = BATCH_SIZE):
    """Endlessly yield (images, masks) batches built from a mask table.

    Rows of ``in_df`` are grouped by ``ImageId``. For each group the image is
    read from ``train_image_dir`` and the group's ``EncodedPixels`` values are
    converted to a single mask by ``masks_as_image``.

    NOTE(review): ``masks_as_image`` is not defined in the visible cells of
    this notebook; it must already exist in the kernel namespace.

    Args:
        in_df: DataFrame with ``ImageId`` and ``EncodedPixels`` columns.
        batch_size: Number of images collected before a batch is yielded.

    Yields:
        Tuple ``(rgb, mask)``: the stacked images divided by 255.0, and the
        stacked masks with a trailing singleton axis added.

    Raises:
        ValueError: If ``in_df`` contains no images. Without this check the
            endless loop below would spin forever without yielding anything.

    Note:
        Images left over at the end of one pass are not dropped; they are
        carried over into the first batch of the next pass.
    """
    all_batches = list(in_df.groupby('ImageId'))
    if not all_batches:
        raise ValueError("make_image_gen: in_df contains no images to iterate over")
    out_rgb = []
    out_mask = []
    while True:
        # Re-shuffle the image order at the start of every pass.
        np.random.shuffle(all_batches)
        for c_img_id, c_masks in all_batches:
            rgb_path = os.path.join(train_image_dir, c_img_id)
            c_img = imread(rgb_path)
            c_mask = np.expand_dims(masks_as_image(c_masks['EncodedPixels'].values), -1)
            out_rgb.append(c_img)
            out_mask.append(c_mask)
            if len(out_rgb) >= batch_size:
                yield np.stack(out_rgb, 0)/255.0, np.stack(out_mask, 0)
                out_rgb, out_mask = [], []

In [None]:
# Load the validation table and draw one batch from the endless generator.
valid_df = pd.read_csv('valid_df.csv')
valid_gen = make_image_gen(valid_df)
valid_x, valid_y = next(valid_gen)

# Shapes of the image batch and of the mask batch, in that order.
print(*(batch.shape for batch in (valid_x, valid_y)))

In [None]:
from segmentation_models import Unet

# Build the segmentation network; 'resnet34' is the only argument passed to
# Unet, every other setting is left at the library default.
seg_model = Unet('resnet34')

# Join path components instead of concatenating strings, so the result is
# valid whether or not `output_path` ends with a path separator.
weight_path = os.path.join(output_path, 'seg_model_weights.best.hdf5')
seg_model.load_weights(weight_path)

In [None]:
class TTA_ModelWrapper():
    """Test-time-augmentation (TTA) wrapper for Keras-style image models.

    A batch is predicted under the eight symmetries of the image plane
    (identity, two axis flips, two diagonal reflections and three rotations).
    Each prediction is mapped back to the original orientation and the eight
    results are averaged.

    Args:
        model: Object exposing ``predict(X, verbose=...)``. Its output is
            expected to follow the spatial layout of its input, with the
            batch on axis 0 and the two image axes on axes 1 and 2.
    """

    def __init__(self, model):
        self.model = model

    def predict(self, X, verbose=1):
        """Average the wrapped model's predictions over eight augmentations.

        Args:
            X (numpy array of dim 4): Batch laid out as
                (batch, height, width, channels).
            verbose: Forwarded to every ``model.predict`` call.

        Returns:
            Array with the layout of ``model.predict(X)``: the mean of the
            eight de-augmented predictions.
        """
        spatial = (1, 2)       # the two image axes; axis 0 is the batch
        swap_hw = (0, 2, 1, 3)  # transpose of the image plane

        # (forward, inverse) pairs. Every transform touches only the image
        # axes, so samples keep their position in the batch. np.flipud /
        # np.rot90 with default axes would act on the batch axis of a 4-D
        # array, hence the explicit axes everywhere.
        transforms = [
            (lambda a: a,
             lambda a: a),
            (lambda a: np.flip(a, axis=1),
             lambda a: np.flip(a, axis=1)),
            (lambda a: np.flip(a, axis=2),
             lambda a: np.flip(a, axis=2)),
            (lambda a: np.transpose(a, swap_hw),
             lambda a: np.transpose(a, swap_hw)),
            (lambda a: np.rot90(a, 1, spatial),
             lambda a: np.rot90(a, 3, spatial)),
            (lambda a: np.rot90(a, 2, spatial),
             lambda a: np.rot90(a, 2, spatial)),
            (lambda a: np.rot90(a, 3, spatial),
             lambda a: np.rot90(a, 1, spatial)),
            # Anti-diagonal reflection: transpose followed by a 180-degree
            # turn of the image plane. It is its own inverse.
            (lambda a: np.rot90(np.transpose(a, swap_hw), 2, spatial),
             lambda a: np.rot90(np.transpose(a, swap_hw), 2, spatial)),
        ]

        total = None
        for forward, inverse in transforms:
            restored = inverse(self.model.predict(forward(X), verbose=verbose))
            total = restored if total is None else total + restored

        return total / len(transforms)

    def _expand(self, x):
        """Return ``x`` with a new leading axis of length one."""
        return np.expand_dims(x, axis=0)

In [None]:
# Wrap the loaded network for test-time augmentation and predict the batch
# drawn from the validation generator.
tta_model = TTA_ModelWrapper(seg_model)
pred_y = tta_model.predict(valid_x)

# Quick sanity check: shape, then min / max / mean of the predicted values.
print(pred_y.shape, *(getattr(pred_y, stat)() for stat in ('min', 'max', 'mean')))

In [None]:
# Distribution of the predicted values on a log-scaled count axis.
fig, ax = plt.subplots(1, 1, figsize = (6, 6))
# `log=True` lets hist() put the count axis on a log scale itself. This
# avoids set_yscale's `nonposy` keyword, which current Matplotlib releases no
# longer accept (it was renamed to `nonpositive`).
ax.hist(pred_y.ravel(), np.linspace(0, 1, 20), log=True)
ax.set_xlim(0, 1)
ax.set_xlabel('Predicted value')
ax.set_ylabel('Count (log scale)')

In [None]:
# src: https://www.kaggle.com/aglotero/another-iou-metric
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Score one predicted mask against its ground truth.

    Both arrays are histogrammed into two bins each. The first bin of each is
    discarded as background, which leaves a single IoU value. That value is
    compared against the thresholds 0.50, 0.55, ..., 0.95, and the
    per-threshold precisions TP / (TP + FP + FN) are averaged.

    Args:
        y_true_in: Ground-truth mask array.
        y_pred_in: Predicted mask array with the same number of elements.
        print_table: When true, print one row per threshold plus the average.

    Returns:
        Mean of the per-threshold precision values.
    """
    n_true_bins, n_pred_bins = 2, 2

    # Joint histogram: entry [i, j] counts elements in true bin i and pred bin j.
    overlap = np.histogram2d(
        y_true_in.flatten(), y_pred_in.flatten(), bins=(n_true_bins, n_pred_bins)
    )[0]

    # Per-bin element counts, shaped as a column (true) and a row (pred) so
    # that their sum broadcasts to the shape of `overlap`.
    true_area = np.histogram(y_true_in, bins=n_true_bins)[0][:, np.newaxis]
    pred_area = np.histogram(y_pred_in, bins=n_pred_bins)[0][np.newaxis, :]
    combined = true_area + pred_area - overlap

    # Drop the first (background) bin on both axes and guard the division.
    overlap = overlap[1:, 1:]
    combined = combined[1:, 1:]
    combined[combined == 0] = 1e-9
    iou = overlap / combined

    def _count_matches(threshold):
        """Return (tp, fp, fn) for IoU values strictly above ``threshold``."""
        hits = iou > threshold
        per_true = hits.sum(axis=1)  # matches found for each true bin
        per_pred = hits.sum(axis=0)  # matches found for each predicted bin
        matched_true = (per_true == 1).sum()    # true bins matched exactly once
        unmatched_pred = (per_pred == 0).sum()  # predicted bins with no match
        unmatched_true = (per_true == 0).sum()  # true bins with no match
        return matched_true, unmatched_pred, unmatched_true

    scores = []
    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = _count_matches(t)
        total = tp + fp + fn
        p = tp / total if total > 0 else 0
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        scores.append(p)

    average = np.mean(scores)
    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(average))
    return average

def iou_metric_batch(y_true_in, y_pred_in):
    """Average ``iou_metric`` over the leading axis of both arrays.

    Args:
        y_true_in: Ground-truth masks, one per index along axis 0.
        y_pred_in: Predicted masks, indexed the same way.

    Returns:
        Mean of the per-item ``iou_metric`` values.
    """
    n_items = y_true_in.shape[0]
    per_item = [iou_metric(y_true_in[i], y_pred_in[i]) for i in range(n_items)]
    return np.mean(per_item)

In [None]:
from tqdm import tqdm_notebook

# Candidate binarisation thresholds: 50 evenly spaced values covering [0, 1].
thresholds = np.linspace(0, 1, 50)

# Batch-averaged metric of the validation batch at every candidate threshold.
ious = np.array([
    iou_metric_batch(valid_y, (pred_y > threshold).astype(np.int32))
    for threshold in tqdm_notebook(thresholds)
])

In [None]:
# Only candidates 9 .. len-11 compete for "best". The +9 maps the position
# inside that slice back to an index into `thresholds` / `ious`.
threshold_best_index = ious[9:-10].argmax() + 9
iou_best, threshold_best = ious[threshold_best_index], thresholds[threshold_best_index]

# Score curve with the selected threshold marked by a red cross.
plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, "xr", label="Best threshold")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.legend()

In [None]:
from skimage.morphology import label

tta_model = TTA_ModelWrapper(seg_model)
def predict(img, path=test_image_dir):
    """Segment one image file with the TTA model and binarise the result.

    Args:
        img: File name of the image inside ``path``.
        path: Directory that holds the image.

    Returns:
        Tuple ``(mask, batch)``. ``batch`` is the image divided by 255.0 with
        a leading batch axis added. ``mask`` is a float32 array holding 1.0
        where the first prediction exceeds ``threshold_best`` and 0.0
        elsewhere.
    """
    image_path = os.path.join(path, img)
    c_img = imread(image_path)[np.newaxis] / 255.0
    probabilities = tta_model.predict(c_img, verbose=0)
    binary_mask = (probabilities[0, :, :, :] > threshold_best).astype(np.float32)
    return binary_mask, c_img

## Get a sample of each group of ship count
# NOTE(review): `masks_as_color`, `multi_rle_encode` and `masks` are not
# defined in the visible cells of this notebook; they must already exist in
# the kernel namespace.
samples = valid_df.groupby('ships').apply(lambda x: x.sample(1))
# squeeze=False keeps `m_axs` two-dimensional even when there is only one
# ship-count group; a squeezed 1-D array would break the row-wise unpacking
# in the loop below.
fig, m_axs = plt.subplots(samples.shape[0], 4, figsize = (15, samples.shape[0]*4), squeeze=False)
for c_ax in m_axs.flatten():
    c_ax.axis('off')

# One row per sampled image: input, prediction, re-encoded prediction, truth.
for (ax1, ax2, ax3, ax4), c_img_name in zip(m_axs, samples.ImageId.values):
    first_seg, first_img = predict(c_img_name, train_image_dir)
    ax1.imshow(first_img[0])
    ax1.set_title('Image: ' + c_img_name)
    ax2.imshow(first_seg[:, :, 0])
    ax2.set_title('Model Prediction')
    reencoded = masks_as_color(multi_rle_encode(first_seg[:, :, 0]))
    ax3.imshow(reencoded)
    ax3.set_title('Prediction Re-encoded')
    ground_truth = masks_as_color(masks.query('ImageId=="{}"'.format(c_img_name))['EncodedPixels'])
    ax4.imshow(ground_truth)
    ax4.set_title('Ground Truth')

fig.savefig('validation.png')

In [None]:
import pandas as pd
# Table with an `id` and a `p_ship` column, both used by later cells.
# Presumably the output of a separate ship/no-ship classifier -- confirm.
boat_df = pd.read_csv("../ship_detection/output/ship_detection_lafoss.csv")
boat_df.head(n=5)

In [None]:
# Boolean mask over `boat_df`: rows whose `p_ship` value exceeds 0.5.
is_boat = boat_df['p_ship'] > 0.5
print('Found {} boats'.format(is_boat.sum()))

In [None]:
# Names of all entries in the test image directory.
test_paths = np.asarray(os.listdir(test_image_dir))
print(test_paths.shape[0], 'test images found')

In [None]:
from tqdm import tqdm_notebook

def pred_encode(img, **kwargs):
    """Predict one image and return its run-length-encoded rows.

    Args:
        img: File name handed to ``predict`` (which uses its default
            directory).
        **kwargs: Forwarded unchanged to ``multi_rle_encode``.

    Returns:
        List of ``[img, rle]`` pairs, one for each encoding returned by
        ``multi_rle_encode`` that is not ``None``.
    """
    mask, _ = predict(img)
    rows = []
    for rle in multi_rle_encode(mask, **kwargs):
        if rle is not None:
            rows.append([img, rle])
    return rows

# Encode predictions only for the images flagged by `is_boat`.
out_pred_rows = []
for c_img_name in tqdm_notebook(boat_df.id[is_boat]):
    out_pred_rows.extend(pred_encode(c_img_name))

In [None]:
# Name the columns in the constructor: an empty `out_pred_rows` then still
# yields a frame with both columns. Assigning `sub.columns` afterwards would
# raise a length mismatch on an empty frame.
sub = pd.DataFrame(out_pred_rows, columns=['ImageId', 'EncodedPixels'])
# Keep only rows that carry an encoding.
sub = sub[sub.EncodedPixels.notnull()]
sub.head()

In [None]:
# Image ids listed in the sample submission but absent from `sub` get one
# row each with an empty EncodedPixels value.
# NOTE(review): this file is read from '../data', whereas the images come
# from '../ship_detection/data' -- confirm the path is intended.
sub1 = pd.read_csv('../data/sample_submission_v2.csv')
missing_ids = np.setdiff1d(
    sub1['ImageId'].unique(), sub['ImageId'].unique(), assume_unique=True
)
sub1 = pd.DataFrame(missing_ids, columns=['ImageId'])
sub1['EncodedPixels'] = None
print(len(sub1), len(sub))

# Predicted rows first, then the rows without a prediction.
sub = pd.concat([sub, sub1])
print(len(sub))

sub.to_csv(output_path + 'submission.csv', index=False)
sub.head()