In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from random import randint

import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
sns.set_style("white")

from sklearn.model_selection import train_test_split

from skimage.transform import resize

from keras.preprocessing.image import load_img
from keras import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.models import load_model
from keras.optimizers import Adam
from keras.utils.vis_utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, concatenate, Dropout ,BatchNormalization
from keras import backend as K
from tqdm import tqdm_notebook,tnrange
from skimage.util import pad

# Preparation: side length of the original images and the side length used
# by upsample()/downsample() as the resize target.
img_size_ori = 101
img_size_target = 256

def upsample(img):
    """Resize ``img`` to ``img_size_target`` x ``img_size_target``.

    When the original and target sizes are equal the input is returned as is.
    ``preserve_range=True`` keeps the input's value range instead of rescaling.
    """
    if img_size_ori != img_size_target:
        target_shape = (img_size_target, img_size_target)
        img = resize(img, target_shape, mode='constant', preserve_range=True)
    return img
def upsample_v2(img):
    """Resize ``img`` to a fixed 256 x 256.

    Same as ``upsample`` except that the output size is the literal 256
    rather than ``img_size_target``. The input is returned unchanged when
    ``img_size_ori`` equals ``img_size_target``.
    """
    sizes_match = img_size_ori == img_size_target
    if not sizes_match:
        img = resize(img, (256, 256), mode='constant', preserve_range=True)
    return img
def reflect_pad(img):
    """Resize ``img`` to 202 x 202, then reflect-pad it by 27 on every side.

    For a 2-D input the result is 202 + 2 * 27 = 256 pixels per side.
    """
    doubled = resize(img, (101*2, 101*2), mode='constant', preserve_range=True)
    return pad(doubled, 27, 'reflect')

def downsample(img):
    """Resize ``img`` to ``img_size_ori`` x ``img_size_ori``.

    When the original and target sizes are equal the input is returned as is.
    ``preserve_range=True`` keeps the input's value range instead of rescaling.
    """
    if img_size_ori != img_size_target:
        original_shape = (img_size_ori, img_size_ori)
        img = resize(img, original_shape, mode='constant', preserve_range=True)
    return img

# Read only the first CSV column of train.csv (the "id" index), then attach
# the depth table by joining on that index.
# NOTE(review): absolute, user-specific paths; consider a configurable data directory.
train_df = pd.read_csv("/home/zhangs/lyc/salt/train.csv", index_col="id", usecols=[0])
depths_df = pd.read_csv("/home/zhangs/lyc/salt/depths.csv", index_col="id")
train_df = train_df.join(depths_df)
test_df = depths_df[~depths_df.index.isin(train_df.index)]# rows of depths.csv whose id is not in the training set
# Load every image and mask as a grayscale array divided by 255.
train_df["images"] = [np.array(load_img("/home/zhangs/lyc/salt/train/images/{}.png".format(idx), grayscale=True))/ 255 for idx in tqdm_notebook(train_df.index)]
train_df["masks"] = [np.array(load_img("/home/zhangs/lyc/salt/train/masks/{}.png".format(idx), grayscale=True)) / 255 for idx in tqdm_notebook(train_df.index)]
# Coverage = sum of the mask values divided by the image area (img_size_ori squared).
train_df["coverage"] = train_df.masks.map(np.sum) / pow(img_size_ori, 2)
def cov_to_class(val):
    """Map a coverage fraction to the smallest class ``i`` in 0..10 with ``val * 10 <= i``.

    Returns ``None`` when no such class exists (``val * 10`` greater than 10,
    or a value that compares false against every class, such as NaN).
    """
    scaled = val * 10
    return next((cls for cls in range(0, 11) if scaled <= cls), None)
# Coverage class (0..10) is what the stratified K-fold split later uses as its label.
train_df["coverage_class"] = train_df.coverage.map(cov_to_class)
# Put the depth information into the training images
MAX_DEPTH = max(train_df["z"])
print('**** Max depth in train set is :'+str(MAX_DEPTH))
# One constant plane per sample: same shape as the image, filled with z / MAX_DEPTH.
train_df["depth"] = [np.ones_like(train_df.loc[i]["images"]) * train_df.loc[i]["z"] / MAX_DEPTH
                     for i in tqdm_notebook(train_df.index)]

# Image in layer1 + depth in layer2
train_df["images_d"] = [np.dstack((train_df["images"][i],train_df["depth"][i])) for i in tqdm_notebook(train_df.index)]
# NOTE(review): this expression is not the last line of the cell, so its value is not displayed.
train_df["images_d"][0].shape
# Free up some RAM
del depths_df
# del train_df["images"]
# Sanity check

# Split the train and validation sets using K-folds

In [None]:
K_flods = 5
from sklearn.model_selection import StratifiedKFold
# Stratify on the coverage class so every fold sees a similar salt distribution.
X = train_df.index.values
y = train_df.coverage_class
# shuffle=True is required for random_state to have any effect; without it the
# seed is ignored (and newer scikit-learn versions raise a ValueError).
skf = StratifiedKFold(n_splits=K_flods, shuffle=True, random_state=1337)
skf.get_n_splits(X, y)
print(skf)
# One list per quantity; each gets one entry per fold in the next cell.
ids_train,ids_valid,x_train, x_valid, y_train, y_valid, cov_train, cov_test, depth_train, depth_test=[[] for _ in range(10)]
# Reflect-padded 256x256 single-channel images and masks for the whole training set.
X_whole = np.array(train_df.images.map(reflect_pad).tolist()).reshape(-1, 256, 256, 1)
y_whole = np.array(train_df.masks.map(reflect_pad).tolist()).reshape(-1, 256, 256, 1)

In [None]:
# Materialise the per-fold ids, images, masks, coverages and depths.
# NOTE(review): the `break` below stops after fold index 1, so only two of the
# K_flods folds end up in these lists.
for i,[train_index, test_index] in enumerate(skf.split(X, y)):
    print("the %dth flod:"%i)
    print("TRAIN:", train_index.shape, "TEST:", test_index.shape)
    ids_train.append(X[train_index])
    ids_valid.append(X[test_index])
    #
    x_train.append(X_whole[train_index])
    x_valid.append(X_whole[test_index])
    #
    y_train.append(y_whole[train_index])
    y_valid.append(y_whole[test_index]) 
    #
    cov_train.append(train_df.coverage.values[train_index]) 
    cov_test.append(train_df.coverage.values[test_index])
    #
    depth_train.append(train_df.z.values[train_index]) 
    depth_test.append(train_df.z.values[test_index]) 
    
    if i == 1:
        break
# NOTE(review): deleting the full arrays means this cell cannot be re-run
# without first re-running the cell that builds X_whole / y_whole.
del X_whole,y_whole
print(len(x_train))
print(x_train[0].shape)

In [None]:
# Plot the salt-coverage distribution of each fold to check the K-fold split
def plot_flods_coverage(cov,flods_num=5,mode='train'):
    """Draw coverage histograms: the whole training set, then one panel per fold.

    cov       -- sequence of per-fold coverage arrays; entries 0..flods_num-1 are read
    flods_num -- number of fold panels to draw after the reference panel
    mode      -- text appended to the figure title
    """
    fig, axs = plt.subplots(1, flods_num+1, figsize=(15,5))
    # Panel 0 is the reference: coverage over all of train_df.
    sns.distplot(train_df.coverage, kde=False, ax=axs[0])
    axs[0].set_xlabel("Coverage")
    for fold in range(flods_num):
        panel = axs[fold+1]
        sns.distplot(cov[fold], bins=10, kde=False, ax=panel)
        panel.set_xlabel("Coverage of k%d"%fold)
    plt.suptitle("Salt coverage of k-flods "+mode)
    plt.show()
# Plot exactly the folds that were collected. The fold loop may stop early, and
# asking for more panels than there are entries raises IndexError on cov[i-1].
plot_flods_coverage(cov_train,flods_num=len(cov_train),mode='train')
plot_flods_coverage(cov_test,flods_num=len(cov_test),mode='test')

# Data augmentation

In [None]:
# Augment fold 0 only (range(1)): append a left-right flipped copy of every
# training image and mask.
# NOTE(review): not idempotent; re-running this cell appends flipped copies again.
for i in range(1):
    x_l_r_flip = [np.fliplr(x) for x in x_train[i]]
    y_l_r_flip = [np.fliplr(x) for x in y_train[i]]

    x_train[i] = np.append(x_train[i], x_l_r_flip, axis=0)
    y_train[i] = np.append(y_train[i], y_l_r_flip, axis=0)

In [None]:
# Show the shape of fold 0's training images.
print(x_train[0].shape)

# Model build

In [None]:
from pipline.loss import *

SyntaxError: from __future__ imports must occur at the beginning of the file (loss.py, line 70)

In [None]:
# DPT_SIZE = img_size_target / 2**dpt.
# NOTE(review): in the cells shown, DPT_SIZE is referenced only from commented-out code.
dpt = 5
DPT_SIZE = int(img_size_target/pow(2,dpt))

In [None]:
from segmentation_models.segmentation_models import Unet
from segmentation_models.segmentation_models.utils import set_trainable

In [None]:
# NOTE(review): help() with no argument starts the interactive help prompt and
# waits for input, which stalls "Run All"; looks like leftover exploration.
help()

In [None]:
history_all = []
# squeeze=False keeps axs two-dimensional (shape (1, 3)) so axs[i][j] works;
# with the default, a single row comes back as a 1-D array and axs[i][0] fails.
fig, axs = plt.subplots(1, 3, figsize=(15,5), squeeze=False)
for i in range(0,1): 
    model = Unet(input_shape=(256,256,3),backbone_name='resnet34', encoder_weights='imagenet', freeze_encoder=False,decoder_use_batchnorm=True)
    model.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy",mean_iou])

    model.summary()
    # continue training    "trained_models/%dth_flod.model"%i
    # NOTE(review): early stopping watches the *training* 'mean_iou', while the
    # checkpoint and LR schedule watch validation loss; confirm this is intended.
    early_stopping = EarlyStopping(monitor = 'mean_iou',mode='max',patience=5, verbose=1)
    model_checkpoint = ModelCheckpoint("trained_models/%dth_flod.model"%i, save_best_only=True, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',mode='min',factor=0.5, patience=5, min_lr=0.00001, verbose=1)
    # Only used by the commented-out decoder pre-training step below.
    reduce_lr0 = ReduceLROnPlateau(monitor = 'val_loss',mode='min',factor=0.5, patience=1, min_lr=0.00001, verbose=1)
    #pretrain model decoder
#     model.fit(np.repeat(x_train[i][..., :1],3,axis=-1),
#               y_train[i],
#               validation_data=(np.repeat(x_valid[i][..., :1],3,axis=-1),
#               y_valid[i]), 
#               epochs=4,
#               batch_size=32,
#               callbacks=[model_checkpoint, reduce_lr0])
#     # release all layers for training
#     set_trainable(model) # set all layers trainable and recompile model
    
    #model.compile(loss=focal_loss, optimizer="sgd", metrics=["accuracy",mean_iou])
    # The model takes 3 channels, so the single grayscale channel is repeated 3 times.
    history = model.fit(np.repeat(x_train[i][..., :1],3,axis=-1),
                          y_train[i],
                          validation_data=(np.repeat(x_valid[i][..., :1],3,axis=-1),
                          y_valid[i]), 
                          epochs=160,
                          batch_size=32,
                          callbacks=[model_checkpoint, reduce_lr,early_stopping])
    history_all.append(history)
    # One row per fold: loss, accuracy, IoU.
    axs[i][0].plot(history.epoch, history.history["loss"], label="Train loss")
    axs[i][0].plot(history.epoch, history.history["val_loss"], label="Validation loss")
    axs[i][1].plot(history.epoch, history.history["acc"], label="Train accuracy")
    axs[i][1].plot(history.epoch, history.history["val_acc"], label="Validation accuracy")
    axs[i][2].plot(history.epoch, history.history["mean_iou"], label="Train iou")
    axs[i][2].plot(history.epoch, history.history["val_mean_iou"], label="Validation iou")
    # The labels above are only visible once a legend is drawn.
    for panel in axs[i]:
        panel.legend()

In [None]:
# Display any pending matplotlib figures.
plt.show()

# Predict with K-folds

In [None]:
def predit_with_kfolds(K_flods,x_img):
    """Average the predictions of the saved models of folds 0 .. K_flods-1.

    K_flods -- number of fold models to load from
               "trained_models/<i>th_flod.model"
    x_img   -- single-channel image batch; the channel is repeated 3 times
               before it is passed to each model
               (presumably built with reflect_pad; verify against caller)

    Returns the element-wise mean of the per-fold predictions, each cropped
    to rows/columns 27:229 and resized with downsample().

    Raises ValueError when K_flods is smaller than 1.
    """
    if K_flods < 1:
        raise ValueError("K_flods must be at least 1")
    preds_valid_all = []
    for i in range(K_flods):
        model_flods = load_model("trained_models/%dth_flod.model"%i, custom_objects={'mean_iou': mean_iou})
        preds_valid_flods = model_flods.predict(np.repeat(x_img,3,axis=-1))
        print(preds_valid_flods.shape)
        # Keep the central 202x202 window (drops a 27 px border), then resize it.
        preds_valid_flods = np.array([downsample(x) for x in preds_valid_flods[:,27:229,27:229,:]])
        print(preds_valid_flods.shape)
        preds_valid_all.append(preds_valid_flods)
    # Average over however many folds were requested; this used to be
    # hard-coded to exactly five entries regardless of K_flods.
    preds_valid = sum(preds_valid_all) / len(preds_valid_all)
    return preds_valid

def predit_with_one_fold(model_num,x_img):
    """Predict with the single saved model of fold ``model_num``.

    model_num -- fold index; the model is loaded from
                 "trained_models/<model_num>th_flod.model"
    x_img     -- single-channel image batch; the channel is repeated 3 times
                 before it is passed to the model
                 (presumably built with reflect_pad; verify against caller)

    Returns the predictions cropped to rows/columns 27:229 and resized with
    downsample().
    """
    model_flods = load_model("trained_models/%dth_flod.model"%model_num, custom_objects={'mean_iou': mean_iou})
    preds_valid_flods = model_flods.predict(np.repeat(x_img,3,axis=-1))
    print(preds_valid_flods.shape)
    # Keep the central 202x202 window (drops a 27 px border), then resize it.
    preds_valid_flods = np.array([downsample(x) for x in preds_valid_flods[:,27:229,27:229,:]])
    print(preds_valid_flods.shape)
    # Return this call's predictions. The old code returned `preds_valid`,
    # which is not a local: it raised NameError or silently handed back
    # whatever global of that name an earlier cell had left behind.
    return preds_valid_flods

In [None]:
# Build reflect-padded 256x256 single-channel inputs and masks for a hold-out frame.
# NOTE(review): `my_test_pd` is not defined in any cell shown here; on a fresh
# kernel this raises NameError unless it is created elsewhere.
valid_x = np.array(my_test_pd.images.map(reflect_pad).tolist()).reshape(-1, 256, 256, 1)
valid_y = np.array(my_test_pd.masks.map(reflect_pad).tolist()).reshape(-1, 256, 256, 1)
valid_x.shape

### Single-model prediction

In [None]:
preds_valid = predit_with_one_fold(0,valid_x)
# valid_y is reflect-padded to 256x256. Crop the same 27:229 window the
# prediction helper crops before resizing; otherwise the ground truth still
# contains the padding border and does not line up with the predictions.
y_valid = np.array([downsample(x) for x in valid_y[:,27:229,27:229,:]])

In [None]:
# Predict fold 0's validation images with the separately saved "finetue0" model.
model_flods = load_model("finetue0", custom_objects={'mean_iou': mean_iou})
preds_valid_flods = model_flods.predict(np.repeat(x_valid[0][..., :1],3,axis=-1))
print(preds_valid_flods.shape)
# Keep the central 202x202 window (drops a 27 px border), then resize it.
preds_valid = np.array([downsample(x) for x in preds_valid_flods[:,27:229,27:229,:]])
# Report the shape after cropping and resizing; the old line printed the raw
# model output shape a second time.
print(preds_valid.shape)

In [None]:
# Fold 0's masks are reflect-padded to 256x256. Crop the same 27:229 window
# used for the predictions before resizing, so masks and predictions align.
# NOTE(review): this rebinds y_valid from a list of folds to a single array,
# so the cell cannot be run twice in a row.
y_valid = np.array([downsample(x) for x in  y_valid[0][:,27:229,27:229,:]])

In [None]:
# Ensemble prediction: average of the per-fold models (see predit_with_kfolds).
# An older inline version of this step, fed by a separate depth input, was
# removed from this cell.
preds_valid = predit_with_kfolds(K_flods,valid_x)
# valid_y is reflect-padded to 256x256. Crop the same 27:229 window the
# prediction helper crops before resizing; otherwise the ground truth still
# contains the padding border and does not line up with the predictions.
y_valid = np.array([downsample(x) for x in valid_y[:,27:229,27:229,:]])

In [None]:
# Display the shape of the validation predictions.
preds_valid.shape

In [None]:
# max_images = 60
# grid_width = 15
# grid_height = int(max_images / grid_width)
# fig, axs = plt.subplots(grid_height, grid_width, figsize=(grid_width, grid_height))
# for i, idx in enumerate(ids_valid[60:60+max_images]):
#     img = train_df.loc[idx].images
#     mask = train_df.loc[idx].masks
#     pred = preds_valid[i]
#     ax = axs[int(i / grid_width), i % grid_width]
#     ax.imshow(img, cmap="Greys")
#     ax.imshow(mask, alpha=0.3, cmap="Greens")
#     ax.imshow(pred, alpha=0.3, cmap="OrRd")
#     ax.text(1, img_size_ori-1, train_df.loc[idx].z, color="black")
#     ax.text(img_size_ori - 1, 1, round(train_df.loc[idx].coverage, 2), color="black", ha="right", va="top")
#     ax.text(1, 1, train_df.loc[idx].coverage_class, color="black", ha="left", va="top")
#     ax.set_yticklabels([])
#     ax.set_xticklabels([])
# plt.suptitle("Green: salt, Red: prediction. Top-left: coverage class, top-right: salt coverage, bottom-left: depth")

In [None]:
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Average precision of one predicted mask over IoU thresholds 0.50 .. 0.95.

    y_true_in   -- ground-truth mask; values >= 0.5 count as foreground
    y_pred_in   -- predicted mask of the same size; values >= 0.5 count as foreground
    print_table -- when True, print TP/FP/FN and precision per threshold

    The mask is treated as a single foreground object. At each threshold the
    precision is 1 when the foreground IoU exceeds the threshold and 0
    otherwise; the return value is the mean over the ten thresholds.

    Two empty masks score 1. An empty mask against a non-empty one scores 0.
    The earlier histogram-based version let NumPy pick the bin range from the
    data, so a constant mask (all 0 or all 1) landed entirely in the
    "foreground" bin and e.g. an empty truth against a full prediction
    scored 1.
    """
    true_fg = np.asarray(y_true_in).ravel() >= 0.5
    pred_fg = np.asarray(y_pred_in).ravel() >= 0.5

    intersection = np.count_nonzero(true_fg & pred_fg)
    union = np.count_nonzero(true_fg | pred_fg)
    # No foreground anywhere: the masks agree perfectly, avoid 0 / 0.
    iou = intersection / union if union > 0 else 1.0

    # Precision helper function
    def precision_at(threshold, iou):
        match = iou > threshold
        tp = int(match)       # Correct object: truth matched by the prediction
        fp = int(not match)   # Extra object: prediction with no matching truth
        fn = int(not match)   # Missed object: truth with no matching prediction
        return tp, fp, fn

    # Loop over IoU thresholds
    prec = []
    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        # tp + fp + fn is always 1 or 2 here, so the division is safe.
        p = tp / (tp + fp + fn)
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        prec.append(p)

    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(np.mean(prec)))
    return np.mean(prec)

def iou_metric_batch(y_true_in, y_pred_in):
    """Mean of ``iou_metric`` over the first axis of the two batches.

    The number of samples is taken from ``y_true_in.shape[0]``; sample ``k``
    of the truth is scored against sample ``k`` of the prediction.
    """
    n_samples = y_true_in.shape[0]
    scores = [iou_metric(y_true_in[k], y_pred_in[k]) for k in range(n_samples)]
    return np.mean(scores)


In [None]:
# Score the validation predictions at 50 evenly spaced binarisation thresholds in [0, 1].
thresholds = np.linspace(0, 1, 50)
ious = np.array([iou_metric_batch(y_valid, np.int32(preds_valid > threshold)) for threshold in tqdm_notebook(thresholds)])
# Search only indices 9..39 (skips the 9 lowest and 10 highest thresholds);
# "+ 9" maps the position inside the slice back to an index into `thresholds`.
threshold_best_index = np.argmax(ious[9:-10]) + 9
iou_best = ious[threshold_best_index]
threshold_best = thresholds[threshold_best_index]

In [None]:
# Metric as a function of the threshold, with the selected threshold marked by a red "x".
plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, "xr", label="Best threshold")
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
plt.legend()

In [None]:
# Display any pending matplotlib figures.
plt.show()


# Another sanity check with adjusted threshold

Again some sample images with the adjusted threshold.


In [None]:
# max_images = 60
# grid_width = 15
# grid_height = int(max_images / grid_width)
# fig, axs = plt.subplots(grid_height, grid_width, figsize=(grid_width, grid_height))
# for i, idx in enumerate(ids_valid[:max_images]):
#     img = train_df.loc[idx].images
#     mask = train_df.loc[idx].masks
#     pred = preds_valid[i]
#     ax = axs[int(i / grid_width), i % grid_width]
#     ax.imshow(img, cmap="Greys")
#     ax.imshow(mask, alpha=0.3, cmap="Greens")
#     ax.imshow(np.array(np.round(pred > threshold_best), dtype=np.float32), alpha=0.3, cmap="OrRd")
#     ax.text(1, img_size_ori-1, train_df.loc[idx].z, color="black")
#     ax.text(img_size_ori - 1, 1, round(train_df.loc[idx].coverage, 2), color="black", ha="right", va="top")
#     ax.text(1, 1, train_df.loc[idx].coverage_class, color="black", ha="left", va="top")
#     ax.set_yticklabels([])
#     ax.set_xticklabels([])
# plt.suptitle("Green: salt, Red: prediction. Top-left: coverage class, top-right: salt coverage, bottom-left: depth")


# Submission

Load, predict and submit the test image predictions.


In [None]:
# Source https://www.kaggle.com/bguberfain/unet-with-depth
def RLenc(img, order='F', format=True):
    """
    Run-length encode a binary mask.

    img is the mask; it is flattened to img.shape[0] * img.shape[1] elements
    order is the flattening order ('F' = down-then-right, i.e. Fortran)
    format selects the return type

    Every non-zero element belongs to a run. Start positions are 1-based.

    returns "start length start length ..." as a string when format is True,
    otherwise a list of (start, length) tuples
    """
    flat = img.reshape(img.shape[0] * img.shape[1], order=order)
    total = len(flat)

    runs = []          ## collected (start, length) pairs
    run_start = None   ## 1-based start of the run in progress, None outside a run
    for offset, pixel in enumerate(flat):
        if pixel == 0:
            if run_start is not None:
                # the run ended on the previous element (1-based position `offset`)
                runs.append((run_start, offset + 1 - run_start))
                run_start = None
        elif run_start is None:
            run_start = offset + 1

    # a run that reaches the end of the data is still open here
    if run_start is not None:
        runs.append((run_start, total + 1 - run_start))

    if not format:
        return runs
    return ' '.join('{} {}'.format(start, length) for start, length in runs)

In [None]:
# Load every test image, reflect-pad it, divide by 255 and stack into a
# (N, img_size_target, img_size_target, 1) batch.
x_test = np.array([reflect_pad(np.array(load_img("test/images/{}.png".format(idx), grayscale=True))) / 255 for idx in tqdm_notebook(test_df.index)]).reshape(-1, img_size_target, img_size_target, 1)
# Free the training/validation arrays before predicting.
# NOTE(review): makes this cell fail with NameError if it is run a second time.
del x_train,y_train,x_valid,y_valid
# predit_with_one_fold already crops rows/columns 27:229 and resizes with
# downsample(); slicing its result with 27:229 again cut the masks down to a
# 74x74 corner region, so the extra slice is removed.
preds_test = predit_with_one_fold(0,x_test)

# Ensemble alternative:
# preds_test = predit_with_kfolds(K_flods,x_test)
# Binarise with the threshold chosen on the validation set and run-length encode.
pred_dict = {idx: RLenc(np.round(downsample(preds_test[i]) > 
            threshold_best)) for i, idx in enumerate(tqdm_notebook(test_df.index.values))}

sub = pd.DataFrame.from_dict(pred_dict,orient='index')
sub.index.names = ['id']
sub.columns = ['rle_mask']
sub.to_csv('submission.csv')