In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
# Extract the train and test archives
import zipfile

# (archive path, destination directory) pairs; each archive is unpacked into its own folder.
archives = [
    ('../input/tgs-salt-identification-challenge/train.zip', 'train'),
    ('../input/tgs-salt-identification-challenge/test.zip', 'test'),
]
for archive_path, target_dir in archives:
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

In [None]:
# File names (with extension) of the extracted images.
# os.listdir returns entries in arbitrary order, so sort them: the seeded
# train/validation split below is only reproducible if this order is stable.
train_id = sorted(os.listdir('train/images'))
test_id = sorted(os.listdir('test/images'))

In [None]:
# Load the competition's training table; later cells attach image, mask and coverage columns to it.
train = pd.read_csv("../input/tgs-salt-identification-challenge/train.csv")
train.head()

In [None]:
from keras.preprocessing.image import load_img

# Load every training image as a grayscale array and divide by 255.
# NOTE(review): the images follow the os.listdir order of `train_id`, not the row
# order of train.csv; a later cell overwrites the "id" column with `train_id`, so
# images/ids/masks stay aligned, but any other CSV column presumably no longer
# matches its row -- confirm before using those columns.
train['images'] = [np.array(load_img("train/images/" + i, grayscale=True)) / 255 for i in train_id]

In [None]:
# Replace the CSV "id" column with the image file names (listing order), keeping
# only the part before the first "." so the extension is dropped.
train["id"] = [file_name.split(".")[0] for file_name in train_id]

In [None]:
# Load the mask for every training image (same file name under train/masks, same order as `train_id`).
train["mask"] = [np.array(load_img("train/masks/" + i, grayscale=True)) / 255 for i in train_id] # divide by 255 so mask values become 0/1

In [None]:
# Fraction of salt pixels per mask: sum of the mask divided by the 101 x 101 pixel count.
pixels_per_mask = 101 * 101
train["coverage"] = train["mask"].apply(np.sum) / pixels_per_mask

In [None]:
# Bucket coverage into classes via ceil(coverage * 10); used below to stratify the train/validation split.
train["coverage_class"] =  np.ceil(train["coverage"] * 10)

In [None]:
# Show how many samples fall into each coverage class.
train["coverage_class"].value_counts()

In [None]:
from sklearn.model_selection import train_test_split
# Stack the per-row arrays into (N, 101, 101, 1) tensors, then hold out 20% for
# validation, stratified by coverage class so both splits see similar mask sizes.
all_images = np.array(train["images"].tolist()).reshape(-1, 101, 101, 1)
all_masks = np.array(train["mask"].tolist()).reshape(-1, 101, 101, 1)
X_train, X_valid, y_train, y_valid = train_test_split(
    all_images,
    all_masks,
    test_size=0.2,
    random_state=777,
    stratify=train["coverage_class"],
)
# Drop the temporary names so only the split arrays remain referenced.
del all_images, all_masks

In [None]:
from imgaug import augmenters as iaa
import imgaug as resize_iaa

def augmentation(aug1, aug2, X_train, y_train):
    """Augment a single image/mask pair.

    ``aug1`` is frozen with ``to_deterministic()`` so that the image and the mask
    receive exactly the same sampled transform; ``aug2`` is applied to the image only.

    Args:
        aug1: Augmenter exposing ``to_deterministic()``; applied to image and mask.
        aug2: Augmenter applied to the image only.
        X_train: One image as a numpy array (callers in this notebook pass (101, 101, 1)).
        y_train: One mask as a numpy array with the same height/width as ``X_train``.

    Returns:
        Tuple ``(image, mask)`` of numpy arrays whose height/width match the input mask.
    """
    # Freeze the random parameters so image and mask get the identical transform.
    aug_det = aug1.to_deterministic()
    X_train_aug = aug_det.augment_image(X_train)
    X_train_aug = aug2.augment_image(X_train_aug)
    y_train_aug = aug_det.augment_image(y_train)

    # Cropping/padding can change the spatial size. Compare only height and width:
    # the arrays carry a trailing channel axis, so comparing the full shape with a
    # 2-tuple would always differ and force a needless resize on every sample.
    target_hw = tuple(y_train.shape[:2])
    if tuple(y_train_aug.shape[:2]) != target_hw:
        X_train_aug = resize_iaa.imresize_single_image(X_train_aug, target_hw, interpolation="linear")
        # Nearest-neighbour interpolation for the mask resize.
        y_train_aug = resize_iaa.imresize_single_image(y_train_aug, target_hw, interpolation="nearest")

    return np.array(X_train_aug), np.array(y_train_aug)

# Wrap an augmenter so it is applied with probability 0.5.
sometimes = lambda x: iaa.Sometimes(0.5, x)
# aug1: geometric transforms, applied identically to image and mask.
# aug2: intensity transforms, applied to the image only.
aug1 = iaa.Sequential([
    iaa.Fliplr(0.5),
    # OneOf applies exactly one child so the geometric transforms never stack.
    # Two of the four children are Noop, so no geometric transform is applied
    # about half of the time.
    iaa.OneOf([
        # Affine transform: rotation plus horizontal shift.
        iaa.Affine(rotate=(-10, 10),translate_percent={"x": (-0.25, 0.25)}, mode='symmetric', cval=(0), backend="cv2"),
        # Crop or pad; keep_size=False means the output size changes and is
        # restored by the resize step in augmentation().
        iaa.CropAndPad(percent=(-0.2, 0.2), pad_mode="reflect", pad_cval=0, keep_size=False),
        iaa.Noop(), iaa.Noop()
    ])
])

# The children must be passed as ONE list: Sequential's second and third
# positional parameters are not children, so passing the augmenters as separate
# arguments leaves only the first one (Multiply) in the pipeline.
aug2 = iaa.Sequential([
    sometimes(iaa.Multiply((0.8, 1.2))),
    sometimes(iaa.Add((-0.2, 0.2))),
    sometimes(iaa.OneOf([iaa.AdditiveGaussianNoise(scale=(0, 0.05)), iaa.GaussianBlur(sigma=(0.0, 1.0))]))
])

def generator(image, mask, batch_size=32):
    """Yield augmented ``(images, masks)`` batches forever.

    The sample order is reshuffled at the start of every pass over the data.
    Samples left over at the end of a pass are not dropped: they stay in the
    pending batch and are completed by the first samples of the next pass.

    Args:
        image: Indexable collection of images.
        mask: Indexable collection of masks, aligned with ``image``.
        batch_size: Number of samples stacked into each yielded batch.

    Yields:
        Tuple of two arrays stacked along axis 0: augmented images and masks.
    """
    order = np.arange(len(image))
    batch_images = []
    batch_masks = []

    while True:
        np.random.shuffle(order)
        for sample_idx in order:
            # aug1 / aug2 are the module-level augmentation pipelines.
            aug_img, aug_mask = augmentation(aug1, aug2, image[sample_idx], mask[sample_idx])
            batch_images.append(aug_img)
            batch_masks.append(aug_mask)

            if len(batch_images) < batch_size:
                continue

            yield np.stack(batch_images, 0), np.stack(batch_masks, 0)
            batch_images, batch_masks = [], []
                

In [None]:
# x, y = next(generator(X_train, y_train))
# plt.figure(figsize=(20, 12))
# plt.imshow(x[0].reshape(101, 101))

In [None]:
# Degradation problem (cf. vanishing gradients): if both the train and validation scores stay poor, the learning signal is not propagating through the network.


In [None]:
from keras.models import Model
from keras.layers import *

def conv_block(x, filters, size, strides=(1,1), padding="same", activation=True):
    """Apply Conv2D followed by BatchNormalization, optionally followed by ReLU.

    Args:
        x: Input tensor.
        filters: Number of convolution filters.
        size: Convolution kernel size.
        strides: Convolution strides.
        padding: Convolution padding mode.
        activation: When true, append a ReLU activation after normalization.

    Returns:
        The resulting tensor.
    """
    conv_out = Conv2D(filters, size, strides=strides, padding=padding)(x)
    normed = BatchNormalization()(conv_out)
    if not activation:
        return normed
    return Activation("relu")(normed)

def res_block(block_input, num_filters=16):
    """Residual block: ReLU -> BatchNorm -> two 3x3 conv_blocks, added back to the input.

    Args:
        block_input: Input tensor; it is also the skip connection, so its channel
            count has to be compatible with ``num_filters`` for the Add layer.
        num_filters: Number of filters used by both convolutions.

    Returns:
        Tensor holding the sum of the convolution branch and ``block_input``.
    """
    branch = Activation("relu")(block_input)
    branch = BatchNormalization()(branch)
    # conv_block is a plain function (not a layer): it is called on the tensor and returns a tensor.
    branch = conv_block(branch, num_filters, (3,3))
    # No activation on the second convolution; the sum is taken on the raw branch output.
    branch = conv_block(branch, num_filters, (3,3), activation=False)
    return Add()([branch, block_input])

def build_model(input_layer, neuron, dropout=0.5):
    """Build a residual U-Net style graph on ``input_layer`` and return its output tensor.

    Encoder: four stages of Conv2D -> two res_block -> ReLU -> 2x2 max-pool ->
    Dropout, with widths neuron*1, *2, *4 and *8, followed by a neuron*16 middle
    stage. Decoder: each stage upsamples with a stride-2 Conv2DTranspose,
    concatenates the matching encoder tensor, applies Dropout and repeats the
    Conv2D -> two res_block -> ReLU pattern.

    Args:
        input_layer: Input tensor the graph is built on.
        neuron: Base number of filters; every stage width is a multiple of it.
        dropout: Dropout rate; the first pooled stage and the final stage use
            ``dropout / 2``.

    Returns:
        Output tensor of a final 1x1 Conv2D with one filter and sigmoid activation.

    Note:
        The size comments assume the 101x101 input used in this notebook. The
        "valid" padding on the 2nd and 4th transposed convolutions produces the
        odd sizes (25 and 101) needed to concatenate with the encoder tensors.
    """
    # ---- Encoder stage 1: 101 -> 50 after pooling ----
    cnn1 = Conv2D(neuron*1, (3,3), activation=None, padding="same")(input_layer)
    cnn1 = res_block(cnn1, neuron*1)
    cnn1 = res_block(cnn1, neuron*1)
    cnn1 = Activation("relu")(cnn1)
    poo1 = MaxPooling2D(2,2)(cnn1)
    poo1 = Dropout(dropout/2)(poo1)
    
    # ---- Encoder stage 2: 50 -> 25 after pooling ----
    cnn2 = Conv2D(neuron*2, (3,3), activation=None, padding="same")(poo1)
    cnn2 = res_block(cnn2, neuron*2)
    cnn2 = res_block(cnn2, neuron*2)
    cnn2 = Activation("relu")(cnn2)
    poo2 = MaxPooling2D(2,2)(cnn2)
    poo2 = Dropout(dropout)(poo2)
    
    # ---- Encoder stage 3: 25 -> 12 after pooling ----
    cnn3 = Conv2D(neuron*4, (3,3), activation=None, padding="same")(poo2)
    cnn3 = res_block(cnn3, neuron*4)
    cnn3 = res_block(cnn3, neuron*4)
    cnn3 = Activation("relu")(cnn3)
    poo3 = MaxPooling2D(2,2)(cnn3)
    poo3 = Dropout(dropout)(poo3)
    
    # ---- Encoder stage 4: 12 -> 6 after pooling ----
    cnn4 = Conv2D(neuron*8, (3,3), activation=None, padding="same")(poo3)
    cnn4 = res_block(cnn4, neuron*8)
    cnn4 = res_block(cnn4, neuron*8)
    cnn4 = Activation("relu")(cnn4)
    poo4 = MaxPooling2D(2,2)(cnn4)
    poo4 = Dropout(dropout)(poo4)
    
    # ---- Middle stage ----
    cnn_mid = Conv2D(neuron*16, (3,3), activation=None, padding="same")(poo4) # size=6
    cnn_mid = res_block(cnn_mid, neuron*16)
    cnn_mid = res_block(cnn_mid, neuron*16)
    cnn_mid = Activation("relu")(cnn_mid)
    
    # ---- Decoder stage 4: upsample and merge with encoder stage 4 ----
    dcnn4 = Conv2DTranspose(neuron*8, (3,3), strides=(2,2), padding="same")(cnn_mid) # size=12
    cnn4 = concatenate([dcnn4, cnn4])
    cnn4 = Dropout(dropout)(cnn4)
    cnn4 = Conv2D(neuron*8, (3,3), activation=None, padding="same")(cnn4)
    cnn4 = res_block(cnn4, neuron*8)
    cnn4 = res_block(cnn4, neuron*8)
    cnn4 = Activation("relu")(cnn4)
    
    # ---- Decoder stage 3: "valid" padding gives 25 instead of 24 ----
    dcnn3 = Conv2DTranspose(neuron*4, (3,3), strides=(2,2), padding="valid")(cnn4) # size=(24 -->) 25
    cnn3 = concatenate([dcnn3, cnn3])
    cnn3 = Dropout(dropout)(cnn3)
    cnn3 = Conv2D(neuron*4, (3,3), activation=None, padding="same")(cnn3)
    cnn3 = res_block(cnn3, neuron*4)
    cnn3 = res_block(cnn3, neuron*4)
    cnn3 = Activation("relu")(cnn3)
    
    # ---- Decoder stage 2 ----
    dcnn2 = Conv2DTranspose(neuron*2, (3,3), strides=(2,2), padding="same")(cnn3) # size=50
    cnn2 = concatenate([dcnn2, cnn2])
    cnn2 = Dropout(dropout)(cnn2)
    cnn2 = Conv2D(neuron*2, (3,3), activation=None, padding="same")(cnn2)
    cnn2 = res_block(cnn2, neuron*2)
    cnn2 = res_block(cnn2, neuron*2)
    cnn2 = Activation("relu")(cnn2)
    
    # ---- Decoder stage 1: "valid" padding gives 101 instead of 100 ----
    dcnn1 = Conv2DTranspose(neuron*1, (3,3), strides=(2,2), padding="valid")(cnn2) # size=(100 -->) 101
    cnn1 = concatenate([dcnn1, cnn1])
    cnn1 = Dropout(dropout)(cnn1)
    cnn1 = Conv2D(neuron*1, (3,3), activation=None, padding="same")(cnn1)
    cnn1 = res_block(cnn1, neuron*1)
    cnn1 = res_block(cnn1, neuron*1)
    cnn1 = Activation("relu")(cnn1)
    
    # ---- Output head: lighter dropout, then a 1x1 sigmoid convolution ----
    cnn1 = Dropout(dropout/2)(cnn1)
    output_layer = Conv2D(1, (1,1), activation="sigmoid")(cnn1)
    
    return output_layer

In [None]:
# Build the network on a 101x101 single-channel input with 16 base filters and dropout 0.5.
# NOTE(review): `ouput_layer` is a misspelling of "output_layer"; left unchanged here.
input_layer = Input((101, 101, 1))
ouput_layer = build_model(input_layer, 16, 0.5)
model = Model(input_layer, ouput_layer)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Endless augmented batch generator over the training split, batch size 128
# (the same 128 is used to compute steps_per_epoch when fitting).
train_generator = generator(X_train, y_train, 128)

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
# Binary cross-entropy loss, Adam optimizer, accuracy as the reported metric.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc"])

# Keep only the best checkpoint, stop after 20 epochs without improvement, and
# shrink the learning rate by a factor of 0.15 after 8 stagnant epochs.
callbacks = [ModelCheckpoint("best.h5", save_best_only=True, verbose=1),
             EarlyStopping(patience=20),
             ReduceLROnPlateau(patience=8, factor=0.15, min_lr=0.00001, verbose=1)] # min_lr keeps the learning rate from dropping too low

# validation_data is passed as the documented (x, y) tuple, and steps_per_epoch
# as a plain int (np.ceil returns a numpy float). 128 must match the batch size
# given to generator() when train_generator was created.
model.fit_generator(train_generator, validation_data=(X_valid, y_valid), epochs=100, callbacks=callbacks,
                    steps_per_epoch=int(np.ceil(len(X_train) / 128)))

In [None]:
# Load pre-trained weights from an attached input dataset.
# NOTE(review): this is a different path from the "best.h5" checkpoint written
# during training above -- confirm this is the intended weight file.
model.load_weights("../input/best-weights/best.h5")

In [None]:
# Predict on the validation images and reshape the output to (N, 101, 101).
preds_valid = model.predict(X_valid).reshape(-1, 101, 101)

In [None]:
# 31 evenly spaced candidate binarization thresholds from 0.3 to 0.7 (inclusive).
threshold = np.linspace(0.3, 0.7, 31)

In [None]:
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Score one mask/prediction pair: mean precision over IoU thresholds 0.5..0.95.

    Both inputs are binned into background ([0, 0.5)) and foreground ([0.5, 1]).
    The foreground IoU is compared against each threshold in
    ``np.arange(0.5, 1.0, 0.05)``; the precision at a threshold is
    ``tp / (tp + fp + fn)`` and the mean over all thresholds is returned.

    If neither the mask nor the prediction has foreground, intersection and union
    are both replaced by 1e-9, so the IoU is 1 and the score is 1.0.

    Args:
        y_true_in: Ground-truth mask (array-like, values in [0, 1]).
        y_pred_in: Predicted mask (array-like; boolean or values in [0, 1]).
        print_table: When true, print TP/FP/FN/precision per threshold and the mean.

    Returns:
        Mean precision over the ten IoU thresholds.
    """
    # Cast to float up front: np.histogram emits a RuntimeWarning (bool -> uint8
    # conversion) for boolean input, and callers pass ``preds > threshold``.
    labels = np.asarray(y_true_in, dtype=float)
    y_pred = np.asarray(y_pred_in, dtype=float)

    # Explicit bin edges: with automatic bins, an all-zero input would produce
    # the wrong edges [-0.5, 0, 0.5].
    bin_edges = [0, 0.5, 1]
    # Rows index the true class, columns the predicted class.
    intersection = np.histogram2d(labels.flatten(), y_pred.flatten(), bins=(bin_edges, bin_edges))[0]
    area_true = np.histogram(labels, bins=bin_edges)[0]
    area_pred = np.histogram(y_pred, bins=bin_edges)[0]
    area_true = np.expand_dims(area_true, -1)
    area_pred = np.expand_dims(area_pred, 0)

    # Compute union
    union = area_true + area_pred - intersection

    # Exclude background from the analysis; replace zeros to avoid 0/0.
    intersection = intersection[1:, 1:]
    intersection[intersection == 0] = 1e-9

    union = union[1:, 1:]
    union[union == 0] = 1e-9

    # Compute the intersection over union
    iou = intersection / union

    # Precision helper function
    def precision_at(threshold, iou):
        matches = iou > threshold
        true_positives = np.sum(matches, axis=1) == 1   # True objects matched exactly once
        false_positives = np.sum(matches, axis=0) == 0  # Predicted objects with no match (extra)
        false_negatives = np.sum(matches, axis=1) == 0  # True objects with no match (missed)
        tp, fp, fn = np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)
        return tp, fp, fn

    # Loop over IoU thresholds
    prec = []
    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        if (tp + fp + fn) > 0:
            p = tp / (tp + fp + fn)
        else:
            p = 0
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        prec.append(p)

    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(np.mean(prec)))
    return np.mean(prec)

def iou_metric_batch(y_true_in, y_pred_in):
    """Average ``iou_metric`` over a batch.

    Args:
        y_true_in: Array of ground-truth masks; the first axis indexes samples.
        y_pred_in: Array of predictions aligned with ``y_true_in``.

    Returns:
        Mean of the per-sample scores.
    """
    n_samples = y_true_in.shape[0]
    per_sample = [iou_metric(y_true_in[k], y_pred_in[k]) for k in range(n_samples)]
    return np.mean(per_sample)

In [None]:
# Validation score for every candidate threshold (predictions are binarized with `>`).
ious = np.array([
    iou_metric_batch(y_valid, preds_valid > cutoff)
    for cutoff in threshold
])

In [None]:
# Pick the threshold with the highest validation score (first one on ties, per np.argmax).
threshold_best_idx = np.argmax(ious)
iou_best = ious[threshold_best_idx]
threshold_best  = threshold[threshold_best_idx]

In [None]:
import matplotlib.pyplot as plt

# Plot the validation score against the threshold and mark the best point in green.
plt.plot(threshold, ious)
plt.plot(threshold_best, iou_best, "og", label="best_threshold")
plt.legend()
plt.title("threshold : {} vs. iou : {}".format(threshold_best, iou_best))

In [None]:
def rle_encode(im):
    """Run-length encode a binary mask in column-major order.

    Args:
        im: Binary mask array (foreground non-zero / True).

    Returns:
        Space-separated string of ``start length`` pairs with 1-based starts;
        the empty string when the mask has no foreground.
    """
    flat = im.flatten(order = 'F')
    # Zero sentinels on both ends so runs touching the borders are detected.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes: run starts at even slots, run ends at odd slots.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into a run length, in place.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

In [None]:
from tqdm import tqdm_notebook
# Load every test image as a grayscale array divided by 255 (same preprocessing
# as the training images) and stack them into an (N, 101, 101, 1) tensor.
x_test = np.array([(np.array(load_img("test/images/{}".format(idx), grayscale = True))) / 255 for idx in tqdm_notebook(test_id)]).reshape(-1, 101, 101, 1)

In [None]:
# Predict a mask for every test image.
preds_test = model.predict(x_test, verbose=1)

In [None]:
# Binarize each prediction with the best validation threshold and run-length
# encode it, keyed by the test file name (extension still attached here).
prediction_dict = {idx: rle_encode(preds_test[i]>threshold_best) for i, idx in enumerate(tqdm_notebook(test_id))}

In [None]:
# Turn the {file name: rle} mapping into a two-column submission table.
sub = pd.DataFrame.from_dict(prediction_dict, orient="index").reset_index()
sub.columns = ["id", "rle_mask"]
# Drop the last four characters of each file name (the ".png"-style extension).
sub["id"] = sub["id"].apply(lambda x: x[:-4])
sub.head(10)

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv("sub.csv", index=False)