[View in Colaboratory](https://colab.research.google.com/github/Romanm87/TGS_Salt/blob/master/unet.ipynb)

In [1]:
import numpy as np
import pandas as pd
from keras.models import Model
from keras import optimizers
from keras import backend as K
from sklearn.metrics import confusion_matrix
from google.colab import files
import tensorflow as tf
import cv2
import os
import json
import zipfile

import skimage.io as io
import skimage.transform as trans
from keras.layers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

Using TensorFlow backend.


In [4]:
##### Use kaggle API

!pip install kaggle
api_token = {"username":"romanm87","key":"#####################"}

os.chdir('/')
!mkdir ~/.kaggle #kaggle API searches in root directory for .kaggle/kaggle.json

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)
!chmod 600 /root/.kaggle/kaggle.json

# API link from Kaggle:
!kaggle competitions download -c tgs-salt-identification-challenge

zip_ref = zipfile.ZipFile('train.zip', 'r')
zip_ref.extractall()
zip_ref.close()

Collecting kaggle
[?25l  Downloading https://files.pythonhosted.org/packages/c6/78/832b9a9ec6b3baf8ec566e1f0a695f2fd08d2c94a6797257a106304bfc3c/kaggle-1.4.7.1.tar.gz (52kB)
[K    100% |████████████████████████████████| 61kB 2.9MB/s 
Collecting python-slugify (from kaggle)
  Downloading https://files.pythonhosted.org/packages/00/ad/c778a6df614b6217c30fe80045b365bfa08b5dd3cb02e8b37a6d25126781/python-slugify-1.2.6.tar.gz
Collecting Unidecode>=0.04.16 (from python-slugify->kaggle)
[?25l  Downloading https://files.pythonhosted.org/packages/59/ef/67085e30e8bbcdd76e2f0a4ad8151c13a2c5bce77c85f8cad6e1f16fb141/Unidecode-1.0.22-py2.py3-none-any.whl (235kB)
[K    100% |████████████████████████████████| 235kB 6.2MB/s 
[?25hBuilding wheels for collected packages: kaggle, python-slugify
  Running setup.py bdist_wheel for kaggle ... [?25l- \ done
[?25h  Stored in directory: /root/.cache/pip/wheels/44/2c/df/22a6eeb780c36c28190faef6252b739fdc47145fd87a6642d4
  Running setup.py bdist_wheel for

In [0]:
##### Prepare data and model
def _load_grayscale_stack(directory, file_names):
    """Read every listed file from `directory` as grayscale and stack the results.

    Returns a numpy array with one entry per file, divided by 255.
    """
    return np.array([cv2.imread(os.path.join(directory, name), cv2.IMREAD_GRAYSCALE)
                     for name in file_names]) / 255


# os.chdir() returns None, so the directories are kept as explicit paths and the
# files are read through full paths instead of relying on the working directory.
tr_image_dir = '/images'
tr_masks_dir = '/masks'

# os.listdir() returns names in arbitrary order; sort both listings so that
# x[i] and y[i] refer to the same sample.
train_im = sorted(os.listdir(tr_image_dir))
train_ma = sorted(os.listdir(tr_masks_dir))
assert train_im == train_ma, "image and mask file names differ; samples cannot be paired"

x = _load_grayscale_stack(tr_image_dir, train_im)
y = _load_grayscale_stack(tr_masks_dir, train_ma)

# expand dimensions for CNN input (adds a trailing channel axis)
x = np.expand_dims(x, axis=3)
y = np.expand_dims(y, axis=3)

# split training vs validation set: the last `train_val_split` fraction is held out
train_val_split = 0.1
n_train = int(x.shape[0] * (1 - train_val_split))
x_train, x_val = x[:n_train, :, :, :], x[n_train:, :, :, :]
y_train, y_val = y[:n_train, :, :, :], y[n_train:, :, :, :]

# from: https://github.com/zhixuhao/unet/blob/master/model.py
def unet(pretrained_weights = None,input_size = (101,101,1)):
    """Build an (uncompiled) U-Net that maps an image to a same-sized sigmoid mask.

    The input is zero-padded by 27 pixels per spatial axis (101 -> 128 for the
    default `input_size`) so that four 2x2 poolings and four 2x upsamplings
    produce matching shapes for the skip connections; the padding is cropped
    off again at the output. The padding amounts are fixed, so other spatial
    sizes are only valid if size + 27 is divisible by 16.

    Args:
        pretrained_weights: optional path passed to `model.load_weights`.
        input_size: shape of one input sample, (height, width, channels).

    Returns:
        A Keras `Model`; the caller is responsible for compiling it.
    """
    # Settings shared by every 3x3 / 2x2 convolution of the network.
    conv_args = dict(activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')

    inputs = Input(input_size)
    # use zero padding to match dims after maxpool/upsample
    input_padded = ZeroPadding2D(padding=((14, 13), (14, 13)))(inputs)

    # ---- contracting path ----
    conv1 = Conv2D(64, 3, **conv_args)(input_padded)
    conv1 = Conv2D(64, 3, **conv_args)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), padding="same")(conv1)
    conv2 = Conv2D(128, 3, **conv_args)(pool1)
    conv2 = Conv2D(128, 3, **conv_args)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), padding="same")(conv2)
    conv3 = Conv2D(256, 3, **conv_args)(pool2)
    conv3 = Conv2D(256, 3, **conv_args)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), padding="same")(conv3)
    conv4 = Conv2D(512, 3, **conv_args)(pool3)
    conv4 = Conv2D(512, 3, **conv_args)(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), padding="same")(drop4)

    # ---- bottleneck ----
    conv5 = Conv2D(1024, 3, **conv_args)(pool4)
    conv5 = Conv2D(1024, 3, **conv_args)(conv5)
    drop5 = Dropout(0.5)(conv5)

    # ---- expanding path: upsample, concatenate with the skip tensor on the
    # channel axis, then two 3x3 convolutions ----
    up6 = Conv2D(512, 2, **conv_args)(UpSampling2D(size = (2,2))(drop5))
    merge6 = concatenate([drop4, up6], axis = 3)
    conv6 = Conv2D(512, 3, **conv_args)(merge6)
    conv6 = Conv2D(512, 3, **conv_args)(conv6)

    up7 = Conv2D(256, 2, **conv_args)(UpSampling2D(size = (2,2))(conv6))
    merge7 = concatenate([conv3, up7], axis = 3)
    conv7 = Conv2D(256, 3, **conv_args)(merge7)
    conv7 = Conv2D(256, 3, **conv_args)(conv7)

    up8 = Conv2D(128, 2, **conv_args)(UpSampling2D(size = (2,2))(conv7))
    merge8 = concatenate([conv2, up8], axis = 3)
    conv8 = Conv2D(128, 3, **conv_args)(merge8)
    conv8 = Conv2D(128, 3, **conv_args)(conv8)

    up9 = Conv2D(64, 2, **conv_args)(UpSampling2D(size = (2,2))(conv8))
    merge9 = concatenate([conv1, up9], axis = 3)
    conv9 = Conv2D(64, 3, **conv_args)(merge9)
    conv9 = Conv2D(64, 3, **conv_args)(conv9)
    conv9 = Conv2D(2, 3, **conv_args)(conv9)

    # ---- head: per-pixel sigmoid, then remove the padding added at the input ----
    conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)
    crop = Cropping2D(cropping=((14, 13), (14, 13)))(conv10)

    model = Model(inputs = inputs, outputs = crop)

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model



In [0]:
##### Define evaluation metrics
class MeanIoU(object):
    """Intersection-over-union of the positive class, usable as a Keras metric.

    Predictions are binarised with the same rule as `bin_acc05`: 0.05 is added
    before rounding. Targets are expected to be binary masks (values 0 and 1).
    """

    def __init__(self):
        super().__init__()

    def mean_iou(self, y_true, y_pred):
        """Tensor-level metric: wraps `np_mean_iou` as a TensorFlow op.

        The wrapped function receives numpy arrays and returns a float64 scalar.
        """
        return tf.py_func(self.np_mean_iou, [y_true, y_pred], tf.float64)

    def np_mean_iou(self, y_true, y_pred):
        """Compute IoU of the positive class over all elements of the batch.

        Args:
            y_true: array of ground-truth labels (0 or 1), any shape.
            y_pred: array of predicted scores, same number of elements.

        Returns:
            np.float64: TP / (TP + FP + FN), or 0.0 when that denominator is 0
            (no positive element in either the targets or the predictions).
        """
        predicted_pos = np.round(np.asarray(y_pred) + 0.05, 0).reshape(-1) == 1
        actual_pos = np.asarray(y_true).reshape(-1) == 1

        # Count the confusion-matrix cells directly; this stays well defined
        # when a class is missing from the batch (a confusion matrix built from
        # the observed labels would then shrink to 1x1 and indexing it fails).
        true_positive = np.count_nonzero(predicted_pos & actual_pos)
        false_positive = np.count_nonzero(predicted_pos & ~actual_pos)
        false_negative = np.count_nonzero(~predicted_pos & actual_pos)

        union = true_positive + false_positive + false_negative
        if union == 0:
            # Division by zero: report 0 instead of propagating NaN.
            return np.float64(0.0)
        return np.float64(true_positive / union)
miou = MeanIoU()

def bin_acc05(y_true, y_pred):
    """Binary accuracy where predictions are shifted by +0.05 before rounding.

    Returns the mean over the last axis of the element-wise agreement between
    `y_true` and the rounded, shifted predictions.
    """
    hard_pred = K.round(y_pred + 0.05)
    agreement = K.equal(y_true, hard_pred)
    return K.mean(agreement, axis=-1)

In [18]:
##### Define model
model = unet()
adam = optimizers.Adam(lr = 0.001)
# Pass the optimizer instance configured above; the string 'adam' would build a
# separate default optimizer and silently ignore any learning rate set here.
model.compile(loss = 'binary_crossentropy', optimizer = adam,
              metrics=[bin_acc05, miou.mean_iou])
model.summary()

  name=name)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 101, 101, 1)  0                                            
__________________________________________________________________________________________________
zero_padding2d_4 (ZeroPadding2D (None, 128, 128, 1)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_47 (Conv2D)              (None, 128, 128, 64) 640         zero_padding2d_4[0][0]           
__________________________________________________________________________________________________
conv2d_48 (Conv2D)              (None, 128, 128, 64) 36928       conv2d_47[0][0]                  
__________________________________________________________________________________________________
max_poolin

In [8]:
##### Run model
# Training settings collected in one mapping, then forwarded to fit().
fit_options = dict(epochs=10,
                   batch_size=64,
                   validation_data=(x_val, y_val),
                   verbose=1)
model.fit(x_train, y_train, **fit_options)

Train on 3600 samples, validate on 400 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f30f8826d68>

In [9]:
##### Predict results
# Inference runs on the training split first, then on the validation split.
y_train_pred, y_val_pred = [model.predict(split, verbose=1)
                            for split in (x_train, x_val)]



In [12]:
# Sanity check: largest predicted value over the whole training split.
# bin_acc05 and MeanIoU round (y_pred + 0.05), so an element only counts as
# positive when its prediction is roughly 0.45 or higher; a maximum below that
# means nothing is classified as positive by those metrics.
y_train_pred.max()

0.38086942

In [0]:
###### Download results to local computer for evaluation
# `files` comes from the `google.colab` import in the notebook's import cell,
# so it is not imported again here.
def _results_to_csv(split_name, y_true, y_pred):
    """Write flattened targets and predictions of one split to a CSV file.

    The file is named results_<split_name>.csv and has the columns
    y_<split_name> and y_<split_name>_pred (plus the default row index).

    Returns:
        The name of the file that was written.
    """
    csv_name = "results_{}.csv".format(split_name)
    results = pd.DataFrame({'y_{}'.format(split_name): y_true.reshape(-1),
                            'y_{}_pred'.format(split_name): y_pred.reshape(-1)})
    results.to_csv(csv_name)
    return csv_name


# Write both files first, then trigger the browser downloads in the same order.
result_files = [_results_to_csv('train', y_train, y_train_pred),
                _results_to_csv('val', y_val, y_val_pred)]
for csv_name in result_files:
    files.download(csv_name)