In [2]:
import glob

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import cv2

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Activation, BatchNormalization, Conv2D
from keras.layers import MaxPooling2D, Dropout, UpSampling2D
import xgboost as xgb

%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 30, 5

Using TensorFlow backend.


In [3]:
# glob.glob() makes no promise about the order of its results, while the
# noisy inputs and clean targets are paired purely by list position.
# Sorting makes the order deterministic.
# NOTE(review): this assumes matching noisy/clean files share a filename, and
# that the precomputed arrays in ../ensemble_data were generated with the same
# (sorted) ordering — confirm before relying on the pairing.
TRAIN_IMAGES = sorted(glob.glob('../data/x_train/*.png'))
CLEAN_IMAGES = sorted(glob.glob('../data/y_train/*.png'))
TEST_IMAGES = sorted(glob.glob('../data/x_test/*.png'))

In [4]:
def load_image(path, target_size=(258, 540)):
    """Load a list of image files as one grayscale array.

    Parameters
    ----------
    path : sequence of str
        File paths of the images to load (must support ``len``).
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to on load.
        Defaults to ``(258, 540)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(path), height, width, 1)``. Pixel values are
        stored as loaded; no rescaling is applied here.
    """
    height, width = target_size
    # Allocated with NumPy's default dtype, so the float32 images are
    # up-cast when they are written into it.
    image_list = np.zeros((len(path), height, width, 1))
    for i, fig in enumerate(path):
        img = image.load_img(fig, color_mode='grayscale', target_size=target_size)
        image_list[i] = image.img_to_array(img).astype('float32')

    return image_list

# Inputs (train and test) keep only channel 0 and become 3-D stacks;
# the clean targets keep their trailing channel axis.
x_train = load_image(TRAIN_IMAGES)[..., 0]
y_train = load_image(CLEAN_IMAGES)
x_test = load_image(TEST_IMAGES)[..., 0]

# Quick sanity check of the three array shapes.
print(x_train.shape, y_train.shape, x_test.shape)

(144, 258, 540) (144, 258, 540, 1) (72, 258, 540)


In [12]:
# Load precomputed feature arrays for the training images from ../ensemble_data.
# File names suggest one array per base method (CNN, edge, median, threshold);
# TODO confirm against the code that wrote these files.
x_train_cnn = np.load('../ensemble_data/x_train_cnn.npy')
x_train_edge = np.load('../ensemble_data/x_train_edge.npy')
x_train_med = np.load('../ensemble_data/x_train_med.npy')
# Only the threshold array is divided by 255 on load — presumably it is stored
# on a 0-255 scale while the others are already in [0, 1]; verify.
x_train_thresh = np.load('../ensemble_data/x_train_thresh.npy')/255.0


In [13]:
# Load precomputed feature arrays for the test images from ../ensemble_data.
# File names suggest one array per base method (CNN, edge, median, threshold);
# TODO confirm against the code that wrote these files.
x_test_cnn = np.load('../ensemble_data/x_test_cnn.npy')
x_test_edge = np.load('../ensemble_data/x_test_edge.npy')
x_test_med = np.load('../ensemble_data/x_test_med.npy')
# The threshold array is divided by 255 here, at load time; code that uses
# x_test_thresh afterwards must not divide it again.
x_test_thresh = np.load('../ensemble_data/x_test_thresh.npy')/255.0


In [10]:
# Divide the loaded pixel values by 255 (presumably mapping 0-255 grayscale
# into [0, 1] — confirm the source images are 8-bit).
x_train_norm = x_train/255.0
x_test_norm = x_test/255.0
# -1 lets NumPy infer the number of images instead of hard-coding 144, so the
# cell keeps working if the training set size changes.
y_train_norm = y_train.reshape(-1, 258, 540, 1)/255.0


In [18]:
# Per-image feature sources, in the channel order used by the stacker input.
_train_sources = (x_train_norm, x_train_med, x_train_thresh, x_train_edge, x_train_cnn)


def _stack_train_sample(idx):
    """Depth-stack the feature maps of training image `idx` into one array."""
    return np.dstack([source[idx] for source in _train_sources])


# The first 114 images are used for fitting, images 114..143 for validation.
x_train_ens = np.array([_stack_train_sample(idx) for idx in range(114)])
x_valid_ens = np.array([_stack_train_sample(idx) for idx in range(114, 144)])

In [21]:
# x_test_thresh is already divided by 255 when it is loaded, so it is used
# as-is here. Dividing it a second time would put the test threshold channel
# on a different scale from the training one.
x_test_ens = np.array([np.dstack((x_test_norm[i], x_test_med[i], x_test_thresh[i], x_test_edge[i], x_test_cnn[i])) for i in range(72)])


# CNN Stacker

In [15]:
class StackCNN():
    """Convolutional stacker mapping 5-channel feature images to 1 channel.

    The network takes inputs of shape (258, 540, 5) and produces a
    single-channel map of the same height and width through a sigmoid
    output layer. It is compiled with MSE loss and the Adam optimizer.
    """

    def __init__(self):
        """Build, compile and print a summary of the Keras model."""
        self.img_rows = 258
        self.img_cols = 540
        self.channels = 5
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        optimizer = Adam(lr=0.001)

        self.autoencoder_model = self.build_model()
        self.autoencoder_model.compile(loss='mse', optimizer=optimizer)
        self.autoencoder_model.summary()

    def build_model(self):
        """Return the (uncompiled) Keras ``Model``.

        Five 3x3 'same'-padded ReLU convolutions with 64, 32, 16, 32 and 64
        filters (batch normalisation after the first four), followed by a
        single-filter sigmoid convolution. There is no pooling or
        up-sampling, so the spatial size is preserved throughout.
        """
        input_layer = Input(shape=self.img_shape)

        h = input_layer
        # Every convolution in this group is followed by batch normalisation.
        for filters in (64, 32, 16, 32):
            h = Conv2D(filters, (3, 3), activation='relu', padding='same')(h)
            h = BatchNormalization()(h)

        # Last hidden convolution has no batch normalisation after it.
        h = Conv2D(64, (3, 3), activation='relu', padding='same')(h)

        output_layer = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(h)

        return Model(input_layer, output_layer)

    def train_model(self, x_train, y_train, x_val, y_val, epochs, batch_size=16):
        """Fit the model and plot the training/validation loss curves.

        Parameters
        ----------
        x_train, y_train : array-like
            Training inputs and targets.
        x_val, y_val : array-like
            Validation inputs and targets, passed as ``validation_data``.
        epochs : int
            Maximum number of epochs; training stops earlier when
            ``val_loss`` has not improved for 5 consecutive epochs.
        batch_size : int, optional
            Mini-batch size (default 16).
        """
        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=0,
                                       patience=5,
                                       verbose=1,
                                       mode='auto')
        # The callback must be handed to fit(); without it the early-stopping
        # configuration above has no effect and all epochs are always run.
        history = self.autoencoder_model.fit(x_train, y_train,
                                             batch_size=batch_size,
                                             epochs=epochs,
                                             validation_data=(x_val, y_val),
                                             callbacks=[early_stopping])
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        # The second curve is the validation loss, so label it accordingly.
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

    def eval_model(self, x_test):
        """Return the model's predictions for ``x_test``."""
        preds = self.autoencoder_model.predict(x_test)
        return preds

In [19]:
# Build the stacking CNN, then fit it on the first 114 stacked samples and
# validate on the remaining ones.
scnn = StackCNN()
scnn.train_model(
    x_train=x_train_ens.reshape(-1, 258, 540, 5),
    y_train=y_train_norm[:114],
    x_val=x_valid_ens.reshape(-1, 258, 540, 5),
    y_val=y_train_norm[114:],
    epochs=300,
    batch_size=20,
)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 258, 540, 5)       0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 258, 540, 64)      2944      
_________________________________________________________________
batch_normalization_9 (Batch (None, 258, 540, 64)      256       
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 258, 540, 32)      18464     
_________________________________________________________________
batch_normalization_10 (Batc (None, 258, 540, 32)      128       
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 258, 540, 16)      4624      
_________________________________________________________________
batch_normalization_11 (Batc (None, 258, 540, 16)      64        
__________

KeyboardInterrupt: 

In [23]:
# Predict the test set with the stacking CNN from the stacked test features.
pred_cnn = scnn.eval_model(x_test_ens)

# XGB Stacker

In [27]:
def _stack_pixel_features(*channels):
    """Flatten each array and return a 2-D matrix with one column per array."""
    flat = [channel.ravel() for channel in channels]
    return np.dstack(flat).reshape((-1, len(flat)))


def XGBStacking(x_train,x_train_med,x_train_thresh,x_train_edge,x_train_cnn, y_train,
                train_end=114, valid_end=144, num_round=100):
    """Train a per-pixel XGBoost regressor on the five stacked feature maps.

    Every pixel becomes one row with five features (one per input array, in
    argument order); the label is the matching pixel of ``y_train``.

    Parameters
    ----------
    x_train, x_train_med, x_train_thresh, x_train_edge, x_train_cnn : numpy.ndarray
        Feature arrays indexed by image along the first axis.
    y_train : numpy.ndarray
        Target array indexed by image along the first axis.
    train_end : int, optional
        Images ``[0, train_end)`` are used for fitting (default 114).
    valid_end : int, optional
        Images ``[train_end, valid_end)`` are used for evaluation (default 144).
    num_round : int, optional
        Number of boosting rounds passed to ``xgb.train`` (default 100).

    Returns
    -------
    The booster returned by ``xgb.train``.
    """
    fit_rows = slice(None, train_end)
    valid_rows = slice(train_end, valid_end)

    x_train_ens = _stack_pixel_features(x_train[fit_rows], x_train_med[fit_rows],
                                        x_train_thresh[fit_rows], x_train_edge[fit_rows],
                                        x_train_cnn[fit_rows])
    y_train_ens = y_train[fit_rows].ravel()
    dtrain = xgb.DMatrix(x_train_ens, label = y_train_ens)

    x_valid_ens = _stack_pixel_features(x_train[valid_rows], x_train_med[valid_rows],
                                        x_train_thresh[valid_rows], x_train_edge[valid_rows],
                                        x_train_cnn[valid_rows])
    y_valid_ens = y_train[valid_rows].ravel()
    dvalid = xgb.DMatrix(x_valid_ens, label = y_valid_ens)

    # Both sets are reported after every boosting round.
    evallist = [(dvalid, 'eval'), (dtrain, 'train')]

    # 'n_estimators' belongs to the scikit-learn wrapper; with xgb.train the
    # number of trees is set by num_round, so it is not listed here.
    # NOTE(review): newer XGBoost releases call this objective
    # 'reg:squarederror'; kept as-is for the version this notebook ran on — confirm.
    param = {'max_depth': 12, 'objective': 'reg:linear', 'min_child_weight' : 1, 'gamma' : 0, 'subsample' : 0.8, 'colsample_bytree' : 0.9, 'eval_metric' : 'rmse', 'learning_rate': 0.1}
    model = xgb.train(param, dtrain, num_round, evallist)
    return model

In [28]:
# Fit the XGBoost stacker on the normalised inputs plus the four precomputed
# feature arrays, with the normalised clean images as targets.
xgbmodel = XGBStacking(
    x_train=x_train_norm,
    x_train_med=x_train_med,
    x_train_thresh=x_train_thresh,
    x_train_edge=x_train_edge,
    x_train_cnn=x_train_cnn,
    y_train=y_train_norm,
)

[0]	eval-rmse:0.417431	train-rmse:0.448226
[1]	eval-rmse:0.383642	train-rmse:0.362088


KeyboardInterrupt: 

In [None]:
def xgbpredict(model,i):
    """Predict test image ``i`` with the XGBoost stacker.

    Reads the notebook-level arrays ``x_test_norm``, ``x_test_med``,
    ``x_test_thresh``, ``x_test_edge`` and ``x_test_cnn``. Each pixel of the
    image becomes one row with those five features, in that order.

    Parameters
    ----------
    model
        Object with a ``predict(DMatrix)`` method, e.g. the booster returned
        by ``xgb.train``.
    i : int
        Index of the test image.

    Returns
    -------
    numpy.ndarray
        Predictions reshaped to the shape of ``x_test_norm[i]``.
    """
    channels = (x_test_norm[i], x_test_med[i], x_test_thresh[i], x_test_edge[i], x_test_cnn[i])
    features = np.dstack([channel.ravel() for channel in channels]).reshape((-1, 5))
    dtest = xgb.DMatrix(features)
    preds = model.predict(dtest)
    # One prediction per pixel, in ravel() order: a plain reshape restores the
    # image. (Resizing the flat vector with cv2 interpolates it and does not
    # rebuild the image layout.)
    preds_reshaped = preds.reshape(x_test_norm[i].shape)
    return preds_reshaped

# Predict each of the 72 test images with the XGBoost stacker, one at a time,
# and collect the per-image results into a single array.
pred_xgb = np.array([xgbpredict(xgbmodel,i) for i in range(72)])