In [None]:
import pandas as pd 
import numpy as np 
import cv2

from keras import layers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Activation, Input, AveragePooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam


# Seed NumPy's global random number generator so NumPy-based randomness repeats
# between runs.
# NOTE(review): only NumPy is seeded here; whether Keras weight initialisation
# and batch shuffling also become reproducible depends on the backend - confirm.
np.random.seed(42) 

In [None]:
# Directory the train/test JSON files are read from.
data_dir = "/data"
# Directory the model checkpoint and the submission CSV are written to.
output_dir = "/output"

In [None]:
# Load the training set; later cells read its 'band_1', 'band_2' and
# 'is_iceberg' columns.
df_train = pd.read_json(data_dir + '/train.json') 

In [None]:
def get_scaled_imgs(df, size=75):
    """Turn the two flattened radar bands of every row into a scaled 3-channel image.

    For each row, ``band_1`` and ``band_2`` are reshaped to ``size x size`` and a
    third channel is formed as their sum. Every channel is then centred on its
    own mean and divided by its own value range (max - min).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``band_1`` and ``band_2`` columns, each cell holding a flat
        sequence of ``size * size`` numbers.
    size : int, optional
        Side length of the square image. Defaults to 75, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), size, size, 3)``. An empty frame yields an
        empty array of shape ``(0, size, size, 3)``.

    Raises
    ------
    ValueError
        If a band cannot be reshaped to ``size x size``.
    """
    def _rescale(band):
        # A constant band has zero range; after centring it is already all
        # zeros, so divide by 1 instead of producing NaNs from 0 / 0.
        span = band.max() - band.min()
        return (band - band.mean()) / (span if span != 0 else 1.0)

    imgs = []
    # Iterate the two columns directly; building a Series per row with
    # iterrows() is unnecessary here.
    for raw_1, raw_2 in zip(df['band_1'], df['band_2']):
        band_1 = np.asarray(raw_1, dtype=float).reshape(size, size)
        band_2 = np.asarray(raw_2, dtype=float).reshape(size, size)
        band_3 = band_1 + band_2  # plus since log(x*y) = log(x) + log(y)

        imgs.append(np.dstack((_rescale(band_1), _rescale(band_2), _rescale(band_3))))

    if not imgs:
        # Keep the image rank even when there is nothing to convert.
        return np.empty((0, size, size, 3))
    return np.array(imgs)

In [None]:
# Scaled 3-channel training images and their binary 'is_iceberg' labels.
Xtrain = get_scaled_imgs(df_train)
Ytrain = np.array(df_train['is_iceberg'])

In [None]:
def get_more_images(imgs):
    """Augment a batch of images with mirrored copies.

    The result is the concatenation, along the first axis, of three equally
    sized parts in this order:

    1. the original images,
    2. the images mirrored left-right (columns reversed; what
       ``cv2.flip(channel, 1)`` produces for each channel),
    3. the images mirrored up-down (rows reversed; what
       ``cv2.flip(channel, 0)`` produces for each channel).

    Parameters
    ----------
    imgs : numpy.ndarray
        Batch of shape ``(n, height, width, channels)``.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(3 * n, height, width, channels)`` with the dtype
        of ``imgs``. An empty batch is returned as an empty copy with its shape
        unchanged.
    """
    imgs = np.asarray(imgs)
    if imgs.shape[0] == 0:
        # Nothing to mirror; keep the incoming shape.
        return imgs.copy()

    # Reversing an axis with a slice mirrors every image and every channel at
    # once, so no per-image / per-channel loop is needed.
    mirrored_left_right = imgs[:, :, ::-1, :]
    mirrored_up_down = imgs[:, ::-1, :, :]

    return np.concatenate((imgs, mirrored_left_right, mirrored_up_down))

In [None]:
# Training images plus two flipped copies of each (three times the samples).
Xtr_more = get_more_images(Xtrain) 

In [None]:
# Repeat the labels three times so they line up with the three parts that
# get_more_images concatenates (originals, then the two flipped sets).
Ytr_more = np.concatenate((Ytrain,Ytrain,Ytrain))

In [80]:
def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):
    """Chain a bias-free 2D convolution, batch normalisation and a ReLU.

    Parameters
    ----------
    x : tensor
        Input tensor; batch normalisation is applied over axis 3.
    filters : int
        Number of convolution filters.
    num_row, num_col : int
        Height and width of the convolution kernel.
    padding : str
        Padding mode handed to ``Conv2D``.
    strides : tuple
        Strides handed to ``Conv2D``.
    name : str or None
        Name of the activation layer. When given, the convolution and the
        batch-normalisation layers are named ``name + '_conv'`` and
        ``name + '_bn'``; otherwise all three layers are left unnamed.

    Returns
    -------
    tensor
        Output of the ReLU activation.
    """
    # Derive the per-layer names only when the caller supplied a base name.
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'

    # The convolution carries no bias term.
    conv_layer = Conv2D(filters,
                        (num_row, num_col),
                        strides=strides,
                        padding=padding,
                        use_bias=False,
                        name=conv_name)
    features = conv_layer(x)
    features = BatchNormalization(axis=3, scale=False, name=bn_name)(features)
    return Activation('relu', name=name)(features)

def getModel():
    """Build and compile the binary classifier for 75x75x3 inputs.

    Layout:
      * three parallel conv2d_bn branches (2x2, 3x3 and 4x4 kernels, 32 filters
        each) on the input, each followed by 3x3 average pooling with stride 3,
        concatenated along the channel axis;
      * two stages of average pooling -> conv2d_bn (40, then 80 filters, 3x3
        kernels) -> dropout 0.5;
      * flatten, Dense(256, relu), dropout 0.5, Dense(256, relu) and a single
        sigmoid output unit.

    Returns
    -------
    Model
        Compiled with binary cross-entropy loss, an Adam optimiser
        (lr=0.001, decay=1e-6) and the accuracy metric.
    """
    def pool_by_three(tensor):
        # 3x3 average pooling with stride 3 and 'same' padding.
        return AveragePooling2D((3, 3), strides=(3, 3), padding='same')(tensor)

    x_input = Input(shape=(75,75, 3))

    # Parallel branches that differ only in kernel size.
    branch_specs = ((2, "branch2x2"), (3, "branch3x3"), (4, "branch4x4"))
    branches = [
        conv2d_bn(x_input, 32, kernel, kernel, name=branch_name)
        for kernel, branch_name in branch_specs
    ]
    pooled_branches = [pool_by_three(branch) for branch in branches]

    net = layers.concatenate(pooled_branches, axis=3, name='mixed0')

    # Two identical pool -> conv -> dropout stages with growing filter counts.
    for stage_filters in (40, 80):
        net = pool_by_three(net)
        net = conv2d_bn(net, stage_filters, 3, 3)
        net = Dropout(0.5)(net)

    # Fully connected head.
    net = Flatten()(net)
    net = Dense(256, activation='relu')(net)
    net = Dropout(0.5)(net)
    net = Dense(256, activation='relu')(net)
    prediction = Dense(1, activation="sigmoid")(net)

    model = Model(inputs=[x_input], outputs=[prediction])
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=0.001, decay=1e-6),
                  metrics=['accuracy'])

    return model

# Build the compiled model and print its layer-by-layer summary.
model = getModel()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_35 (InputLayer)           (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
branch2x2_conv (Conv2D)         (None, 75, 75, 32)   384         input_35[0][0]                   
__________________________________________________________________________________________________
branch3x3_conv (Conv2D)         (None, 75, 75, 32)   864         input_35[0][0]                   
__________________________________________________________________________________________________
branch4x4_conv (Conv2D)         (None, 75, 75, 32)   1536        input_35[0][0]                   
__________________________________________________________________________________________________
branch2x2_

In [None]:
# Training configuration. All three callbacks watch the validation loss.
batch_size = 32
# Stop training after 10 epochs without a val_loss improvement.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
# Write a checkpoint to output_dir only when val_loss improves.
mcp_save = ModelCheckpoint(output_dir + '/.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
# Multiply the learning rate by 0.1 after 7 epochs without a val_loss improvement.
# NOTE(review): newer Keras releases name the `epsilon` argument `min_delta` -
# confirm against the installed version before changing it.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')

In [None]:
# Hold out the validation samples BEFORE augmenting. Keras' validation_split
# takes the last fraction of the arrays exactly as passed in (before any
# shuffling). On the augmented array (originals followed by two sets of flipped
# copies) that tail contains only flipped copies of images that are also
# trained on, so val_loss would be optimistic and would mislead early stopping,
# the learning-rate schedule and the checkpoint selection.
val_fraction = 0.25
split_rng = np.random.RandomState(42)  # local RNG; the global NumPy seed state is left alone
shuffled_idx = split_rng.permutation(len(Xtrain))
n_val = int(len(Xtrain) * val_fraction)
val_idx, fit_idx = shuffled_idx[:n_val], shuffled_idx[n_val:]

# Augment the training part only. get_more_images returns three equally sized
# parts (originals plus two flipped sets), so the labels are repeated 3 times.
X_fit = get_more_images(Xtrain[fit_idx])
Y_fit = np.concatenate((Ytrain[fit_idx],) * 3)

model.fit(X_fit, Y_fit, batch_size=batch_size, epochs=50, verbose=1,
          callbacks=[earlyStopping, mcp_save, reduce_lr_loss],
          validation_data=(Xtrain[val_idx], Ytrain[val_idx]))

In [None]:
# Restore the checkpoint written during training, then score the model on the
# original (non-augmented) training images.
best_weights_path = output_dir + '/.mdl_wts.hdf5'
model.load_weights(filepath=best_weights_path)

score = model.evaluate(Xtrain, Ytrain, verbose=1)
# score holds the loss followed by the accuracy metric.
for label, value in zip(('Train score:', 'Train accuracy:'), score):
    print(label, value)

In [None]:
# Load the test set and prepare it the same way as the training images.
test_json_path = data_dir + '/test.json'
df_test = pd.read_json(test_json_path)
df_test.inc_angle = df_test.inc_angle.replace('na',0)
Xtest = get_scaled_imgs(df_test)

# Predict and flatten the model output to one value per test row.
pred_test = model.predict(Xtest)
n_test = pred_test.shape[0]

submission = pd.DataFrame({
    'id': df_test["id"],
    'is_iceberg': pred_test.reshape((n_test)),
})
print(submission.head(10))

# Write the submission file without the DataFrame index.
submission_path = output_dir + '/submission.csv'
submission.to_csv(submission_path, index=False)