In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ShuffleSplit, cross_val_score, train_test_split
from skimage.filters import gaussian
from functools import reduce
import keras
from keras.layers import Dense, Input, BatchNormalization, Concatenate, GlobalAveragePooling2D, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Model, load_model


Using TensorFlow backend.


In [None]:
#data.py functions
def transform(df):
    """Build the 4-channel image tensor and the incidence-angle vector.

    Every entry of ``df['band_1']`` and ``df['band_2']`` is reshaped to a
    75x75 image. Two derived channels are added: the element-wise product of
    the two bands and a Gaussian-smoothed combination of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``band_1`` and ``band_2`` (each row
        reshapeable to 75x75) and ``inc_angle``.

    Returns
    -------
    x : numpy.ndarray
        Array indexed as (sample, row, column, channel) with the channels
        ordered ``[band_2, band_1, band_1 * band_2, smoothed]``.
    angle : numpy.ndarray
        One ``inc_angle`` value per row of ``df``.
    """
    band_1 = np.array([np.array(row).reshape(75, 75) for row in df['band_1']])
    band_2 = np.array([np.array(row).reshape(75, 75) for row in df['band_2']])
    band_3 = band_1 * band_2

    # NOTE(review): operator precedence makes this band_1 + (band_2 / 2), not
    # the mean of the two bands. Kept as-is; confirm which one was intended.
    combined = band_1 + band_2 / 2

    # Smooth each image on its own. gaussian() filters along every axis of
    # its input, so handing it the whole (n, 75, 75) stack would also blur
    # across neighbouring samples and make a sample's features depend on the
    # order of the rows in the dataset.
    band_4 = np.array([gaussian(image) for image in combined])

    x = np.concatenate([band_2[:, :, :, np.newaxis], band_1[:, :, :, np.newaxis],
                        band_3[:, :, :, np.newaxis], band_4[:, :, :, np.newaxis]], axis=-1)
    angle = np.array([np.array(row) for row in df['inc_angle']])

    return x, angle

    
def augment(images):
    """Triple a batch by appending two mirrored copies of every image.

    Parameters
    ----------
    images : numpy.ndarray
        Batch indexed as (sample, row, column, channel). Any number of
        samples (including zero) and any number of channels is accepted.

    Returns
    -------
    numpy.ndarray
        ``images`` followed by the batch with the row axis reversed
        (up-down mirror), followed by the batch with the column axis
        reversed (left-right mirror), concatenated along the sample axis.
        Callers that replicate labels must repeat them in that same order.
    """
    # Axis 1 of the batch is axis 0 of each individual image (rows).
    flipped_rows = np.flip(images, 1)
    # Axis 2 of the batch is axis 1 of each individual image (columns).
    flipped_cols = np.flip(images, 2)
    return np.concatenate((images, flipped_rows, flipped_cols))

    
def split_data(imgs, angls, lbls, seed):
    """Make one random 80/20 train/validation split of the data.

    Parameters
    ----------
    imgs : array indexed as (sample, row, column, channel)
        The first two channels and the remaining channels are returned as
        two separate image arrays.
    angls, lbls : arrays indexed by sample
        Angles and labels aligned with ``imgs``.
    seed : int
        ``random_state`` handed to ``ShuffleSplit``.

    Returns
    -------
    (train, val) : two lists, each ``[img_1, img_2, angle, y]``.

    NOTE(review): rows are assigned to the two sides independently. If
    ``imgs`` holds augmented copies of the same sample, those copies can end
    up on both sides of the split -- confirm whether that is acceptable.
    """
    splitter = ShuffleSplit(n_splits=1, test_size=0.2, random_state=seed)
    train_rows, val_rows = next(splitter.split(imgs))

    def take(rows):
        # Channels 0-1 feed the first image input, channels 2+ the second.
        return [imgs[rows, :, :, :2], imgs[rows, :, :, 2:], angls[rows], lbls[rows]]

    return take(train_rows), take(val_rows)
    
#def make_gen(x_data_img, x_data_angle, y_data, batch_size):
#    num_images = len(x_data_img)
#
#    if len(y_data) == 1:
#        y_data = y_data[0]

#    while True:
#        idx1 = np.random.randint(0, num_images, batch_size)
#        #idx2 = np.random.randint(0, num_images, batch_size)
#
#        batch_x = [x_data_img[idx1, 2, :, :, :],\
#                   x_data_angle[idx1]]
#        
#        batch_y = y_data[idx1]
#
#        yield batch_x, batch_y
        
def prepare_data_train(filename='train.json'):
    """Load the training set, build features and augment it.

    Parameters
    ----------
    filename : str
        Path of the JSON file to read with ``pandas.read_json``.

    Returns
    -------
    train_X : numpy.ndarray
        Augmented image tensor produced by ``transform`` then ``augment``.
    angles : numpy.ndarray
        Incidence angles, repeated to line up with ``train_X``.
    train_y : numpy.ndarray
        ``is_iceberg`` labels, repeated to line up with ``train_X``.
    """
    train = pd.read_json(filename)
    # A missing angle may be encoded as the string 'na' or as NaN; coerce
    # everything non-numeric to NaN first so both end up as 0 and the column
    # is guaranteed to be numeric.
    train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce').fillna(0)
    train_X, angles = transform(train)
    train_y = np.array(train['is_iceberg'])

    n_samples = len(train_y)
    train_X = augment(train_X)
    # augment() appends whole mirrored copies of the batch, so labels and
    # angles are repeated once per copy instead of hard-coding the count.
    n_copies = len(train_X) // n_samples if n_samples else 1
    train_y = np.concatenate([train_y] * n_copies)
    angles = np.concatenate([angles] * n_copies)

    return train_X, angles, train_y
    
def prepare_data_test(filename='test.json'):
    """Load the test set and build the model inputs for prediction.

    Parameters
    ----------
    filename : str
        Path of the JSON file to read with ``pandas.read_json``.

    Returns
    -------
    img_1 : numpy.ndarray
        First two channels of the tensor returned by ``transform``.
    img_2 : numpy.ndarray
        Remaining channels of that tensor.
    angles : numpy.ndarray
        Incidence angle per row, with missing values replaced by 0.
    ids : pandas.Series
        The ``id`` column, in the same row order.
    """
    test = pd.read_json(filename)
    # Accept both encodings of a missing angle (the string 'na' and NaN):
    # non-numeric entries become NaN, then every NaN becomes 0.
    test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce').fillna(0)
    ids = test['id']
    test_X, angles = transform(test)

    return test_X[:, :, :, :2], test_X[:, :, :, 2:], angles, ids

In [None]:
#network.py
def make_model():
    """Build and compile the two-branch CNN.

    The model takes two (75, 75, 2) image inputs and one scalar angle input.
    Each image goes through its own convolutional stack and is concatenated
    with its own small dense embedding of the angle; the two resulting
    feature vectors are concatenated and passed through a dense head that
    ends in a single sigmoid unit.

    Returns
    -------
    keras.models.Model
        Compiled with binary cross-entropy, Adam (lr=0.001) and the
        ``accuracy`` metric. Inputs are ordered
        ``[input1_img, input2_img, input_angle]``.
    """
    inp_img_1 = Input(shape=(75, 75, 2), name='input1_img')
    inp_img_2 = Input(shape=(75, 75, 2), name='input2_img')

    inp_angle = Input(shape=(1,), name='input_angle')

    filters_a, filters_b = 32, 64

    def image_layers():
        # Fresh (unshared) layers for one image branch.
        return [
            BatchNormalization(),
            Conv2D(filters_a, (3, 3), activation='relu', padding='valid'),
            Conv2D(filters_a, (3, 3), activation='relu', padding='valid'),
            MaxPooling2D((2, 2), strides=(2, 2)),
            Dropout(DROPOUT_RATE),
            Conv2D(filters_b, (3, 3), activation='relu', padding='valid'),
            Conv2D(filters_b, (3, 3), activation='relu', padding='valid'),
            MaxPooling2D((2, 2), strides=(2, 2)),
            Dropout(DROPOUT_RATE),
            Flatten(),
        ]

    def angle_layers():
        # Fresh (unshared) layers for one angle embedding.
        return [BatchNormalization(), Dense(16, activation='relu')]

    def apply_layers(tensor, layers):
        # Thread the tensor through the layers in list order.
        for layer in layers:
            tensor = layer(tensor)
        return tensor

    # Layers are instantiated in this order (image 1, angle 1, image 2,
    # angle 2) so that auto-generated layer names stay the same.
    branch_1_image = image_layers()
    branch_1_angle = angle_layers()
    branch_2_image = image_layers()
    branch_2_angle = angle_layers()

    # Both image branches are wired before either angle branch.
    img_feat_1 = apply_layers(inp_img_1, branch_1_image)
    img_feat_2 = apply_layers(inp_img_2, branch_2_image)
    ang_feat_1 = apply_layers(inp_angle, branch_1_angle)
    ang_feat_2 = apply_layers(inp_angle, branch_2_angle)

    features_1 = Concatenate(name='features1')([img_feat_1, ang_feat_1])
    features_2 = Concatenate(name='features2')([img_feat_2, ang_feat_2])

    head = Concatenate()([features_1, features_2])
    head = Dense(50, activation='relu')(head)
    head = Dropout(DROPOUT_RATE)(head)
    head = Dense(50, activation='relu')(head)
    out = Dense(1, activation='sigmoid')(head)

    model = Model([inp_img_1, inp_img_2, inp_angle], out)

    optimizer = keras.optimizers.Adam(lr=0.001)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

    return model
	
# Rate passed to every Dropout layer created in make_model().
DROPOUT_RATE = 0.3
# Number of iterations of the training loop in main(); the loop index is used
# both as the split seed and as the suffix of the saved model file name.
N_FOLDS = 1

def main():
    """Train a fresh model on each of the N_FOLDS random splits and save it.

    The fold index is used as the split seed and as the suffix of the file
    the trained model is saved to (``net0``, ``net1``, ...).
    """
    images, angles, labels = prepare_data_train()
    for fold in range(N_FOLDS):
        train_part, val_part = split_data(images, angles, labels, fold)

        # Each part is [img_1, img_2, angle, y]: the last item is the target,
        # everything before it is the list of model inputs.
        train_inputs, train_targets = train_part[:-1], train_part[-1]
        val_inputs, val_targets = val_part[:-1], val_part[-1]

        net = make_model()
        net.summary()
        net.fit(
            train_inputs,
            train_targets,
            batch_size=24,
            epochs=2,
            verbose=1,
            validation_data=(val_inputs, val_targets),
        )

        net.save('net' + str(fold))


if __name__ == '__main__':
    main()

In [None]:
#load model and generate predictions, predict.py

def main():
    """Score the test set with the saved ``net0`` model and write ``out.csv``.

    The CSV has the columns ``id`` and ``is_iceberg`` and no index column.
    """
    img_1, img_2, test_angles, test_ids = prepare_data_test()
    net = load_model('net0')
    # predict() output is flattened to one score per test row.
    scores = net.predict(x=[img_1, img_2, test_angles]).flatten()

    submission = pd.DataFrame(data={'id': test_ids, 'is_iceberg': scores})
    submission.to_csv('out.csv', index=False)
    print('Done')


if __name__ == '__main__':
    main()
