In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras as k
from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
# Load the train and test sets from the input directory.
train = pd.read_json('../input/train.json')
test = pd.read_json('../input/test.json')

# inc_angle carries the string 'na' on some rows (presumably a missing angle),
# so convert it to numbers explicitly instead of relying on replace() to
# downcast an object column. Anything unparseable becomes NaN and then 0,
# which is the value the 'na' rows were mapped to before.
train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce').fillna(0)
test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce').fillna(0)

# Keep only training rows with a positive angle; test rows are all kept.
train = train[train['inc_angle'] > 0]
print('len of train set',len(train))

# Any results you write to the current directory are saved as output.

In [None]:
def _scale_band(band):
    """Mean-centre ``band`` and divide by its value range (max - min).

    A constant band has a zero range; it is returned as all zeros rather than
    dividing 0 by 0, which would fill the image with NaNs.
    """
    centred = band - band.mean()
    span = band.max() - band.min()
    if span == 0:
        return np.zeros_like(centred)
    return centred / span


def band_to_images(df, size=75):
    """Turn the flat ``band_1`` / ``band_2`` columns of ``df`` into image tensors.

    Each row yields a ``size`` x ``size`` image with three channels: band_1,
    band_2 and their element-wise average, each scaled independently by
    ``_scale_band``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``band_1`` and ``band_2`` columns, each holding a flat
        sequence of ``size * size`` numbers per row.
    size : int, optional
        Side length of the square image. Defaults to 75.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), size, size, 3)``, in row order of ``df``.
        An empty frame gives shape ``(0, size, size, 3)``.

    Raises
    ------
    ValueError
        If a band cannot be reshaped to ``(size, size)``.
    """
    images = []
    # Walk the two columns together instead of doing a df.loc lookup per band.
    for flat_1, flat_2 in zip(df['band_1'], df['band_2']):
        band_1 = np.asarray(flat_1, dtype=float).reshape(size, size)
        band_2 = np.asarray(flat_2, dtype=float).reshape(size, size)
        band_3 = (band_1 + band_2) / 2
        images.append(np.dstack((_scale_band(band_1),
                                 _scale_band(band_2),
                                 _scale_band(band_3))))
    if not images:
        # Keep the 4-D layout so downstream code can still index channels.
        return np.empty((0, size, size, 3))
    return np.array(images)
def data_augment(images):
    """Triple a batch of images by adding vertically and horizontally flipped copies.

    Parameters
    ----------
    images : numpy.ndarray
        Batch laid out as ``(n, height, width, channels)``. Any number of
        channels is accepted, and so is an empty batch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(3 * n, height, width, channels)`` holding, in order,
        the original images, the up/down flips and the left/right flips.
        Labels must be repeated in the same three-group order.
    """
    images = np.asarray(images)
    # Reversing the width axis of the whole batch equals np.fliplr per channel.
    lr_images = images[:, :, ::-1]
    # Reversing the height axis of the whole batch equals np.flipud per channel.
    ud_images = images[:, ::-1]
    return np.concatenate((images, ud_images, lr_images))

In [None]:
# Training images: convert the bands, then add the flipped copies.
x_train = data_augment(band_to_images(train))
# data_augment stacks three equally sized groups (originals, up/down flips,
# left/right flips), so the labels are repeated once per group.
y_train = np.concatenate([train.is_iceberg] * 3)

# Test images are converted but not augmented; ids are kept for the submission.
idno_test = test.id
x_test = band_to_images(test)

In [None]:
# CNN for binary classification of 75x75x3 images.
#
# Layout: four conv stages, each Conv2D(3x3, relu) -> MaxPooling2D(stride 2)
# -> Dropout(0.2), followed by two Dense+relu+Dropout blocks and a single
# sigmoid output unit.
#
# Conv2D and MaxPooling2D are taken from k.layers directly; the
# k.layers.convolutional submodule is not part of the documented layer API.
INPUT_SHAPE = (75, 75, 3)
# (number of filters, pooling window) for each conv stage, in order.
conv_stages = [
    (64, (3, 3)),
    (128, (2, 2)),
    (128, (2, 2)),
    (64, (2, 2)),
]

model = k.models.Sequential()
for stage_no, (n_filters, pool_size) in enumerate(conv_stages):
    # Only the first layer of the model declares the input shape.
    first_layer_kwargs = {'input_shape': INPUT_SHAPE} if stage_no == 0 else {}
    model.add(k.layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu', **first_layer_kwargs))
    model.add(k.layers.MaxPooling2D(pool_size=pool_size, strides=(2, 2)))
    model.add(k.layers.Dropout(0.2))

# Fully connected head: fc1 (512 units) and fc2 (256 units).
model.add(k.layers.Flatten())
for n_units in (512, 256):
    model.add(k.layers.Dense(n_units))
    model.add(k.layers.Activation('relu'))
    model.add(k.layers.Dropout(0.2))

# Output: one sigmoid unit, trained with binary cross-entropy below.
model.add(k.layers.Dense(1))
model.add(k.layers.Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer=k.optimizers.Nadam(0.001), metrics=['accuracy'])
#model.summary()

In [None]:
# --- Train / validation split -------------------------------------------------
# x_train holds three equally sized groups in this order: the original images,
# their up/down flips and their left/right flips, with y_train repeated to
# match. Keras' validation_split takes the *last* fraction of the arrays, which
# here would be left/right flips of images that are also trained on, so
# val_loss would be leaked. Instead, hold out whole source images: validate on
# their unflipped originals and train on every copy of the remaining images.
AUG_COPIES = 3
VAL_FRACTION = 0.25
n_source, leftover = divmod(len(x_train), AUG_COPIES)
assert leftover == 0, 'x_train must contain 3 equally sized augmentation groups'

# Fixed seed so the held-out set is the same on every run.
shuffled = np.random.RandomState(0).permutation(n_source)
n_val = int(n_source * VAL_FRACTION)
val_idx = shuffled[:n_val]
# Copy number c of source image i is stored at index i + c * n_source.
fit_idx = np.concatenate([shuffled[n_val:] + copy_no * n_source for copy_no in range(AUG_COPIES)])
x_fit, y_fit = x_train[fit_idx], y_train[fit_idx]
x_val, y_val = x_train[val_idx], y_train[val_idx]

# --- Callbacks ----------------------------------------------------------------
# NOTE(review): ReduceLROnPlateau uses the same patience (5) as EarlyStopping,
# so training will usually stop before a reduced learning rate is ever used;
# consider a smaller patience for the LR schedule — confirm intent.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min')
# Keep only the weights of the epoch with the lowest validation loss.
mcp_save = ModelCheckpoint('md.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, epsilon=1e-4, mode='min')

history = model.fit(
    x_fit,
    y_fit,
    batch_size=32,
    epochs=20,
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping, reduce_lr_loss, mcp_save],
)

# Restore the best checkpoint before scoring and predicting.
model.load_weights(filepath = 'md.hdf5')
# Scored on the samples actually used for fitting, so held-out images are excluded.
score = model.evaluate(x_fit, y_fit, verbose=1)
print('Train score:', score[0])
print('Train accuracy:', score[1])

# Predict on the test set and write the submission file; predictions are
# flattened to one value per test id.
pred_test = model.predict(x_test)
submission = pd.DataFrame({'id': idno_test, 'is_iceberg': pred_test.reshape((pred_test.shape[0]))})
submission.to_csv('cnn_keras.csv', index=False)



Thanks to fvzaur for the kernel this notebook builds on:
[www.kaggle.com/fvzaur/iceberg-ship-classification-with-cnn-on-keras](https://www.kaggle.com/fvzaur/iceberg-ship-classification-with-cnn-on-keras)