In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import pdb
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras import backend as K
from matplotlib import pyplot as plt
import scipy
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom
from scipy.ndimage import imread

% matplotlib inline


Using TensorFlow backend.


In [2]:
# Load the training set from JSON into a DataFrame.
# NOTE(review): the test set is read from '../input/test.json' further down,
# while this path has no '../' prefix - confirm both resolve from the
# notebook's working directory.
train = pd.read_json('input/train.json')

In [3]:
def _rescale(band):
    """Min-max rescale a 2-D band to [0, 1]; a constant band maps to all zeros."""
    low = band.min()
    span = band.max() - low
    if span == 0:
        # Every pixel is identical: the plain formula would compute 0/0 -> NaN.
        return np.zeros_like(band, dtype=float)
    return (band - low) / span


def get_images(df):
    """Create 3-channel 'images'. Return rescale-normalised images.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'band_1' and 'band_2' columns, each cell holding 5625
        numbers (one flattened 75x75 band).

    Returns
    -------
    numpy.ndarray
        Array of shape (len(df), 75, 75, 3). Channels are band_1, band_2 and
        band_1 / band_2, each independently min-max rescaled to [0, 1] per
        image. An empty frame yields an array of shape (0, 75, 75, 3).

    Notes
    -----
    Degenerate inputs no longer produce NaN/inf pixels: a constant band is
    rescaled to zeros, and ratio pixels whose band_2 value is 0 are set to 0
    before rescaling.
    """
    images = []
    for _, row in df.iterrows():
        # Formulate the bands as 75x75 float arrays.
        band_1 = np.array(row['band_1'], dtype=float).reshape(75, 75)
        band_2 = np.array(row['band_2'], dtype=float).reshape(75, 75)
        # Ratio channel; where band_2 == 0 the pre-filled 0 in `out` is kept
        # instead of dividing by zero.
        band_3 = np.divide(band_1, band_2,
                           out=np.zeros_like(band_1), where=band_2 != 0)

        images.append(np.dstack((_rescale(band_1),
                                 _rescale(band_2),
                                 _rescale(band_3))))

    if not images:
        # Keep the trailing image dimensions even when there are no rows.
        return np.empty((0, 75, 75, 3))
    return np.array(images)

In [4]:
# Build the rescaled 3-channel image array for every training row.
X = get_images(train)

In [7]:
# Draw nine distinct, randomly chosen training images in a 3x3 grid.
fig = plt.figure(200, figsize=(15, 15))
random_indicies = np.random.choice(range(len(X)), size=9, replace=False)
subset = X[random_indicies]
for i, image in enumerate(subset):
    # Subplot positions are 1-based.
    ax = fig.add_subplot(3, 3, i + 1)
    ax.imshow(image)
plt.show()

In [5]:
# One-hot encode the output labels: column 0 is set where is_iceberg == 0,
# column 1 where is_iceberg == 1.
y = to_categorical(train.is_iceberg.values, num_classes=2)


In [None]:
# Split the data into 80% training and 20% validation.
# shuffle=False keeps the original row order, so the validation set is the
# final 20% of the rows and the same split is produced on every run.
Xtr, Xv, ytr, yv = train_test_split(X, y, shuffle=False, test_size=0.20)

## And now model

## Create and compile the model

In [None]:
# Build the network, then compile it for one-hot two-class training with Adam.
# NOTE(review): create_model() is not defined in any cell visible before this
# one; it must be defined before this cell can run on a fresh kernel.
model = create_model()
model.compile(
    optimizer=Adam(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Uncomment to print the layer table:
# model.summary()

## Train net
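Each run of the training cell below trains for `num_epo` more epochs (this note originally read "15 epo"; the code currently sets `num_epo = 30`).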

In [None]:
# Epoch window for the first training run: start at 0 and run num_epo epochs.
init_epo, num_epo = 0, 30
end_epo = init_epo + num_epo


In [None]:
# Report the optimiser's current learning rate, then train over the epoch
# window [init_epo, end_epo) while validating on the held-out split.
current_lr = K.get_value(model.optimizer.lr)
print('lr = {}'.format(current_lr))
history = model.fit(
    Xtr, ytr,
    batch_size=32,
    epochs=end_epo,
    initial_epoch=init_epo,
    validation_data=(Xv, yv),
)
# Slide the window forward so re-running this cell continues training.
init_epo = init_epo + num_epo
end_epo = init_epo + num_epo


## Submission

In [None]:
# Score the test set and write the submission file.
# NOTE(review): the training data is read from 'input/train.json' but the test
# data from '../input/test.json' - confirm which directory layout is intended.
test = pd.read_json('../input/test.json')
Xtest = get_images(test)
# Use predict(), the same call info_img makes: Sequential.predict_proba was a
# legacy alias of predict() and is no longer available in later Keras releases.
test_predictions = model.predict(Xtest)
# Column 1 of the one-hot output corresponds to is_iceberg == 1.
submission = pd.DataFrame({'id': test['id'], 'is_iceberg': test_predictions[:, 1]})
submission.to_csv('sub_fcn.csv', index=False)

In [None]:
# Preview the first five rows of the submission table.
submission.head(5)

## Heat maps

The FCN learns to distinguish between boats and icebergs using its final two 4x4 channels, one channel per class. Upsampling the 4x4 channel for a class and overlaying it on the input image produces a heat map showing the "boatness" or the "bergness" of each cell of the 4x4 grid.

In [None]:
# Backend function mapping (network input, learning-phase flag) to the output
# of the fourth-from-last layer, which get_cm turns into heat maps.
l = model.layers
network_input = l[0].input
heatmap_output = l[-4].output
conv_fn = K.function([network_input, K.learning_phase()], [heatmap_output])

In [None]:
def get_cm(inp, label, size=(75, 75)):
    """Upsample one class channel of the conv_fn output to an image-sized map.

    Replaces the former scipy.misc.imresize call (deprecated, and absent from
    SciPy >= 1.3) with a NumPy-only version of the same recipe: min-max scale
    the channel to 0..255 bytes, then enlarge it by nearest-neighbour
    sampling at output-pixel centres.

    Parameters
    ----------
    inp : array
        Batch passed to conv_fn; only the first sample's output is used.
    label : int
        Index of the channel (last axis of the conv_fn output) to extract.
    size : tuple of int, optional
        (rows, cols) of the returned map. Defaults to (75, 75).

    Returns
    -------
    numpy.ndarray
        uint8 array of shape `size`.
    """
    # conv_fn is called with learning phase 0; [0][0] selects the first output
    # tensor and the first sample, whose last axis indexes the channels.
    conv = conv_fn([inp, 0])[0][0][:, :, label]

    # Byte-scale: min -> 0, max -> 255. A flat map has zero span, so use a
    # span of 1 to avoid dividing by zero (the result is then all zeros).
    low = conv.min()
    span = conv.max() - low
    if span == 0:
        span = 1
    heat = ((conv - low) * (255.0 / span)).clip(0, 255)
    heat = (heat + 0.5).astype(np.uint8)

    # Nearest neighbour: each output pixel centre picks the source cell it
    # falls into.
    rows = ((np.arange(size[0]) + 0.5) * heat.shape[0] / size[0]).astype(int)
    cols = ((np.arange(size[1]) + 0.5) * heat.shape[1] / size[1]).astype(int)
    return heat[np.ix_(rows, cols)]

In [None]:
def info_img(im_idx):
    """Generate heat maps for the boat (boatness) and iceberg (bergness) for image im_idx.

    Prints the true label of validation image `im_idx` (taken from `yv`) and
    the model's predicted label with its class probabilities rounded to two
    decimals. It then draws the image twice side by side in figure 1, with the
    class-0 map ("Boatness") and class-1 map ("Bergness") from `get_cm`
    overlaid at 50% opacity.

    Parameters
    ----------
    im_idx : int
        Index into the validation arrays `Xv` / `yv`.
    """
    img_type = 'iceberg' if yv[im_idx][1] == 1.0 else 'boat'

    inp = np.expand_dims(Xv[im_idx], 0)
    probs = model.predict(inp)[0]
    # Decide on the raw probability: rounding first would turn e.g. 0.503 into
    # 0.5 and wrongly report 'boat'.
    guess_type = 'iceberg' if probs[1] > 0.5 else 'boat'
    # Rounded copy is for display only.
    img_guess = np.round(probs, 2)

    cm0 = get_cm(inp, 0)
    cm1 = get_cm(inp, 1)
    print('truth: {}'.format(img_type))
    print('guess: {}, prob: {}'.format(guess_type, img_guess))

    plt.figure(1, figsize=(10, 10))
    for position, title, heat in ((121, 'Boatness', cm0), (122, 'Bergness', cm1)):
        plt.subplot(position)
        plt.title(title)
        plt.imshow(Xv[im_idx])
        # Semi-transparent overlay so the underlying image stays visible.
        plt.imshow(heat, cmap="cool", alpha=0.5)


In [None]:
# Inspect validation image 13: print truth/guess and draw both heat maps.
info_img(13)