Model from https://www.kaggle.com/sinkie/keras-data-augmentation-with-multiple-inputs/notebook

In [31]:
import numpy as np
np.random.seed(656)
import os
import keras
import pandas as pd
from sklearn.model_selection import train_test_split
from IPython.display import FileLink
from keras.preprocessing import image
from subprocess import check_output
from matplotlib import pyplot
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten
from keras.layers import GlobalMaxPooling2D, merge, BatchNormalization, Permute
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping

In [2]:
#Read the processed train / test JSON files into DataFrames
train, test = map(pd.read_json, (
    "/home/odenigborig/Data/kaggle/statoli_iceberg/data/processed/train.json",
    "/home/odenigborig/Data/kaggle/statoli_iceberg/data/processed/test.json",
))

In [3]:
#In the inc_angle column of both frames: swap 'na' entries for 0, cast the
#column to float and fill any remaining NaN with 0.0
for frame in (train, test):
    angles = frame.inc_angle.replace('na', 0)
    frame.inc_angle = angles.astype(float).fillna(0.0)

In [4]:
def _band_images(flat_bands):
    """Convert an iterable of flattened bands into an (N, 75, 75) float32 array."""
    return np.array([np.array(flat).astype(np.float32).reshape(75, 75)
                     for flat in flat_bands])


def _three_channel(band_a, band_b):
    """Stack band_a, band_b and their element-wise mean along a new last axis."""
    return np.stack([band_a, band_b, (band_a + band_b) / 2], axis=-1)


#Prepare training data
x_band1 = _band_images(train["band_1"])
x_band2 = _band_images(train["band_2"])
X_train = _three_channel(x_band1, x_band2)
X_angle_train = np.array(train.inc_angle)
y_train = np.array(train["is_iceberg"])

#Prepare test data (x_band1 / x_band2 are reused for the test bands)
x_band1 = _band_images(test["band_1"])
x_band2 = _band_images(test["band_2"])
X_test = _three_channel(x_band1, x_band2)
X_angle_test = np.array(test.inc_angle)

In [5]:
# Prepare validation set: 75% of the rows stay in the training arrays.
# NOTE: this cell overwrites X_train / X_angle_train / y_train, so running it a
# second time without re-running the data-preparation cell splits the
# already-split arrays again.
_split = train_test_split(X_train, X_angle_train, y_train,
                          train_size=0.75, random_state=123)
(X_train, X_valid,
 X_angle_train, X_angle_valid,
 y_train, y_valid) = _split

## Create CNN model

In [76]:
#functions
def get_callbacks(filepath, patience=2):
    """Build the callback list handed to the training calls.

    filepath -- target file given to ModelCheckpoint
    patience -- patience given to EarlyStopping

    Returns [EarlyStopping, ModelCheckpoint], in that order.
    """
    #early stopping watches val_loss in "min" mode
    early_stop = EarlyStopping(monitor='val_loss', patience=patience, mode="min")
    #checkpoint is created with save_best_only=True
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stop, checkpoint]

def get_model(p_dropout=0):
    """Build and compile the two-input CNN (image tensor + angle).

    p_dropout -- rate handed to the Dropout layer that follows each pooling layer

    Returns the compiled Model named 'CNN' (adam, binary cross-entropy,
    accuracy metric).
    """
    act = "relu"

    img_input = Input(shape=(75,75,3), name="image_input")
    angle_input = Input(shape=(1,), name="input_angle")

    #reorder the image axes to (3, 1, 2) before batch normalisation
    features = Permute([3,1,2])(img_input)
    features = BatchNormalization()(features)

    #four conv -> max-pool -> dropout stages with 8, 16, 32 and 64 filters
    for stage, n_filters in enumerate((8, 16, 32, 64)):
        features = Conv2D(n_filters, 3, 3, activation=act,
                          name='conv_%d' % stage)(features)
        features = MaxPooling2D((2,2), name='max_pool_%d' % stage)(features)
        features = Dropout(p_dropout)(features)

    features = GlobalMaxPooling2D()(features)

    #lower-case merge works on tensors (upper-case Merge is for layers);
    #concatenate the angle onto the pooled image features
    features = merge([features, angle_input], mode='concat', concat_axis=1)

    hidden = Dense(32, activation=act)(features)
    cnn_output = Dense(1, activation='sigmoid')(hidden)

    model = Model(input=[img_input, angle_input], output=cnn_output, name='CNN')
    model.compile(optimizer='adam', loss="binary_crossentropy", metrics=["accuracy"])

    return model



## Fit model

In [None]:
#Folder for checkpoints / saved weights: a "models" directory inside the current
#working directory. os.getcwd() carries no trailing separator, so the pieces are
#combined with os.path.join (plain concatenation produced "<cwd>models/").
#The empty last component keeps a trailing separator because other cells build
#file names with `model_path + name`.
path = os.getcwd()
model_path = os.path.join(path, 'models', '')
if not os.path.isdir(model_path):
    #create the folder up front so later saves do not hit a missing directory
    os.makedirs(model_path)
file_path = model_path + ".model2_weights.hdf5"
callbacks = get_callbacks(filepath=file_path, patience=5)

In [None]:
#fresh model; p_dropout=0 is get_model's default
model = get_model(p_dropout=0)

In [33]:
#train on the un-augmented arrays, validating on the held-out split
valid_data = ([X_valid, X_angle_valid], y_valid)
model.fit([X_train, X_angle_train], y_train,
          batch_size=64, nb_epoch=25,
          validation_data=valid_data,
          callbacks=callbacks)

Train on 1203 samples, validate on 401 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25


<keras.callbacks.History at 0x7f0bd3905750>

In [32]:
submission_filename = 'mod2_subm2.csv'

#store the current weights, then predict on the test arrays
model.save_weights(model_path+'model2_2.h5')
prediction = model.predict([X_test, X_angle_test], verbose=1)

#one prediction per test id, reshaped to a 1-D array
submission = pd.DataFrame({'id': test["id"],
                           'is_iceberg': prediction.reshape(len(prediction))})
submission.to_csv(submission_filename, index=False)

#create downloadable submission file to upload
FileLink(submission_filename)



## Data Augmentation

In [39]:
#batch size used by the augmentation generators
batch_size = 64

#image transformations handed to the generator
augmentation = dict(rotation_range=40,
                    width_shift_range=0.1, height_shift_range=0.1,
                    zoom_range=0.1,
                    horizontal_flip=True, vertical_flip=True,
                    dim_ordering='tf')
gen = image.ImageDataGenerator(**augmentation)

#Merge two generators with the same random seed
def gen_flow_two_inputs(img_data, angle_data, y_label):
    """Yield ([image_batch, angle_batch], label_batch) tuples forever.

    Two flows over the same images are created from the module-level `gen`,
    both with the module-level `batch_size` and seed 656: the first carries the
    labels in its `y` slot, the second carries the angles there.
    """
    label_flow = gen.flow(X=img_data, y=y_label, batch_size=batch_size, seed=656)
    angle_flow = gen.flow(X=img_data, y=angle_data, batch_size=batch_size, seed=656)

    while True:
        images, labels = label_flow.next()
        #only the `y` part of the second flow (the angles) is used
        angles = angle_flow.next()[1]

        yield [images, angles], labels

#batch generator over the training split (images, angles, labels)
gen_flow = gen_flow_two_inputs(img_data=X_train,
                               angle_data=X_angle_train,
                               y_label=y_train)

In [40]:
#sanity check: pull one batch from each flow and print the array shapes
gen1 = gen.flow(X=X_train, y=y_train, batch_size=batch_size, seed=656)
gen2 = gen.flow(X=X_train, y=X_angle_train, batch_size=batch_size, seed=656)
X1, X2 = gen1.next(), gen2.next()

#order: image batch, label batch, angle batch
for batch in (X1[0], X1[1], X2[1]):
    print(batch.shape)

(64, 75, 75, 3)
(64,)
(64,)


## Fit Model output with generator

In [43]:
#start again from a fresh model; p_dropout=0 is get_model's default
model = get_model(p_dropout=0)

In [44]:
#fit model with generator batches
#samples_per_epoch counts samples, not batches: with a value of batch_size every
#epoch ended after a single batch. Use the size of the training split so one
#epoch is one pass over the training data.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=25, callbacks = callbacks)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25


<keras.callbacks.History at 0x7f0bd2323150>

In [46]:
#restore the weights from the checkpoint file
model.load_weights(file_path)


In [48]:
#samples_per_epoch counts samples, not batches: len(X_train)/batch_size is a
#batch count, which as a sample count is smaller than one batch and ended every
#epoch after a single batch. One epoch = one pass over the training split.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=25, callbacks = callbacks)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25


<keras.callbacks.History at 0x7f0bd2851310>

In [49]:
submission_filename = 'mod2_subm3.csv'

#store the current weights, then predict on the test arrays
model.save_weights(model_path+'model2_3.h5')
prediction = model.predict([X_test, X_angle_test], verbose=1)

#one prediction per test id, reshaped to a 1-D array
submission = pd.DataFrame({'id': test["id"],
                           'is_iceberg': prediction.reshape(len(prediction))})
submission.to_csv(submission_filename, index=False)

#create downloadable submission file to upload
FileLink(submission_filename)



In [51]:
#re-compile the current model with Adam at an explicit learning rate of 1e-4
optimizer = Adam(lr=1e-4)
model.compile(loss="binary_crossentropy",
              metrics=["accuracy"],
              optimizer=optimizer)

In [52]:
#samples_per_epoch counts samples, not batches: len(X_train)/batch_size is a
#batch count, which as a sample count is smaller than one batch and ended every
#epoch after a single batch. One epoch = one pass over the training split.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=25, callbacks = callbacks)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f0bc385a1d0>

In [54]:
#samples_per_epoch counts samples, not batches: len(X_train)/batch_size is a
#batch count, which as a sample count is smaller than one batch and ended every
#epoch after a single batch. One epoch = one pass over the training split.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=40, callbacks = callbacks)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40


<keras.callbacks.History at 0x7f0bc3870090>

In [55]:
submission_filename = 'mod2_subm4.csv'

#store the current weights, then predict on the test arrays
model.save_weights(model_path+'model2_4.h5')
prediction = model.predict([X_test, X_angle_test], verbose=1)

#one prediction per test id, reshaped to a 1-D array
submission = pd.DataFrame({'id': test["id"],
                           'is_iceberg': prediction.reshape(len(prediction))})
submission.to_csv(submission_filename, index=False)

#create downloadable submission file to upload
FileLink(submission_filename)



In [56]:
#batch size used by the augmentation generators
batch_size = 64

#image transformations handed to the generator (rotation range of 10)
augmentation = dict(rotation_range=10,
                    width_shift_range=0.1, height_shift_range=0.1,
                    zoom_range=0.1,
                    horizontal_flip=True, vertical_flip=True,
                    dim_ordering='tf')
gen = image.ImageDataGenerator(**augmentation)

#create generator
gen_flow = gen_flow_two_inputs(img_data=X_train,
                               angle_data=X_angle_train,
                               y_label=y_train)

In [58]:
#fresh model; p_dropout=0 is get_model's default
model = get_model(p_dropout=0)


In [72]:
#start from the checkpointed weights and re-compile with Adam at lr=1e-5
model.load_weights(filepath=file_path)
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss="binary_crossentropy",
              metrics=["accuracy"])

#samples_per_epoch counts samples, not batches: batch_size/2 is less than one
#batch, so every epoch ended after a single batch. Use the size of the training
#split so one epoch is one pass over the training data.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=50, callbacks = callbacks)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


<keras.callbacks.History at 0x7f0bbcf0add0>

In [73]:
submission_filename = 'mod2_subm5.csv'

#store the current weights, then predict on the test arrays
model.save_weights(model_path+'model2_5.h5')
prediction = model.predict([X_test, X_angle_test], verbose=1)

#one prediction per test id, reshaped to a 1-D array
submission = pd.DataFrame({'id': test["id"],
                           'is_iceberg': prediction.reshape(len(prediction))})
submission.to_csv(submission_filename, index=False)

#create downloadable submission file to upload
FileLink(submission_filename)



In [90]:
#fresh model with a dropout rate of 0.1
model = get_model(0.1)

In [96]:
#compile with Adam at lr=1e-4, then train on the un-augmented arrays
optimizer = Adam(lr=1e-4)
model.compile(loss="binary_crossentropy",
              metrics=["accuracy"],
              optimizer=optimizer)

valid_data = ([X_valid, X_angle_valid], y_valid)
model.fit([X_train, X_angle_train], y_train,
          batch_size=2 * batch_size, nb_epoch=10,
          validation_data=valid_data,
          callbacks=callbacks)


Train on 1203 samples, validate on 401 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0bb5ec2050>

In [None]:
file_path2 = model_path+'model2_dropout.h5'
submission_filename = 'mod2_subm_drop1.csv'

#store the current weights, then predict on the test arrays
model.save_weights(file_path2)
prediction = model.predict([X_test, X_angle_test], verbose=1)

#one prediction per test id, reshaped to a 1-D array
submission = pd.DataFrame({'id': test["id"],
                           'is_iceberg': prediction.reshape(len(prediction))})
submission.to_csv(submission_filename, index=False)

#create downloadable submission file to upload
FileLink(submission_filename)

In [97]:
#model.load_weights(filepath=file_path2)
#re-compile the current model with Adam at lr=1e-5
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss="binary_crossentropy",
              metrics=["accuracy"])

#samples_per_epoch counts samples, not batches: batch_size/2 is less than one
#batch, so every epoch ended after a single batch. Use the size of the training
#split so one epoch is one pass over the training data.
model.fit_generator(gen_flow, samples_per_epoch=len(X_train),
                    validation_data=([X_valid, X_angle_valid], y_valid),
                    nb_epoch=50, callbacks = callbacks)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


<keras.callbacks.History at 0x7f0bb1c15210>