In [1]:
import numpy as np
import pandas as pd
import pylab
import cv2
import pdb
import scipy
import keras
import math

Using TensorFlow backend.


In [2]:
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom, map_coordinates
from scipy.ndimage import imread
from scipy.ndimage import rotate as rot
from scipy.ndimage.filters import gaussian_filter
from scipy.signal import convolve2d
from numpy.random import random_integers
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedShuffleSplit
from os.path import join as opj
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams['figure.figsize'] = 10,10
%matplotlib inline

In [3]:
from matplotlib import pyplot
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation
from keras.layers import GlobalMaxPooling2D, Lambda, Input, Flatten,LSTM
from keras.layers import ZeroPadding2D, GlobalAveragePooling2D, Merge
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.layers import *
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.models import Model, model_from_json, Sequential
from keras import initializers
from keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.callbacks import ReduceLROnPlateau, History
from keras.datasets import cifar10
from keras import backend as K
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.xception import Xception
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg19 import VGG19
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

In [4]:
train_1 = pd.read_json("train.json")

In [5]:
def get_scaled_imgs(df):
    imgs = []
    
    for i, row in df.iterrows():
        #make 75x75 image
        band_1 = np.array(row['band_1']).reshape(75,75)
        band_2 = np.array(row['band_2']).reshape(75,75)
        band_3 = band_1 + band_2 
        
        #Rescale
        a = (band_1 - band_1.mean())/(band_1.max() - band_1.min())
        b = (band_2 - band_2.mean())/(band_2.max() - band_2.min())
        c = (band_3 - band_3.mean())/(band_3.max() - band_3.min())
        
        imgs.append(np.dstack((a,b,c)))
    return np.array(imgs)

In [6]:
Xtrain_1 = get_scaled_imgs(train_1)
Ytrain_1 = np.array(train_1['is_iceberg'])

In [7]:
train_1.inc_angle = train_1.inc_angle.replace('na',0)
idx_tr = np.where(train_1.inc_angle>0)

In [8]:
Ytrain_1 = Ytrain_1[idx_tr[0]]
Xtrain_1 = Xtrain_1[idx_tr[0],...]

In [9]:
def get_more_images(imgs):
    more_images = []
    vert_flip_imgs = []
    hori_flip_imgs = []
    
    for i in range(0,imgs.shape[0]):
        a = imgs[i,:,:,0]
        b = imgs[i,:,:,1]
        c = imgs[i,:,:,2]
        
        av = cv2.flip(a,1)
        ah = cv2.flip(a,0)
        bv = cv2.flip(b,1)
        bh = cv2.flip(b,0)
        cv = cv2.flip(c,1)
        ch = cv2.flip(c,0)
        
        vert_flip_imgs.append(np.dstack((av,bv,cv)))
        hori_flip_imgs.append(np.dstack((ah,bh,ch)))
        
    v = np.array(vert_flip_imgs)
    h = np.array(hori_flip_imgs)
    more_images = np.concatenate((imgs,v,h))
    
    return more_images

In [10]:
Xtr_more_1 = get_more_images(Xtrain_1) 
Ytr_more_1 = np.concatenate((Ytrain_1,Ytrain_1,Ytrain_1))

In [11]:
def getModel_1():
    model = Sequential()
    
    # CNN 1
    model.add(Conv2D(64, kernel_size=(3, 3),activation='relu', input_shape=(75, 75, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 2
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu' ))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # CNN 3
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    #CNN 4
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # You must flatten the data for the dense layers
    model.add(Flatten())

    #Dense 1
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))

    #Dense 2
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.2))

    # Output 
    model.add(Dense(1, activation="sigmoid"))

    optimizer = Adam(lr=0.001, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [12]:
model_1 = getModel_1()
earlyStopping = EarlyStopping(monitor = 'loss',
                              patience =10,
                              verbose = 0,
                              mode = 'min'
                             )
mcp_save = ModelCheckpoint('.mock_2_model_1_weights.hdf5',
                           save_best_only=True,
                           save_weights_only=True,
                           monitor = 'loss',
                           mode = 'min'
                          )

In [13]:
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

datagen.fit(Xtr_more_1)

In [14]:
model_1.fit_generator(datagen.flow(Xtr_more_1,
          Ytr_more_1,
          batch_size=32),
          epochs=50,
          verbose=1,
         callbacks=[earlyStopping,mcp_save])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2230b9d0b70>

In [15]:
model_1.load_weights(filepath = '.mock_2_model_1_weights.hdf5')
score_1 = model_1.evaluate(Xtrain_1, Ytrain_1, verbose=1)
print('Train score:', score_1[0])
print('Train accuracy:', score_1[1])

Train score: 0.0986877835168
Train accuracy: 0.968048946295


In [None]:
test_1 = pd.read_json('test.json')
test_1.inc_angle = test_1.inc_angle.replace('na',0)
Xtest_1 = (get_scaled_imgs(test_1))

In [None]:
pred_test_1 = model_1.predict(Xtest_1)

submission_1 = pd.DataFrame({'id': test_1["id"], 'is_iceberg': pred_test_1.reshape((pred_test_1.shape[0]))})
submission_1.to_csv('model_1.csv', index=False)

### MODEL 1b

In [20]:
model_1b = getModel_1()
batch_size = 32
earlyStopping_2 = EarlyStopping(monitor = 'val_loss',
                              patience =10,
                              verbose = 0,
                              mode = 'min'
                             )
mcp_save_2 = ModelCheckpoint('.mock_2_model_1b_weights.hdf5',
                           save_best_only=True,
                           monitor = 'val_loss',
                           mode = 'min'
                          )
reduce_lr_loss_2 = ReduceLROnPlateau(monitor='val_loss',
                                  factor = 0.1,
                                  patience=7,
                                  verbose = 0,
                                  epsilon=1e-4,
                                  mode='min')

In [21]:
model_1b.fit(Xtr_more_1,
          Ytr_more_1,
          batch_size=batch_size,
          epochs=25,
          verbose=1,
         callbacks=[earlyStopping_2,mcp_save_2,reduce_lr_loss_2],
                       validation_split=0.25)

Train on 3309 samples, validate on 1104 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25


<keras.callbacks.History at 0x1963e334ac8>

In [22]:
model_1b.load_weights(filepath = '.mock_2_model_1b_weights.hdf5')

score_1b = model_1b.evaluate(Xtrain_1, Ytrain_1, verbose=1)
print('Train score:', score_1b[0])
print('Train accuracy:', score_1b[1])

Train score: 0.0922697961082
Train accuracy: 0.966689326988


In [None]:
pred_test_1b = model_1b.predict(Xtest_1)

submission_1b = pd.DataFrame({'id': test_1["id"], 'is_iceberg': pred_test_1b.reshape((pred_test_1b.shape[0]))})
submission_1b.to_csv('model_1b.csv', index=False)

### MODEL 1c

In [23]:
model_1.load_weights(filepath = '.mock_2_model_1b_weights.hdf5')

score_1c = model_1.evaluate(Xtrain_1, Ytrain_1, verbose=1)
print('Train score:', score_1c[0])
print('Train accuracy:', score_1c[1])

Train score: 0.0922697961082
Train accuracy: 0.966689326988


### MODEL 2