# Setup

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
# Locations of the labelled training set and the unlabelled test set (JSON).
TRAIN_JSON = "_RawData/train.json/data/processed/train.json"
TEST_JSON = "_RawData/test.json/data/processed/test.json"

# Each file is parsed into its own DataFrame.
train = pd.read_json(TRAIN_JSON)
test = pd.read_json(TEST_JSON)

In [3]:
# Show the first five rows of the training frame as a quick sanity check.
train.head(n=5)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0


# CNN

In [4]:
def concat_data(data):
    """Build a 3-channel image array from the two band columns of ``data``.

    Every entry of ``data["band_1"]`` and ``data["band_2"]`` is converted to
    float32 and reshaped to 75x75. A third channel is derived as the
    element-wise mean of the two bands. The shape of each per-channel array
    is printed before they are combined.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide ``"band_1"`` and ``"band_2"``, each an iterable of flat
        sequences holding 75 * 75 values.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 75, 75, 3)`` with the channels on the last axis,
        in the order band_1, band_2, mean of both.
    """
    def as_images(column):
        # One 75x75 float32 image per entry of the column.
        return np.array([np.array(flat).astype(np.float32).reshape(75, 75) for flat in column])

    first = as_images(data["band_1"])
    second = as_images(data["band_2"])
    mean_band = (first + second) / 2.
    planes = (first, second, mean_band)

    for plane in planes:
        print(plane.shape)

    # Give every plane a trailing channel axis, then join along that axis.
    return np.concatenate([plane[:, :, :, np.newaxis] for plane in planes], axis=-1)

In [5]:
# Turn the band columns of the training frame into one image array.
X_train = concat_data(data=train)

(1604, 75, 75)
(1604, 75, 75)
(1604, 75, 75)


In [6]:
from sklearn.model_selection import train_test_split

# Binary target column of the training frame.
y_train = train["is_iceberg"]

# Hold out a validation set: 75% of the rows go to training, the remainder to
# validation. random_state is fixed so the split is repeatable.
X_train_cv, X_valid, y_train_cv, y_valid = train_test_split(
    X_train,
    y_train,
    train_size=0.75,
    random_state=1,
)



In [10]:
def get_generators(train_data, valid_data):
    """Create the image generators used for training and validation.

    The training generator is configured with ``rescale=1./255`` plus random
    shear, zoom, rotation (``rotation_range=180``) and vertical/horizontal
    flips. The validation generator is configured with the same ``rescale``
    only. ``fit`` is called on each generator with its own data.

    Parameters
    ----------
    train_data : array-like
        Images passed to ``fit`` on the training generator.
    valid_data : array-like
        Images passed to ``fit`` on the validation generator.

    Returns
    -------
    tuple
        ``(training_generator, validation_generator)``, both
        ``ImageDataGenerator`` instances.
    """
    # NOTE(review): per the Keras docs, fit() only matters when featurewise
    # normalisation or ZCA whitening is enabled, which is not the case here;
    # confirm whether these calls are still wanted.
    augmentation = dict(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=180,
        vertical_flip=True,
        horizontal_flip=True,
    )

    train_aug = ImageDataGenerator(**augmentation)
    train_aug.fit(train_data)

    # Validation images are only rescaled, never randomly transformed.
    valid_aug = ImageDataGenerator(rescale=augmentation["rescale"])
    valid_aug.fit(valid_data)

    return train_aug, valid_aug

In [11]:
# Training generator (with augmentation) and validation generator.
dgen, vgen = get_generators(train_data=X_train_cv, valid_data=X_valid)

In [12]:
from keras.applications import VGG16

# VGG16 convolutional base without its classifier head and without loading
# any weights (weights=None), sized for 75x75 inputs with 3 channels.
vgg_model = VGG16(include_top=False, weights=None, input_shape=(75, 75, 3))

# Classification head: flatten the base's feature maps, one hidden layer,
# then a single sigmoid unit for the binary target.
# Disabled variants not included here: a second Dense(512, relu) layer,
# Dropout(0.5), and loading saved weights into the head.
top_model = Sequential([
    Flatten(input_shape=vgg_model.output_shape[1:]),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Chain base and head into one end-to-end model.
model = Model(inputs=vgg_model.input, outputs=top_model(vgg_model.output))

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr=0.0001),
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 75, 75, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 75, 75, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 75, 75, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 37, 37, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 37, 37, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 37, 37, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 18, 18, 128)       0         
__________

In [13]:
# Batch size shared by both generators and by the step computations below.
BATCH_SIZE = 32

# Step counts are derived from the arrays actually fed to each generator.
# steps_per_epoch must follow the training split (X_train_cv), not the full
# X_train: flow() loops forever, so counting the unsplit set would make every
# "epoch" run more batches than one pass over the training data.
# ceil() keeps the final partial batch, and int() gives Keras whole step
# counts instead of fractional floats.
train_steps = int(np.ceil(len(X_train_cv) / float(BATCH_SIZE)))
valid_steps = int(np.ceil(len(X_valid) / float(BATCH_SIZE)))

# Train for 20 epochs on augmented batches; validation batches are drawn in a
# fixed order (shuffle=False).
model.fit_generator(dgen.flow(X_train_cv, y_train_cv, batch_size=BATCH_SIZE),
                    steps_per_epoch=train_steps,
                    validation_data=vgen.flow(X_valid, y_valid, batch_size=BATCH_SIZE, shuffle=False),
                    validation_steps=valid_steps,
                    epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x26f091a17f0>