In [None]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
# Limit TensorFlow to 30% of GPU memory for this process.
config = tf.ConfigProto()
# config.gpu_options.allow_growth = True  # alternative: allocate on demand
config.gpu_options.per_process_gpu_memory_fraction = 0.3
# Register the configured session with Keras. Previously the session was
# created and immediately discarded, so nothing tied Keras to this config.
K.set_session(tf.Session(config=config))
# import seaborn as sns
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Load the train and test sets from JSON files under data/ (paths are relative
# to the notebook's working directory).
train = pd.read_json('data/train.json')
test = pd.read_json('data/test.json')

In [None]:
# Display the shape of both frames as the cell output.
train.shape, test.shape

In [None]:
def get_scaled_imgs(df):
    """Turn every row of ``df`` into a scaled 75x75x3 image plus its label.

    Each row must provide ``band_1`` and ``band_2`` (5625 values each, reshaped
    to 75x75) and ``is_iceberg``. The third channel is the element-wise sum of
    the two bands. Every channel is centred on its own mean and divided by its
    own max-min range.

    Returns:
        A tuple ``(images, labels)``: ``images`` stacks one (75, 75, 3) array
        per row, ``labels`` holds the matching ``is_iceberg`` values.
    """
    def _centre_by_range(band):
        # Subtract the mean, then divide by the peak-to-peak spread.
        return (band - band.mean()) / (band.max() - band.min())

    stacked, targets = [], []
    for _, record in df.iterrows():
        first = np.array(record['band_1']).reshape(75, 75)
        second = np.array(record['band_2']).reshape(75, 75)
        # Sum rather than product, since log(x*y) = log(x) + log(y).
        combined = first + second

        channels = [_centre_by_range(plane) for plane in (first, second, combined)]
        stacked.append(np.dstack(channels))
        targets.append(record['is_iceberg'])

    return np.array(stacked), np.array(targets)

In [None]:
def _min_max_scale(band):
    """Map ``band`` linearly onto [0, 1]; a constant band maps to all zeros."""
    lowest = band.min()
    spread = band.max() - lowest
    if spread == 0:
        # Avoid 0/0 -> NaN when every pixel has the same value.
        return np.zeros_like(band, dtype=float)
    return (band - lowest) / spread


def get_training_data(df, angles=False):
    """Build min-max scaled 75x75x3 images (and optionally angles) from ``df``.

    Each row must provide ``band_1`` and ``band_2`` (5625 values each, reshaped
    to 75x75) and ``is_iceberg``; ``inc_angle`` is read only when ``angles`` is
    true. The third channel is the element-wise ratio ``band_1 / band_2``.

    Every channel is rescaled to [0, 1] by subtracting its minimum and dividing
    by its range. The previous ``band + abs(band.min())`` shift only did that
    when the minimum was negative; for a band with a positive minimum (for
    example the ratio of two negative bands) the result did not start at 0.

    Args:
        df: DataFrame with the columns described above.
        angles: When true, also return the per-row ``inc_angle`` values.

    Returns:
        ``(images, labels)`` by default, or ``(images, inc_angles, labels)``
        when ``angles`` is true.
    """
    imgs = []
    labels = []
    inc_angles = []
    for _, row in df.iterrows():
        # Make 75x75 images.
        band_1 = np.array(row['band_1']).reshape(75, 75)
        band_2 = np.array(row['band_2']).reshape(75, 75)
        band_3 = band_1 / band_2

        imgs.append(np.dstack([_min_max_scale(band)
                               for band in (band_1, band_2, band_3)]))
        labels.append(row['is_iceberg'])
        if angles:
            # NOTE(review): values are passed through untouched; if the column
            # can hold non-numeric placeholders, clean them before use.
            inc_angles.append(row['inc_angle'])

    if angles:
        return np.array(imgs), np.array(inc_angles), np.array(labels)
    return np.array(imgs), np.array(labels)

In [None]:
# Convert the training frame into scaled image arrays and their labels.
Xtrain, Ytrain = get_scaled_imgs(train)

In [None]:
def get_test_imgs(df):
    """Scale unlabeled rows exactly like the training images.

    Reuses ``get_scaled_imgs`` so train and test share one preprocessing path.
    That function reads an ``is_iceberg`` value from every row, so a
    placeholder column is added to a copy of ``df`` (the test frame presumably
    has no labels) and the resulting labels are discarded.

    Returns:
        The image array only.
    """
    imgs, _ = get_scaled_imgs(df.assign(is_iceberg=0))
    return imgs


Xtest = get_test_imgs(test)

In [None]:
# Sanity-check the array shapes before training.
Xtrain.shape, Ytrain.shape, Xtest.shape

In [None]:
# Turn the labels into a column of shape (n_samples, 1).
Ytrain = Ytrain.reshape(Ytrain.shape[0], 1)

In [None]:
def getModel():
    """Assemble and compile the CNN used for binary classification.

    Architecture, in order:
      * four convolutional stages (64, 128, 128, 64 filters, 3x3 kernels,
        ReLU), each followed by max pooling with stride 2 and 20% dropout;
        the first stage pools over a 3x3 window, the others over 2x2;
      * a flatten layer;
      * two dense ReLU layers (512 and 256 units), each followed by 20% dropout;
      * a single sigmoid output unit.

    The network expects 75x75x3 inputs and is compiled with binary
    cross-entropy loss, the 'adam' optimizer and an accuracy metric.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # (filters, pooling window) for each convolutional stage.
    conv_stages = (
        (64, (3, 3)),
        (128, (2, 2)),
        (128, (2, 2)),
        (64, (2, 2)),
    )
    dense_widths = (512, 256)

    model = Sequential()

    for stage, (filters, pool_window) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': (75, 75, 3)} if stage == 0 else {}
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu',
                         **first_layer_kwargs))
        model.add(MaxPooling2D(pool_size=pool_window, strides=(2, 2)))
        model.add(Dropout(0.2))

    # Dense layers need a flat feature vector.
    model.add(Flatten())

    for width in dense_widths:
        model.add(Dense(width, activation='relu'))
        model.add(Dropout(0.2))

    # Output: a single sigmoid unit.
    model.add(Dense(1, activation="sigmoid"))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [None]:
# model = getModel()

In [None]:
# model.summary()
# Resume from the saved checkpoint when an earlier run produced one; otherwise
# start from a freshly initialised network so a clean run does not fail on a
# missing file.
model = load_model('models/m1.h5') if os.path.exists('models/m1.h5') else getModel()

In [None]:
%%time
# Train for 20 epochs in mini-batches of 4, holding out 10% of the samples
# for validation via validation_split.
model.fit(Xtrain, Ytrain, batch_size=4, epochs=20, verbose=1, validation_split=0.1)

In [None]:
# Evaluate on the same arrays the model was fitted on. The first returned
# value is reported as the score and the second as the accuracy.
acc = model.evaluate(Xtrain, Ytrain, verbose=1, batch_size=3)
print('Train score', acc[0])
print('Train accuracy', acc[1])

In [None]:
# Persist the trained model to models/m1.h5, the file the loading cells read.
model.save('models/m1.h5')

In [None]:
# Drop the reference to the first model. After this, `model` is undefined
# until it is assigned again.
del model

### With data augmentation

In [None]:
# Hold out 10% of the rows as a dev set, stratified on the is_iceberg label;
# random_state fixes the split.
train2, dev2 = train_test_split(train, test_size=0.1,random_state=0, stratify=train['is_iceberg'])

In [None]:
# Apply the same scaling to both splits.
Xtrain2, Ytrain2 = get_scaled_imgs(train2)
Xdev2, Ydev2 = get_scaled_imgs(dev2)

In [None]:
# Sanity-check the shapes of the train/dev arrays.
Xtrain2.shape, Ytrain2.shape, Xdev2.shape, Ydev2.shape

In [None]:
# Augmentation settings: random flips in both directions, rotation_range=10
# and zoom_range=0.5; every shift is set to zero.
gen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.,
    height_shift_range=0.,
    channel_shift_range=0,
)


In [None]:
# Stream augmented (image, label) batches of 4 from the training split, with
# a fixed seed.
gen_flow = gen.flow(Xtrain2, Ytrain2, batch_size=4, seed=5)

In [None]:
# Start the augmentation experiment from a freshly built network.
model2 = getModel()

In [None]:
# Train on augmented batches from gen_flow: 1000 generator batches per epoch
# for 10 epochs, validating on the un-augmented dev split.
model2.fit_generator(
    gen_flow,
    steps_per_epoch=1000,
    epochs=10,
    shuffle=True,
    verbose=1,
    validation_data=(Xdev2, Ydev2))

In [None]:
# Score model2 on the full, un-augmented training arrays. Note that Xtrain was
# built from all of `train`, so it also contains the rows held out as dev2.
acc = model2.evaluate(Xtrain, Ytrain, verbose=1, batch_size=3)
print('Train score', acc[0])
print('Train accuracy', acc[1])

In [None]:
# Predict with the augmentation-trained model. `model` was deleted once the
# first experiment finished, so it is no longer defined at this point.
Ypredicted = model2.predict(Xtest, batch_size=5)

In [None]:
# Persist the augmentation-trained model.
model2.save('models/m2.h5')

In [None]:
# Drop the reference to model2.
del model2

In [None]:
# Flatten the predictions to one value per test row (assumes Ypredicted has
# one row per entry of `test`), round to three decimals and write the
# id / is_iceberg pairs to submission.csv.
submission = pd.DataFrame(
    {
        'id': test["id"],
        'is_iceberg': Ypredicted.reshape(Ypredicted.shape[0]).round(decimals=3),
    }
)
submission.to_csv('submission.csv', index=False)

In [None]:
# Nothing is saved here. The original call, model.save(), always raised: it
# omitted the required file path, and `model` had already been deleted.
# Both trained models are written to disk by their own save cells
# (models/m1.h5 and models/m2.h5).

In [None]:
# with open('submission-cnn-keras.csv', 'w') as file:
#     writer = csv.writer(file)
#     writer.writerow(['id', 'is_iceberg'])
#     for i, row in test.iterrows():
#         writer.writerow([row['id'], np.round(Ypredicted[i][0])])

In [None]:
# Reload the first model from disk and bind it to `model` again.
model = load_model('models/m1.h5')