In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# Limit this process to 30% of GPU memory (TensorFlow 1.x session config).
config = tf.ConfigProto()
# config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.3
# NOTE(review): the session is created here but never bound to a name or
# handed to Keras; presumably creating it first is what makes the GPU options
# apply to the process — confirm.
tf.Session(config=config)

from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import EarlyStopping
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

from jupyterthemes import jtplot

# Apply the jupyterthemes plotting style to the figures drawn below.
jtplot.style()

## Data pipeline

In [None]:
# Load the training set. Non-numeric `inc_angle` entries are coerced to NaN
# and then forward-filled from the preceding row.
train = pd.read_json('data/train.json')
train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce') # 133 NaNs
train['inc_angle'] = train['inc_angle'].fillna(method='pad');

# NOTE(review): `inc_angle` of the test set is left untouched here.
test = pd.read_json('data/test.json')


In [None]:
# NOTE(review): this cell repeats the two `inc_angle` cleaning statements of
# the loading cell above; re-running them on the already-cleaned column
# should be a no-op, so the cell looks redundant.
train['inc_angle'] = pd.to_numeric(train['inc_angle'], errors='coerce') # 133 NaNs
train['inc_angle'] = train['inc_angle'].fillna(method='pad');

In [None]:
def get_training_data(df, angles=False, labels=False):
    """Turn rows of flattened radar bands into 75x75x3 image arrays.

    For every row, ``band_1`` and ``band_2`` are reshaped to 75x75 and a third
    channel is formed as ``band_1 / band_2``. Each channel is then shifted by
    the absolute value of its own minimum and divided by the maximum of the
    shifted channel.

    Args:
        df: DataFrame whose ``band_1`` / ``band_2`` cells each hold 5625
            values. ``inc_angle`` is read only when ``angles`` is true and
            ``is_iceberg`` only when ``labels`` is true.
        angles: collect ``inc_angle`` for every row.
        labels: collect ``is_iceberg`` for every row.

    Returns:
        ``(images, inc_angles, labels)`` when both flags are set,
        ``(images, labels)`` when only ``labels`` is set, otherwise just
        ``images``. In particular ``angles=True`` without ``labels`` still
        returns only the image array.
    """
    def _shift_and_scale(channel):
        # Shift by |min| first, then normalise by the maximum of the shifted data.
        offset = channel + abs(channel.min())
        return offset / np.max(offset)

    images, targets, incidence = [], [], []
    for _, record in df.iterrows():
        first = np.array(record['band_1']).reshape(75, 75)
        second = np.array(record['band_2']).reshape(75, 75)
        # NOTE(review): a zero in band_2 would produce inf/nan in this channel.
        ratio = first / second

        channels = [_shift_and_scale(ch) for ch in (first, second, ratio)]
        images.append(np.dstack(channels))

        if labels:
            targets.append(record['is_iceberg'])
        if angles:
            incidence.append(record['inc_angle'])

    image_array = np.array(images)
    if labels and angles:
        return image_array, np.array(incidence), np.array(targets)
    if labels:
        return image_array, np.array(targets)
    return image_array
        

In [None]:
# Build image tensors, incidence angles and labels from the training frame.
X_train, inc_angles, Y_train = get_training_data(train, angles=True, labels=True)

In [None]:
# Test images only: without labels=True, get_training_data returns just the
# image array, so angles=True has no effect on the result here.
X_test = get_training_data(test, angles=True)

In [None]:
# Sanity check: display the array shapes.
X_train.shape, Y_train.shape, X_test.shape

In [None]:
# Shared augmentation pipeline used by every gen.flow(...) call below:
# random flips, shifts, zoom and rotation; channel shifting is disabled (0).
gen = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         channel_shift_range=0,
                         zoom_range = 0.2,
                         rotation_range = 10)

In [None]:
def gen_flow_for_two_inputs(Xtrain, inc_angles, Ytrain, batch_size=10, seed=5):
    """Yield augmented ``([images, angles], labels)`` batches indefinitely.

    Two flows are created from the module-level ``gen`` over the same images:
    one carries the incidence angles in its target slot, the other carries the
    labels. The images and angles come from the first flow and the labels from
    the second.

    Args:
        Xtrain: image array passed to both flows.
        inc_angles: per-sample incidence angles.
        Ytrain: per-sample labels.
        batch_size: batch size for both flows (default 10, the value that was
            previously hard-coded).
        seed: seed for both flows (default 5, previously hard-coded).

    Yields:
        ``([image_batch, angle_batch], label_batch)``.
    """
    # Both flows receive the same seed; presumably this keeps their sample
    # order identical so angles and labels stay aligned — verify against the
    # installed Keras version.
    angle_flow = gen.flow(Xtrain, inc_angles, batch_size=batch_size, seed=seed)
    label_flow = gen.flow(Xtrain, Ytrain, batch_size=batch_size, seed=seed)
    while True:
        images, angle_batch = next(angle_flow)
        _, label_batch = next(label_flow)
        yield [images, angle_batch], label_batch
        
# gen_flow_with_inc_angles = gen_flow_for_two_inputs(X_train, inc_angles, Y_train)

In [None]:
# Xtrain_data, Ytrain_data = get_training_data(train_data, labels=True)
# Xdev_data, Ydev_data = get_training_data(dev_data, labels=True)

In [None]:
# Augmented batch iterator over the full training set (no batch_size given,
# so the Keras default applies).
train_gen = gen.flow(X_train, Y_train, seed=5)
# NOTE(review): `dev_gen` exists only in the commented-out line below, yet a
# later fit_generator cell passes it as validation_data.
# dev_gen = gen.flow(Xdev_data, Ydev_data seed=5)


In [None]:
# Materialise 1000 augmented batches from train_gen into in-memory arrays so
# that models can be trained with plain .fit() instead of a generator.
X_train_aug = []
Y_train_aug = []
for i in tqdm(range(1000)):
    x, y = train_gen.next()
    # extend() appends the individual samples of the batch, not the batch itself.
    X_train_aug.extend(x)
    Y_train_aug.extend(y)

X_train_aug = np.asarray(X_train_aug)
Y_train_aug = np.asarray(Y_train_aug)

In [None]:
# Sanity check: display the shapes of the augmented arrays.
X_train_aug.shape, Y_train_aug.shape

In [None]:
# train_data, dev_data = train_test_split(train, test_size=0.1,random_state=0, stratify=train['is_iceberg'])

In [None]:
def plot_training(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: object whose ``history`` attribute is a mapping with the keys
            ``'acc'``, ``'val_acc'``, ``'loss'`` and ``'val_loss'``, each a
            per-epoch sequence (e.g. the return value of ``model.fit``).
    """
    logs = history.history
    acc = logs['acc']
    val_acc = logs['val_acc']
    loss = logs['loss']
    val_loss = logs['val_loss']
    epochs = range(len(acc))

    plt.figure(figsize=(10, 5))
    # (subplot position, training curve, validation curve, panel title)
    panels = (
        (1, acc, val_acc, ' accuracy'),
        (2, loss, val_loss, 'loss'),
    )
    for position, train_curve, val_curve, title in panels:
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_curve)
        plt.plot(epochs, val_curve)
        plt.legend(['train', 'val'], loc='upper left')
        plt.title(title)
    plt.show()

### VGG Model with angle

In [None]:
def vggModel():
    """Build and compile a two-input binary classifier on a frozen VGG16 base.

    Inputs of the returned model, in order:
      1. the VGG16 image input, shape (75, 75, 3);
      2. ``inc_angle``, one scalar per sample.

    The ``block5_pool`` output of VGG16 is global-max-pooled, concatenated with
    a one-unit dense projection of the angle, and fed through two
    dense/dropout stages into a single sigmoid unit.

    Returns:
        A compiled Keras ``Model`` (binary cross-entropy loss, SGD with
        Nesterov momentum, accuracy metric). The model summary is printed as a
        side effect.
    """
    ang_input = Input(shape=[1], name='inc_angle')
    x1 = Dense(1)(ang_input)

    # `classes` is only meaningful with include_top=True, so it is not passed.
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=(75, 75, 3))
    x2 = base_model.get_layer('block5_pool').output
    # Global pooling already collapses the spatial axes, so no Flatten is
    # needed before concatenating with the angle branch.
    x2 = GlobalMaxPooling2D()(x2)
    X = Concatenate()([x2, x1])

    X = Dense(1024, activation='relu')(X)
    X = Dropout(0.2)(X)
    X = Dense(512, activation='relu')(X)
    X = Dropout(0.2)(X)
    predictions = Dense(1, activation='sigmoid')(X)

    model = Model(inputs=[base_model.input, ang_input],
                  outputs=predictions)

    # Freeze the pretrained base explicitly. Slicing the first 19 layers of
    # the combined model relied on how Keras orders the layers of a two-input
    # graph.
    for layer in base_model.layers:
        layer.trainable = False

    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    return model
    

In [None]:
# Instantiate the VGG16 + incidence-angle model.
model = vggModel()

In [None]:
%%time
# NOTE(review): `gen_flow_with_inc_angles`, `Xdev` and `Ydev` are not defined
# anywhere in the visible notebook (the generator assignment is commented
# out), so this cell fails on a fresh kernel. Since `model` takes
# [image, angle] inputs, validation data presumably also needs both inputs.
model.fit_generator(
    gen_flow_with_inc_angles,
    steps_per_epoch=32,
    epochs=30,
    shuffle=True,
    verbose=1,
    validation_data=(Xdev, Ydev)
)

In [None]:
# NOTE(review): `Xtrain` / `Ytrain` are not defined in the visible notebook
# (the arrays built above are named `X_train` / `Y_train`) — confirm which
# data was intended.
acc = model.evaluate([Xtrain, inc_angles], Ytrain, verbose=1, batch_size=50)
# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
print('Train score', acc[0])
print('Train accuracy', acc[1])

### VGG model without angle layer

In [None]:
def vggModel2():
    """Build and compile an image-only binary classifier on a frozen VGG16 base.

    The ``block5_pool`` output of VGG16 (input shape (75, 75, 3)) is flattened
    and passed through one 256-unit dense layer with dropout into a single
    sigmoid unit.

    Returns:
        A compiled Keras ``Model`` (binary cross-entropy loss, SGD with
        Nesterov momentum, accuracy metric). The model summary is printed as a
        side effect.
    """
    # `classes` is only meaningful with include_top=True, so it is not passed.
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=(75, 75, 3))
    X = base_model.get_layer('block5_pool').output
    X = Flatten()(X)
    X = Dense(256, activation='relu')(X)
    X = Dropout(0.2)(X)
    predictions = Dense(1, activation='sigmoid')(X)

    model = Model(inputs=base_model.input,
                  outputs=predictions)

    # Freeze the pretrained base explicitly instead of slicing the first 19
    # layers of the combined model by index.
    for layer in base_model.layers:
        layer.trainable = False

    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    return model
    

In [None]:
# Instantiate the image-only VGG16 model (to be trained from a generator).
model2 = vggModel2()

In [None]:
# NOTE(review): `gen_flow_without_inc_angles`, `Xdev` and `Ydev` are not
# defined anywhere in the visible notebook, so this cell fails on a fresh
# kernel.
model2.fit_generator(
    gen_flow_without_inc_angles,
    steps_per_epoch=1000,
    epochs=100,
    shuffle=True,
    verbose=1,
    validation_data=(Xdev, Ydev))

In [None]:
# NOTE(review): `Xtrain` / `Ytrain` are not defined in the visible notebook
# (the arrays built above are named `X_train` / `Y_train`) — confirm which
# data was intended.
acc = model2.evaluate(Xtrain, Ytrain, verbose=1, batch_size=50)
# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
print('Train score', acc[0])
print('Train accuracy', acc[1])

In [None]:
# Second, freshly initialised image-only VGG16 model (trained with plain fit below).
model2_ = vggModel2()

In [None]:
%%time
# NOTE(review): `Xtrain`, `Ytrain`, `Xdev` and `Ydev` are not defined in the
# visible notebook, so this cell fails on a fresh kernel.
model2_.fit(Xtrain, Ytrain, batch_size=50, epochs=20, verbose=1, validation_data=(Xdev, Ydev))


### CNN without transfer learning

In [None]:
def CNN_without_tl():
    """Build and compile a CNN binary classifier trained from scratch.

    Architecture, for inputs of shape (75, 75, 3):
      * five stages of Conv2D (2x2 kernel, ReLU) -> 2x2 max-pool (stride 2)
        -> Dropout, with 32/64/128/256/512 filters;
      * Flatten;
      * five Dense (ReLU) layers of 1024/1024/1024/512/512 units, each
        followed by Dropout(0.5);
      * a single sigmoid output unit.

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy, the ``rmsprop``
        optimizer and an accuracy metric.
    """
    # (number of filters, dropout rate) for each convolutional stage.
    conv_stages = [(32, 0.3), (64, 0.3), (128, 0.3), (256, 0.2), (512, 0.2)]
    # Width of each fully connected layer.
    dense_widths = [1024, 1024, 1024, 512, 512]

    input_img = Input(shape=(75, 75, 3))

    features = input_img
    for n_filters, drop_rate in conv_stages:
        features = Conv2D(n_filters, kernel_size=(2, 2), activation='relu')(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(features)
        features = Dropout(drop_rate)(features)

    # The dense layers need a flat feature vector per sample.
    features = Flatten()(features)
    for width in dense_widths:
        features = Dense(width, activation='relu')(features)
        features = Dropout(0.5)(features)

    output = Dense(1, activation="sigmoid")(features)

    network = Model(inputs=input_img, outputs=output)
    network.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return network

In [None]:
# From-scratch CNN, trained below on the pre-generated augmented arrays.
model3 = CNN_without_tl()

In [None]:
# batch_size = 100
# for epoch in range(10):
# #     print('epoch', epoch)
#     i = 0
#     for x_batch, y_batch in gen.flow(X_train, Y_train, batch_size=batch_size):
#         model3.fit(x_batch, y_batch, batch_size=batch_size, verbose=0, validation_split=0.2)
#         i+=1
#         if i > len(X_train) // batch_size:
#             print(model3.evaluate(X_train, Y_train, verbose=0, batch_size=50))
#             break
            

In [None]:
%%time
# Train on the materialised augmented data, holding out 30% for validation and
# stopping once val_acc has not improved for 3 epochs.
# NOTE(review): augmented variants of the same source image can presumably
# land on both sides of validation_split, which would make val scores
# optimistic — confirm.
model_history3 = model3.fit(X_train_aug, Y_train_aug, batch_size=100, epochs=20, verbose=1, validation_split=0.3,
          callbacks=[EarlyStopping(monitor='val_acc', patience=3, verbose=1)])


In [None]:
# Accuracy / loss curves for the from-scratch CNN.
plot_training(model_history3)

In [None]:
# Score the CNN on the augmented arrays it was fitted on (these include the
# validation_split portion).
acc = model3.evaluate(X_train_aug, Y_train_aug, verbose=1, batch_size=50)
# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
print('Train score', acc[0])
print('Train accuracy', acc[1])

In [None]:
# Same architecture as model3, to be trained from a generator instead.
model4 = CNN_without_tl() #with generator

In [None]:
# NOTE(review): `gen_flow_without_inc_angles`, `Xdev` and `Ydev` are not
# defined anywhere in the visible notebook, so this cell fails on a fresh
# kernel.
model4.fit_generator(
    gen_flow_without_inc_angles,
    steps_per_epoch=100,
    epochs=10,
    shuffle=True,
    verbose=1,
    validation_data=(Xdev, Ydev))

## InceptionV3

In [None]:
def top_model(input_shape):
    """Build and compile the classification head applied to bottleneck features.

    Layout: global average pooling -> Dropout(0.2) -> two blocks of
    Dense(4096, ReLU) + Dropout(0.5) -> one sigmoid unit.

    Args:
        input_shape: shape of a single feature map (without the batch axis).

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy, the ``adam``
        optimizer and an accuracy metric.
    """
    features_in = Input(input_shape)

    hidden = GlobalAveragePooling2D()(features_in)
    hidden = Dropout(0.2)(hidden)

    # Two identical fully connected blocks.
    for _ in range(2):
        hidden = Dense(4096, activation='relu')(hidden)
        hidden = Dropout(0.5)(hidden)

    score = Dense(1, activation='sigmoid')(hidden)

    head = Model(inputs=features_in, outputs=score)
    head.compile(loss='binary_crossentropy',
                 optimizer='adam',
                 metrics=['accuracy'])
    return head

In [None]:
# ImageNet-pretrained InceptionV3 without its classification head, used below
# to turn 75x75x3 images into bottleneck features.
inception_model = InceptionV3(input_tensor=Input((75, 75, 3)), weights='imagenet', include_top=False)

In [None]:
# Pre-compute bottleneck features once: for the augmented training images and
# for the test images.
inc_train_bf = inception_model.predict(X_train_aug, verbose=1)
inc_test_bf = inception_model.predict(X_test, verbose=1)

In [None]:
# Xtrain_data_bf, Xdev_data_bf, Ytrain_data_bf, Ydev_data_bf = train_test_split(inc_train_bf, Y_train, test_size=0.1, random_state=0)

In [None]:
# train_gen = gen.flow(Xtrain_data_bf, Ytrain_data_bf, batch_size=10, seed=5)
# dev_gen = gen.flow(Xdev_data_bf, Ydev_data_bf, batch_size=10, seed=5)

In [None]:
# bottleneck_features_train = inception_model.predict_generator(train_gen, 1400)#80% of train
# bottleneck_features_validation = inception_model.predict_generator(dev_gen, 160)


In [None]:
# bottleneck_features_train.shape

In [None]:
# Classification head sized to a single bottleneck feature map (batch axis dropped).
inctop_model = top_model(inc_train_bf.shape[1:])

In [None]:
# %%time
# model.fit_generator(
#     train_gen,
#     steps_per_epoch=32,
#     epochs=30,
#     shuffle=True,
#     verbose=1,
#     validation_data=(Xdev, Ydev)
# )

In [None]:
# Train only the head on the pre-computed features; 10% is held out for
# validation and training stops once val_acc has not improved for 3 epochs.
inc_history = inctop_model.fit(inc_train_bf, Y_train_aug, batch_size=100, epochs=20, validation_split=0.1,
             callbacks=[EarlyStopping(monitor='val_acc', patience=3, verbose=1)])

In [None]:
# Accuracy / loss curves for the InceptionV3 head.
plot_training(inc_history)

In [None]:
# Persist the head weights; ft_model() reloads them from this path.
# NOTE(review): presumably the `models/` directory must already exist — confirm.
inctop_model.save_weights('models/inctop_model.h5')

In [None]:
# Score the head on the original (non-augmented) training images.
incx = inception_model.predict(X_train, verbose=1)
acc = inctop_model.evaluate(incx, Y_train, verbose=1, batch_size=50)
# evaluate() returns [loss, accuracy] for a model compiled with metrics=['accuracy'].
print('Train score', acc[0])
print('Train accuracy', acc[1])

## Fine-tuning

In [None]:
def ft_model(base_model, top_model_weights_path):
    """Stack a pre-trained classification head on ``base_model`` for fine-tuning.

    A head is built with ``top_model`` for the base model's output shape, its
    weights are loaded from ``top_model_weights_path``, and the head is applied
    to the base model's output.

    Args:
        base_model: Keras model providing ``inputs``, ``output`` and
            ``output_shape``.
        top_model_weights_path: path to weights saved from a ``top_model``.

    Returns:
        The combined Keras ``Model``, compiled with binary cross-entropy,
        SGD (lr=1e-4, momentum=0.9) and an accuracy metric.
    """
    head = top_model(base_model.output_shape[1:])
    head.load_weights(top_model_weights_path)

    # The head is called like a layer on the base model's output tensor.
    stacked = Model(inputs=base_model.inputs, outputs=head(base_model.output))
    stacked.compile(loss='binary_crossentropy',
                    optimizer=SGD(lr=1e-4, momentum=0.9),
                    metrics=['accuracy'])
    return stacked
    

In [None]:
# Fresh InceptionV3 base for fine-tuning; the first 299 layers are frozen and
# any layers after them stay trainable.
inception_model = InceptionV3(input_tensor=Input((75, 75, 3)), weights='imagenet', include_top=False)
for layer in inception_model.layers[:299]:
    layer.trainable = False

In [None]:
# Base model + head initialised from the saved head weights.
inc_ft_model = ft_model(inception_model, 'models/inctop_model.h5')

In [None]:
# NOTE(review): `dev_gen` is never assigned in the visible notebook (only in
# commented-out lines), so this cell fails on a fresh kernel.
inc_ft_history = inc_ft_model.fit_generator(
    train_gen,
    steps_per_epoch=140,
    epochs=20,
    validation_data=dev_gen,
    validation_steps=16,
    callbacks=[EarlyStopping(monitor='val_acc', patience=3, verbose=1)])

In [None]:
# NOTE(review): this reuses the same `inception_model` object as inc_ft_model,
# so any base-layer weights updated by the previous fine-tuning run are
# presumably carried over; only the head is reloaded from disk.
inc_ft_model2 = ft_model(inception_model, 'models/inctop_model.h5')

In [None]:
# Fine-tune on the original (non-augmented) training images; 10% is held out
# for validation and training stops once val_acc has not improved for 3 epochs.
inc_history2 = inc_ft_model2.fit(X_train, Y_train, batch_size=10, epochs=20, validation_split=0.1,
             callbacks=[EarlyStopping(monitor='val_acc', patience=3, verbose=1)])

In [None]:
# Accuracy / loss curves for the fine-tuning run.
plot_training(inc_history2)

### Prediction

In [None]:
# NOTE(review): this preprocessing differs from get_training_data(), which
# uses band_1 / band_2 as the third channel and shift-by-|min| scaling. Here
# the third channel is a sum and each channel is mean-centred and divided by
# its range. The resulting `Xtest` is not used by the prediction cell below,
# which reads `inc_test_bf` computed from `X_test`; feeding `Xtest` to a model
# trained on get_training_data() output would be a train/test mismatch.
imgs = []
for i, row in test.iterrows():
    #make 75x75 image
    band_1 = np.array(row['band_1']).reshape(75, 75)
    band_2 = np.array(row['band_2']).reshape(75, 75)
    band_3 = band_1 + band_2 # plus since log(x*y) = log(x) + log(y)

    # Rescale
    a = (band_1 - band_1.mean()) / (band_1.max() - band_1.min())
    b = (band_2 - band_2.mean()) / (band_2.max() - band_2.min())
    c = (band_3 - band_3.mean()) / (band_3.max() - band_3.min())

    imgs.append(np.dstack((a, b, c)))
    
Xtest = np.array(imgs)

In [None]:
# Predict with the bottleneck head (not the fine-tuned models) on the
# pre-computed test features.
preds = inctop_model.predict(inc_test_bf)

In [None]:
# Write the submission file: one row per test id with the predicted score.
submission = pd.DataFrame()
submission['id']=test['id']
# NOTE(review): `preds` comes from a model with a single sigmoid output, so it
# is presumably shaped (n, 1) — confirm the column assignment accepts that.
submission['is_iceberg']=preds
submission.to_csv('sub.csv', index=False)