In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so NumPy-driven randomness in this notebook is repeatable.
np.random.seed(1993)
# Checkpoint file: handed to the checkpoint callback during training and
# read back with load_weights before predicting.
file_path = "./model/model_weights.hdf5"

In [2]:
train = pd.read_json("../input/train.json")

In [3]:
#train.head()

In [4]:
len(train.band_1[0])

5625

In [5]:
train.shape

(1604, 5)

In [6]:
# Keep only the rows whose inc_angle is not the placeholder string 'na'.
train = train[train['inc_angle'] != 'na']

In [7]:
# Target vector (is_iceberg) as a float NumPy array.
train_label = train['is_iceberg'].values.astype('float')

In [8]:
train_label.shape

(1471,)

In [9]:
train_data = train.drop(columns=['is_iceberg', 'id'])

In [10]:
# NOTE(review): `train` was already filtered on this same condition in an
# earlier cell, so this looks like a no-op (the row count stays at 1471).
train_data = train_data[train_data['inc_angle'] != 'na']

In [11]:
train_data.shape

(1471, 3)

In [12]:
rows = train_data.shape[0]

In [13]:
#train_data.head()

In [14]:
#train_data.tail()

In [15]:
# Select the incidence angle by column name instead of by position in
# `.values`, so the result does not depend on column order and the band
# lists are not materialised into an object array just to read one column.
angle_train = np.array(train_data['inc_angle'], dtype='float')

In [16]:
#len(angle), len(train_data.values)

In [17]:
def get_imgs(df, size=75):
    """Turn the flattened radar bands of ``df`` into 3-channel images.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``band_1`` and ``band_2`` columns, each cell holding a
        flat sequence of ``size * size`` values.
    size : int, optional
        Side length of the square images (default 75).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), size, size, 3)``. Channels 0 and 1 are
        ``band_1`` and ``band_2``; channel 2 is their element-wise mean.
        An empty frame yields an array of shape ``(0, size, size, 3)``.
    """
    imgs = []

    # Iterating the two columns directly avoids building a Series per row,
    # which is what DataFrame.iterrows does.
    for raw_1, raw_2 in zip(df['band_1'], df['band_2']):
        band_1 = np.asarray(raw_1, dtype=float).reshape(size, size)
        band_2 = np.asarray(raw_2, dtype=float).reshape(size, size)
        band_3 = (band_1 + band_2) / 2

        imgs.append(np.dstack((band_1, band_2, band_3)))

    if not imgs:
        # Keep the rank stable so downstream reshapes work on empty input.
        return np.empty((0, size, size, 3))

    return np.array(imgs)

In [18]:
data_bands = get_imgs(train_data)

In [19]:
data_bands.shape

(1471, 75, 75, 3)

In [20]:
data_bands[0][0][0]

array([-27.878361 , -27.154118 , -27.5162395])

In [21]:
from sklearn.preprocessing import MinMaxScaler
# Scaler for the pixel values, target range [0, 1]; fitted later on the
# flattened image matrix.
scaler = MinMaxScaler(feature_range=(0,1))
# Separate scaler for the incidence angle, target range [0, 2].
scaler_angle = MinMaxScaler(feature_range=(0,2))

In [22]:
# Flatten each image to one row; derive the sample count from the data
# instead of hard-coding 1471 so the cell survives a change in the input.
data_bands_pre = data_bands.reshape(data_bands.shape[0], -1)

In [23]:
# Sanity check: reshaping back to the original image shape must reproduce
# the first pixel. Uses the real shape rather than hard-coded dimensions.
data_bands_pre.reshape(data_bands.shape)[0][0][0]

array([-27.878361 , -27.154118 , -27.5162395])

In [24]:
# NOTE(review): both scalers are fitted on the full dataset before the
# train/validation split further down, so validation statistics leak into
# the scaling. Consider fitting on the training part only.
data_bands_pre = scaler.fit_transform(data_bands_pre)
# MinMaxScaler expects a 2-D (n_samples, n_features) array. Pass a plain
# ndarray column instead of np.matrix (deprecated in NumPy and rejected by
# recent scikit-learn releases), then flatten back to 1-D.
angle_train = scaler_angle.fit_transform(angle_train.reshape(-1, 1)).ravel()

In [25]:
# Restore the (samples, height, width, channels) layout using the shape of
# the unscaled images instead of a hard-coded sample count.
imgs_bands = data_bands_pre.reshape(data_bands.shape)

In [26]:
imgs_bands[0][0][0]

array([0.19049476, 0.47100015, 0.20687876])

In [27]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [28]:
# Stratified hold-out split (30 % held out); images, angles and labels are
# split with the same row assignment.
X, X_test, angle, angle_test, y, y_test = train_test_split(
    imgs_bands,
    angle_train,
    train_label,
    test_size=0.3,
    stratify=train_label,
)

In [29]:
X.shape, angle.shape, y.shape, y_test.shape

((1029, 75, 75, 3), (1029,), (1029,), (442,))

In [30]:
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras import metrics

def get_callbacks(filepath, patience=2):
    """Return the training callbacks: early stopping plus best-only checkpointing.

    Parameters
    ----------
    filepath : str
        Destination passed to ``ModelCheckpoint``.
    patience : int, optional
        Passed to ``EarlyStopping`` as its ``patience`` (default 2).

    Returns
    -------
    list
        ``[EarlyStopping, ModelCheckpoint]`` in that order.
    """
    early_stopping = EarlyStopping('val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stopping, checkpoint]

In [31]:
from keras import layers
from keras import models
from keras.constraints import maxnorm
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate

def create_model(optimizer='RMSprop', init_mode='glorot_uniform', activation='relu', map_conv=((4, 4), (3, 3), (2, 2)), out_layers=(64, 64, 32, 32, 64), dropout=.09, weight_constraint=4):
    """Build and compile the two-input (image + incidence angle) classifier.

    The 75x75x3 image goes through two parallel convolutional branches, each
    ending in a global max pooling. Their outputs are concatenated with the
    batch-normalised angle, passed through one hidden dense layer and a
    single sigmoid unit.

    Parameters
    ----------
    optimizer : str or keras optimizer
        Forwarded to ``model.compile``.
    init_mode : str or keras initializer
        Kernel initializer used for every Conv2D and Dense layer.
    activation : str
        Activation of the convolutional layers and the hidden dense layer.
    map_conv : sequence of 3 kernel sizes
        Kernel sizes of the three convolutions in the first branch.
    out_layers : sequence of 5 ints
        Layer widths: indices 0, 1 and 3 are the filter counts of the first
        branch (index 0 is also used for the second branch), index 4 is the
        hidden dense width. Index 2 is currently not read.
    dropout : float
        Rate of every Dropout layer.
    weight_constraint : number
        Max-norm limit on the kernels of the 2nd/3rd convolution of the
        first branch and of the hidden dense layer.

    Returns
    -------
    keras.models.Model
        Model compiled with binary cross-entropy loss and the 'acc' metric,
        taking inputs ``[X_1, angle]``.
    """
    bn_momentum = .50
    input_1 = layers.Input(shape=(75, 75, 3), name="X_1")
    input_2 = layers.Input(shape=[1], name="angle")

    # Branch 1: three conv / pool / dropout stages on the normalised image.
    img_1 = BatchNormalization(momentum=bn_momentum)(input_1)
    img_1 = layers.Conv2D(out_layers[0], map_conv[0], activation=activation, kernel_initializer=init_mode, input_shape=(75, 75, 3)) (img_1)
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)

    img_1 = layers.Conv2D(out_layers[1], map_conv[1], activation=activation, kernel_initializer=init_mode, kernel_constraint=maxnorm(weight_constraint)) (img_1)
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)

    # NOTE(review): this stage reads out_layers[3], leaving out_layers[2]
    # unused. Kept as-is to preserve the architecture; confirm it is intended.
    img_1 = layers.Conv2D(out_layers[3], map_conv[2], activation=activation, kernel_initializer=init_mode, kernel_constraint=maxnorm(weight_constraint)) (img_1)
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)
    img_1 = layers.GlobalMaxPooling2D() (img_1)

    # Branch 2: one 3x3 conv stage with its own batch normalisation.
    img_2 = BatchNormalization(momentum=bn_momentum)(input_1)
    img_2 = layers.Conv2D(out_layers[0], kernel_size = (3,3), activation=activation, kernel_initializer=init_mode) (img_2)
    img_2 = layers.MaxPooling2D((2,2)) (img_2)
    img_2 = layers.Dropout(dropout) (img_2)
    img_2 = layers.GlobalMaxPooling2D() (img_2)

    # Merge both image descriptors with the normalised incidence angle.
    angle_norm = BatchNormalization(momentum=bn_momentum)(input_2)
    img_concat = Concatenate()([img_1, img_2, angle_norm])

    dense = layers.Dense(out_layers[4], activation=activation, kernel_initializer=init_mode, kernel_constraint=maxnorm(weight_constraint)) (img_concat)
    dense = BatchNormalization(momentum=bn_momentum) (dense)
    dense = layers.Dropout(dropout) (dense)

    output = layers.Dense(1, activation="sigmoid", kernel_initializer=init_mode) (dense)
    model = models.Model([input_1,input_2],  output)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])
    #model.summary()
    return model

In [32]:
# Early stopping with a patience of 40 epochs; checkpoints go to file_path.
callbacks = get_callbacks(filepath=file_path, patience=40)

# Build the network with create_model's default hyper-parameters.
model = create_model()
#model = KerasClassifier(build_fn=create_model, verbose=2)
#loss = ['binary_crossentropy']
#epochs = [10]
#out_layers=[[64,128,32,32,64], [64,64,32,32,64], [64,32,32,32,64]]

#param_grid = dict(batch_size=[2], epochs=epochs)

#grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [33]:
#grid_result = grid.fit(X, y)
# NOTE(review): the hold-out split is used as validation data here, so it
# also drives the early-stopping / checkpoint callbacks and is not an
# untouched test set.
model.fit([X, angle], y, 
          epochs=300, 
          validation_data=([X_test, angle_test], y_test), 
          batch_size=8, 
          callbacks=callbacks)

Train on 1029 samples, validate on 442 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300


Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300


<keras.callbacks.History at 0x7f089fb44710>

In [None]:
#print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
#means = grid_result.cv_results_['mean_test_score']
#stds = grid_result.cv_results_['std_test_score']
#params = grid_result.cv_results_['params']
#for mean, stdev, param in zip(means, stds, params):
#    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
from keras.models import model_from_json
import os

In [None]:
# Serialise the model with to_json() and write it to disk; the weights are
# loaded separately from the checkpoint file in the next cell.
model_json = model.to_json()
with open('./model/covnet-v6.json', 'w') as json_file:
    json_file.write(model_json)

In [None]:
# Rebuild the model from the saved JSON. The context manager closes the
# file even if model_from_json raises.
with open('./model/covnet-v6.json', 'r') as json_file:
    model = model_from_json(json_file.read())

# Load the weights from the checkpoint file written during training.
model.load_weights(filepath=file_path)

In [None]:
model.save_weights('./model/covnet-v6.h5')

In [None]:
test = pd.read_json('../input/test.json')

In [None]:
test.head()

In [None]:
print(test.shape)
# NOTE: rebinds `rows`, which until here held the number of training samples;
# from this point on it is the number of test samples.
rows = test.shape[0]

In [None]:
# Select the incidence angle by column name instead of by position in
# `.values`, so the result does not depend on column order.
# NOTE: this rebinds `angle_test`, previously the validation-split angles.
angle_test = np.array(test['inc_angle'], dtype='float')

In [None]:
test_bands = get_imgs(test)

In [None]:
#test_bands = np.array([x[0] + x[1] for x in test.values])
test_bands = test_bands.reshape(rows, 75 * 75 * 3)

In [None]:
# Apply the scalers fitted on the training data (transform only, no refit).
test_bands = scaler.transform(test_bands)
# Pass a 2-D ndarray column instead of np.matrix (deprecated in NumPy and
# rejected by recent scikit-learn releases), then flatten back to 1-D.
angle_test = scaler_angle.transform(angle_test.reshape(-1, 1)).ravel()

In [None]:
test_bands.shape

In [None]:
#test_bands = np.reshape(test_bands, (8424, 75, 75, 2))
test_bands = test_bands.reshape((rows, 75, 75, 3))

In [None]:
ids, is_iceberg = [], []

In [None]:
prediction = model.predict([test_bands, angle_test], verbose=1, batch_size=16)

#for row in range(len(test_bands)):
#    img = test_bands[row]
#    angle = angle_test[row]
#    ids.append(test.id[row])
#    pred = model.predict(np.array([img]))
#    is_iceberg.append(pred[0][0])

#print(ids[:10], is_iceberg[:10])

In [None]:
prediction

In [None]:
# One row per test id with the predicted iceberg probability, flattened
# from the model's prediction array to 1-D.
submission = pd.DataFrame({
    'id': test["id"],
    'is_iceberg': prediction.reshape(len(prediction)),
})
submission.head(10)

In [None]:
#df_result = pd.DataFrame({'id': ids,'is_iceberg': is_iceberg})

In [None]:
#df_result.head()

In [None]:
submission.to_csv('submission_v6.csv', encoding='utf-8', float_format='%.8f', index=False)

In [None]:
submission.is_iceberg.mean()

In [None]:
submission.is_iceberg.std()