In [1]:
import numpy as np
import pandas as pd

np.random.seed(1993)
file_path = "./model/model_weights.hdf5"

In [2]:
train = pd.read_json("../input/train.json")

In [3]:
#train.head()

In [4]:
len(train.band_1[0])

5625

In [5]:
train.shape

(1604, 5)

In [6]:
# Keep only the rows whose incidence angle is present; missing angles are encoded as the string 'na'.
train = train.loc[train['inc_angle'].ne('na')]

In [7]:
# Binary target (is_iceberg) as a float array, in the same row order as `train`.
train_label = np.array(train['is_iceberg'], dtype='float')

In [8]:
train_label.shape

(1471,)

In [9]:
# Feature frame: everything except the target and the sample identifier.
train_data = train.drop(['is_iceberg', 'id'], axis=1)

In [10]:
# Safety net: `train` was already filtered on inc_angle != 'na' before `train_data`
# was derived from it, so this is expected to leave the frame unchanged.
train_data = train_data.loc[train_data['inc_angle'].ne('na')]

In [11]:
train_data.shape

(1471, 3)

In [12]:
rows = train_data.shape[0]

In [13]:
#train_data.head()

In [14]:
#train_data.tail()

In [15]:
# Select the incidence angle by column name rather than by position in `.values`,
# so the result does not depend on the column order of `train_data`.
angle_train = np.array(train_data['inc_angle'], dtype=float)

In [16]:
#len(angle), len(train_data.values)

In [17]:
def get_imgs(df):
    """Turn the flattened radar bands of every row into a 3-channel image.

    Each row's 'band_1' and 'band_2' entries are reshaped to 75x75. A third
    channel is computed as the element-wise mean of the two bands, and the
    three are stacked along the last axis.

    Args:
        df: DataFrame with 'band_1' and 'band_2' columns, each cell holding
            75 * 75 values.

    Returns:
        Array of shape (len(df), 75, 75, 3), in the row order of `df`.
    """
    def _to_image(record):
        # Channels 0 and 1 are the raw bands; channel 2 is their average.
        first = np.array(record['band_1']).reshape(75, 75)
        second = np.array(record['band_2']).reshape(75, 75)
        mean_band = (first + second) / 2
        return np.dstack((first, second, mean_band))

    return np.array([_to_image(record) for _, record in df.iterrows()])

In [18]:
data_bands = get_imgs(train_data)

In [19]:
data_bands.shape

(1471, 75, 75, 3)

In [20]:
data_bands[0][0][0]

array([-27.878361 , -27.154118 , -27.5162395])

In [21]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
scaler_angle = MinMaxScaler(feature_range=(0,1))

In [22]:
# Flatten every image to one row so the scaler can treat each pixel/channel as a feature.
# The sample count is taken from the array itself instead of the hard-coded 1471.
data_bands_pre = data_bands.reshape(data_bands.shape[0], 75 * 75 * 3)

In [23]:
# Sanity check: reshaping back must give the same first pixel as data_bands[0][0][0].
# -1 lets NumPy infer the sample count instead of hard-coding 1471.
data_bands_pre.reshape(-1, 75, 75, 3)[0][0][0]

array([-27.878361 , -27.154118 , -27.5162395])

In [24]:
# Fit both scalers on the training data; the fitted scalers are reused later for the test set.
data_bands_pre = scaler.fit_transform(data_bands_pre)
# The scaler needs a 2-D (n_samples, 1) array. A plain ndarray column is used instead of
# np.matrix, which NumPy discourages and recent scikit-learn releases reject.
# ravel() restores the 1-D shape without relying on the shared `rows` variable.
angle_train = scaler_angle.fit_transform(
    np.asarray(angle_train, dtype=float).reshape(-1, 1)
).ravel()

In [None]:
# Restore the (n_samples, 75, 75, 3) image layout after scaling.
# -1 infers the sample count instead of hard-coding 1471.
imgs_bands = data_bands_pre.reshape((-1, 75, 75, 3))

In [None]:
imgs_bands[0][0][0]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Hold out 30% of the labelled samples, stratified on the label. Images, angles and
# labels are split together so they stay aligned row by row.
X, X_test, angle, angle_test, y, y_test = train_test_split(
    imgs_bands,
    angle_train,
    train_label,
    test_size=0.3,
    stratify=train_label,
)

In [None]:
X.shape, angle.shape, y.shape, y_test.shape

In [None]:
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
def get_callbacks(filepath, patience=2):
    """Create the training callbacks: early stopping plus best-model checkpointing.

    Args:
        filepath: Destination passed to ModelCheckpoint.
        patience: Patience value passed to EarlyStopping, which watches
            'val_loss' in "min" mode.

    Returns:
        A two-element list: [EarlyStopping, ModelCheckpoint].
    """
    # save_best_only=True is passed so the checkpoint keeps only the best model.
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    early_stop = EarlyStopping('val_loss', patience=patience, mode="min")
    return [early_stop, checkpoint]

In [None]:
from keras import layers
from keras import models
from keras.constraints import maxnorm
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate

def create_model(optimizer='RMSprop', init_mode='glorot_uniform', activation='relu', map_conv=[(8, 8), (4, 4), (2, 2)], out_layers=[64,64,32,32,64], dropout=.05, weight_constraint=4):
    """Build and compile a two-input Keras model (image + angle) with a sigmoid output.

    Architecture, as wired below:
      * Image branch 1: BatchNormalization -> three Conv2D / MaxPooling2D(2x2) /
        Dropout stages -> GlobalMaxPooling2D.
      * Image branch 2: BatchNormalization -> one 3x3 Conv2D / MaxPooling2D(2x2) /
        Dropout stage -> GlobalMaxPooling2D.
      * Both branch outputs are concatenated with the batch-normalized angle input,
        then passed through Dense -> BatchNormalization -> Dropout -> Dense(1, sigmoid).

    Args:
        optimizer: Passed to `model.compile`.
        init_mode: Accepted but not referenced anywhere in the body.
            NOTE(review): presumably meant as a kernel initializer - confirm.
        activation: Activation for every Conv2D layer and the hidden Dense layer.
        map_conv: Kernel sizes for the three Conv2D layers of image branch 1.
        out_layers: Layer widths. Indices 0, 1 and 3 size the three Conv2D layers of
            branch 1, index 0 also sizes the Conv2D of branch 2, and index 4 sizes the
            hidden Dense layer. Index 2 is never read.
            NOTE(review): the third conv using index 3 rather than 2 may be a typo - confirm.
        dropout: Rate used for every Dropout layer.
        weight_constraint: maxnorm value applied to the kernels of the 2nd and 3rd
            Conv2D of branch 1 and of the hidden Dense layer (the first Conv2D of each
            branch is unconstrained).

    Returns:
        The compiled model taking `[input_1, input_2]` (named "X_1" and "angle"),
        with binary cross-entropy loss and an accuracy metric.
    """
    # Momentum shared by every BatchNormalization layer in this model.
    # NOTE(review): 0 presumably makes the moving statistics follow only the latest batch - confirm.
    bn_model = 0
    input_1 = layers.Input(shape=(75, 75, 3), name="X_1")
    input_2 = layers.Input(shape=[1], name="angle")
        
    # Image branch 1: three conv/pool/dropout stages on the batch-normalized image.
    img_1 = layers.Conv2D(out_layers[0], map_conv[0], activation=activation, input_shape=(75, 75, 3)) ((BatchNormalization(momentum=bn_model))(input_1))
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)
    
    img_1 = layers.Conv2D(out_layers[1], map_conv[1], activation=activation, kernel_constraint=maxnorm(weight_constraint)) (img_1)
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)
    
    # Note: reads out_layers[3]; out_layers[2] is skipped.
    img_1 = layers.Conv2D(out_layers[3], map_conv[2], activation=activation, kernel_constraint=maxnorm(weight_constraint)) (img_1)
    img_1 = layers.MaxPooling2D((2, 2)) (img_1)
    img_1 = layers.Dropout(dropout) (img_1)
    img_1 = layers.GlobalMaxPooling2D() (img_1)    
    
    # Image branch 2: a single 3x3 conv stage on its own batch-normalized copy of the image.
    img_2 = layers.Conv2D(out_layers[0], kernel_size = (3,3), activation=activation) ((BatchNormalization(momentum=bn_model))(input_1))
    img_2 = layers.MaxPooling2D((2,2)) (img_2)
    img_2 = layers.Dropout(dropout) (img_2)
    img_2 = layers.GlobalMaxPooling2D() (img_2)    
    
    # Merge both image feature vectors with the batch-normalized angle.
    img_concat =  (Concatenate()([img_1, img_2, BatchNormalization(momentum=bn_model)(input_2)]))
    
    # Hidden head, applied inside-out: Dense -> BatchNormalization -> Dropout.
    dense = layers.Dropout(dropout) (BatchNormalization(momentum=bn_model) (layers.Dense(out_layers[4], activation=activation, kernel_constraint=maxnorm(weight_constraint)) (img_concat)))     
    
    # Single sigmoid unit for the binary target.
    output = layers.Dense(1, activation="sigmoid") (dense)
    model = models.Model([input_1,input_2],  output)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    #model.summary()
    return model

In [None]:
callbacks = get_callbacks(filepath=file_path, patience=5)

model = create_model()
#model = KerasClassifier(build_fn=create_model, verbose=2)
#loss = ['binary_crossentropy']
#epochs = [10]
#out_layers=[[64,128,32,32,64], [64,64,32,32,64], [64,32,32,32,64]]

#param_grid = dict(batch_size=[2], epochs=epochs)

#grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [None]:
#grid_result = grid.fit(X, y)
model.fit([X, angle], y, 
          epochs=25, 
          validation_data=([X_test, angle_test], y_test), 
          batch_size=8, 
          callbacks=callbacks)

In [None]:
#print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
#means = grid_result.cv_results_['mean_test_score']
#stds = grid_result.cv_results_['std_test_score']
#params = grid_result.cv_results_['params']
#for mean, stdev, param in zip(means, stds, params):
#    print("%f (%f) with: %r" % (mean, stdev, param))

In [25]:
from keras.models import model_from_json
import os

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [None]:
model_json = model.to_json()
with open('./model/covnet-v4.json', 'w') as json_file:
    json_file.write(model_json)

In [26]:
# Rebuild the architecture from its JSON description. The context manager closes
# the file even if model_from_json raises.
with open('./model/covnet-v4.json', 'r') as json_file:
    model = model_from_json(json_file.read())

# Load the weights from the checkpoint path (`file_path`).
model.load_weights(filepath=file_path)

In [None]:
model.save_weights('./model/covnet-v4.h5')

In [27]:
test = pd.read_json('../input/test.json')

In [28]:
test.head()

Unnamed: 0,band_1,band_2,id,inc_angle
0,"[-15.863251, -15.201077, -17.887735, -19.17248...","[-21.629612, -21.142353, -23.908337, -28.34524...",5941774d,34.9664
1,"[-26.058969497680664, -26.058969497680664, -26...","[-25.754207611083984, -25.754207611083984, -25...",4023181e,32.615072
2,"[-14.14109992980957, -15.064241409301758, -17....","[-14.74563980102539, -14.590410232543945, -14....",b20200e4,37.505433
3,"[-12.167478, -13.706167, -16.54837, -13.572674...","[-24.32222, -26.375538, -24.096739, -23.8769, ...",e7f018bb,34.4739
4,"[-23.37459373474121, -26.02718162536621, -28.1...","[-25.72234344482422, -27.011577606201172, -23....",4371c8c3,43.918874


In [29]:
print(test.shape)
rows = test.shape[0]

(8424, 4)


In [30]:
# Select the incidence angle by column name rather than by position in `.values`,
# so the result does not depend on the column order of `test`.
angle_test = np.array(test['inc_angle'], dtype=float)

In [31]:
test_bands = get_imgs(test)

In [32]:
#test_bands = np.array([x[0] + x[1] for x in test.values])
# Flatten every test image to one row, matching the layout the scaler was fitted on.
# The sample count comes from the array itself, not from the shared `rows` variable,
# which holds the training row count earlier in the notebook.
test_bands = test_bands.reshape(test_bands.shape[0], 75 * 75 * 3)

In [33]:
# Apply the scalers fitted on the training data (transform only, no re-fitting).
test_bands = scaler.transform(test_bands)
# The scaler needs a 2-D (n_samples, 1) array. A plain ndarray column is used instead of
# np.matrix, which NumPy discourages and recent scikit-learn releases reject.
# ravel() restores the 1-D shape without relying on the shared `rows` variable.
angle_test = scaler_angle.transform(
    np.asarray(angle_test, dtype=float).reshape(-1, 1)
).ravel()

In [34]:
test_bands.shape

(8424, 16875)

In [35]:
#test_bands = np.reshape(test_bands, (8424, 75, 75, 2))
# Restore the (n_samples, 75, 75, 3) image layout expected by the model's image input.
# -1 infers the sample count instead of depending on the shared `rows` variable.
test_bands = test_bands.reshape((-1, 75, 75, 3))

In [None]:
ids, is_iceberg = [], []

In [36]:
prediction = model.predict([test_bands, angle_test], verbose=1, batch_size=16)

#for row in range(len(test_bands)):
#    img = test_bands[row]
#    angle = angle_test[row]
#    ids.append(test.id[row])
#    pred = model.predict(np.array([img]))
#    is_iceberg.append(pred[0][0])

#print(ids[:10], is_iceberg[:10])



In [37]:
prediction

array([[1.0908052e-01],
       [4.6530557e-01],
       [4.1099277e-13],
       ...,
       [5.9422940e-01],
       [9.9356586e-01],
       [5.5685206e-10]], dtype=float32)

In [38]:
# Pair each test id with its predicted probability; the predictions are reshaped
# to one dimension with one entry per row.
submission = pd.DataFrame({
    'id': test["id"],
    'is_iceberg': prediction.reshape(len(prediction)),
})
submission.head(10)

Unnamed: 0,id,is_iceberg
0,5941774d,0.1090805
1,4023181e,0.4653056
2,b20200e4,4.109928e-13
3,e7f018bb,0.8221838
4,4371c8c3,0.1172928
5,a8d9b1fd,4.896826e-14
6,29e7727e,0.5490165
7,92a51ffb,0.9552501
8,c769ac97,0.02482587
9,aee0547d,0.00205718


In [None]:
#df_result = pd.DataFrame({'id': ids,'is_iceberg': is_iceberg})

In [None]:
#df_result.head()

In [39]:
submission.to_csv('submission_v3.csv', encoding='utf-8', float_format='%.16f', index=False)

In [40]:
submission.is_iceberg.mean()

0.30894306

In [41]:
submission.is_iceberg.std()

0.37830165