# Incidence Angle model ensemble
Training samples that do not include an incidence angle must be discarded before fitting.

In [1]:
# load data
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from keras.models import Model
from keras.layers import Flatten, Dense, Input
from keras.layers import Convolution2D, MaxPooling2D, BatchNormalization, Dropout
from keras.layers import GlobalMaxPooling2D, concatenate
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras import regularizers
import random

# Pick 60 row labels to hold out for training the ensemble later.
# The seed is fixed so the same rows are held out on every run.
# NOTE(review): assumes the labels 0..1506 all exist in the loaded frame's index - confirm.
random.seed(17332)
nums = random.sample(range(0,1507),60)


training_path = "../../Test_data/train.json"
testing_path = "../../Test_data/test.json"

train_data_whole = pd.read_json(training_path)
# DataFrame.select() was deprecated in pandas 0.21 and removed in 1.0, so the
# hold-out labels are dropped with a boolean index mask instead. The .copy()
# makes train_data an independent frame, so assigning columns to it later
# cannot touch (or warn about) train_data_whole.
train_data = train_data_whole.loc[~train_data_whole.index.isin(nums)].copy()

test_data = pd.read_json(testing_path)


# no third for inc angle
def get_scaled_imgs(df):
    """Build a two-channel image array from the 'band_1' and 'band_2' columns.

    Each row's 'band_1' and 'band_2' entries are reshaped to 75x75 and stacked
    along the last axis. Despite the function name, no rescaling is applied:
    the band values are used as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'band_1' and 'band_2' columns, each entry holding
        75 * 75 = 5625 values.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(df), 75, 75, 2); channel 0 is band_1 and
        channel 1 is band_2.
    """
    imgs = [
        np.dstack((np.array(band_1).reshape(75, 75),
                   np.array(band_2).reshape(75, 75)))
        for band_1, band_2 in zip(df['band_1'], df['band_2'])
    ]

    if not imgs:
        # np.array([]) would have shape (0,); keep the image rank so that
        # downstream indexing and model input still work on an empty frame.
        return np.empty((0, 75, 75, 2))

    return np.array(imgs)

X = get_scaled_imgs(train_data)

# Missing incidence angles are stored as the string 'na'. Coerce the column to
# floats and map every missing value to 0, so the "> 0" test below drops those
# rows and the angle column ends up with a numeric dtype the model can use.
# assign() returns a new frame instead of writing into a filtered one.
train_data = train_data.assign(
    inc_angle=pd.to_numeric(train_data["inc_angle"], errors="coerce").fillna(0)
)
idx_tr = np.where(train_data["inc_angle"] > 0)

Y = train_data["is_iceberg"]
inc_angle = train_data["inc_angle"]

# keep only the samples that have an incidence angle
Y_inc = Y.iloc[idx_tr[0]]
X_inc = X[idx_tr[0],...]
inc_angle_inc = inc_angle.iloc[idx_tr[0]]
assert len(X_inc) == len(Y_inc) == len(inc_angle_inc)


# "full" set: every sample with an angle, used when fitting without validation
X_full = X_inc
Y_full = Y_inc
inc_full = inc_angle_inc

X_train, X_test, y_train, y_test, inc_train, inc_test = train_test_split(
    X_inc, Y_inc, inc_angle_inc, test_size=0.25, random_state=42)

# report the size of every split
for label, values in [("size x_train", X_train),
                      ("size x_test", X_test),
                      ("size inc train", inc_train),
                      ("size inc test", inc_test),
                      ("size y_train", y_train),
                      ("size y_test", y_test)]:
    print(label, len(values))

for label, values in [("size x full", X_full),
                      ("size y full", Y_full),
                      ("size inc full", inc_full)]:
    print(label, len(values))



Using TensorFlow backend.
  return f(*args, **kwds)


size x_train 1058
size x_test 353
size inc train 1058
size inc test 353
size y_train 1058
size y_test 353
size x full 1411
size y full 1411
size inc full 1411
0       0
1       0
2       1
3       0
4       0
5       1
6       1
7       0
8       0
9       0
10      1
11      0
12      1
13      1
15      0
16      0
17      0
18      0
19      1
20      0
21      1
22      0
23      1
24      0
25      1
26      1
27      0
28      1
30      0
31      0
       ..
1479    1
1480    0
1481    1
1482    0
1483    0
1484    1
1485    0
1486    1
1487    1
1488    0
1489    1
1490    1
1491    0
1492    1
1493    1
1494    0
1495    0
1496    1
1497    1
1498    1
1499    0
1500    1
1501    1
1502    1
1503    0
1504    1
1505    0
1506    1
1507    0
1508    1
Name: is_iceberg, Length: 1411, dtype: int64


In [2]:
# create the model
# Two inputs: the 75x75 two-band image and the scalar incidence angle.
img_input = Input(shape=(75,75,2))
ang_input = Input(shape=(1,))


def _conv3x3(filters, name):
    """Return a 3x3 'same'-padded ReLU conv layer with L2(0.1) on kernel and bias."""
    return Convolution2D(filters, (3,3), activation='relu', padding='same',
                         kernel_regularizer=regularizers.l2(0.1),
                         bias_regularizer=regularizers.l2(0.1), name=name)


# Three blocks of (conv, conv, 2x2 max-pool, dropout) with 64, 128 and 256 filters.
x = img_input
for block_no, n_filters in enumerate((64, 128, 256), start=1):
    prefix = 'block%d' % block_no
    x = _conv3x3(n_filters, prefix + '_conv1')(x)
    x = _conv3x3(n_filters, prefix + '_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name=prefix + '_pool')(x)
    x = Dropout(0.2)(x)

# The flattened image features and the incidence angle are joined before the
# fully connected head.
x = Flatten(name='flatten')(x)
x = concatenate([x, ang_input])

# NOTE(review): no activation is passed to these two hidden Dense layers, so
# Keras applies none (linear) - confirm this is intended.
for n_units in (256, 128):
    x = Dense(n_units, kernel_regularizer=regularizers.l2(0.1),
              bias_regularizer=regularizers.l2(0.1))(x)
    x = Dropout(0.5)(x)

main_output = Dense(1, activation='sigmoid', name='predictions')(x)

model = Model(inputs=[img_input, ang_input], outputs=[main_output], name='vgg_inc')
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.00001),
              metrics=['accuracy'])
model.summary()


def get_callbacks(filepath, patience=2):
    """Return the [early-stopping, checkpoint] callbacks for a training run.

    filepath: where the checkpoint callback writes the best model so far.
    patience: epochs without val_loss improvement before training stops.
    """
    return [
        EarlyStopping(monitor='val_loss', patience=patience, mode="min"),
        ModelCheckpoint(filepath, save_best_only=True),
    ]


# checkpoint target and callbacks for runs that use a validation set
file_path = "model_weights_inc_angle_TWO_reg1_ensem.hdf5"
callbacks = get_callbacks(file_path, patience=15)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 75, 75, 2)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 75, 75, 64)   1216        input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 75, 75, 64)   36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 37, 37, 64)   0           block1_conv2[0][0]               
__________________________________________________________________________________________________
dropout_1 

# Notes
Using a regularization factor of 0.5 on all layers, with 256 and 128 units in the fully connected layers, got to about 90% accuracy.

In [3]:
# fitting
# Final run: fit on every sample that has an incidence angle, with no
# validation data. For a validation run, additionally pass
#   callbacks=callbacks, validation_data=([X_test, inc_test], y_test)
model.fit(
    [X_full, inc_full],
    Y_full,
    batch_size=32,
    epochs=240,
    verbose=1,
)

Epoch 1/240
Epoch 2/240
Epoch 3/240
Epoch 4/240
Epoch 5/240
Epoch 6/240
Epoch 7/240
Epoch 8/240
Epoch 9/240
Epoch 10/240
Epoch 11/240
Epoch 12/240
Epoch 13/240
Epoch 14/240
Epoch 15/240
Epoch 16/240
Epoch 17/240
Epoch 18/240
Epoch 19/240
Epoch 20/240
Epoch 21/240
Epoch 22/240
Epoch 23/240
Epoch 24/240
Epoch 25/240
Epoch 26/240
Epoch 27/240
Epoch 28/240
Epoch 29/240
Epoch 30/240
Epoch 31/240
Epoch 32/240
Epoch 33/240
Epoch 34/240
Epoch 35/240
Epoch 36/240
Epoch 37/240
Epoch 38/240
Epoch 39/240
Epoch 40/240
Epoch 41/240
Epoch 42/240
Epoch 43/240
Epoch 44/240
Epoch 45/240
Epoch 46/240
Epoch 47/240
Epoch 48/240
Epoch 49/240
Epoch 50/240
Epoch 51/240
Epoch 52/240
Epoch 53/240
Epoch 54/240
Epoch 55/240
Epoch 56/240
Epoch 57/240
Epoch 58/240
Epoch 59/240
Epoch 60/240
Epoch 61/240
Epoch 62/240
Epoch 63/240
Epoch 64/240
Epoch 65/240
Epoch 66/240
Epoch 67/240
Epoch 68/240
Epoch 69/240
Epoch 70/240
Epoch 71/240
Epoch 72/240
Epoch 73/240
Epoch 74/240
Epoch 75/240
Epoch 76/240
Epoch 77/240
Epoch 78

Epoch 83/240
Epoch 84/240
Epoch 85/240
Epoch 86/240
Epoch 87/240
Epoch 88/240
Epoch 89/240
Epoch 90/240
Epoch 91/240
Epoch 92/240
Epoch 93/240
Epoch 94/240
Epoch 95/240
Epoch 96/240
Epoch 97/240
Epoch 98/240
Epoch 99/240
Epoch 100/240
Epoch 101/240
Epoch 102/240
Epoch 103/240
Epoch 104/240
Epoch 105/240
Epoch 106/240
Epoch 107/240
Epoch 108/240
Epoch 109/240
Epoch 110/240
Epoch 111/240
Epoch 112/240
Epoch 113/240
Epoch 114/240
Epoch 115/240
Epoch 116/240
Epoch 117/240
Epoch 118/240
Epoch 119/240
Epoch 120/240
Epoch 121/240
Epoch 122/240
Epoch 123/240
Epoch 124/240
Epoch 125/240
Epoch 126/240
Epoch 127/240
Epoch 128/240
Epoch 129/240
Epoch 130/240
Epoch 131/240
Epoch 132/240
Epoch 133/240
Epoch 134/240
Epoch 135/240
Epoch 136/240
Epoch 137/240
Epoch 138/240
Epoch 139/240
Epoch 140/240
Epoch 141/240
Epoch 142/240
Epoch 143/240
Epoch 144/240
Epoch 145/240
Epoch 146/240
Epoch 147/240
Epoch 148/240
Epoch 149/240
Epoch 150/240
Epoch 151/240
Epoch 152/240
Epoch 153/240
Epoch 154/240
Epoch 155

Epoch 164/240
Epoch 165/240
Epoch 166/240
Epoch 167/240
Epoch 168/240
Epoch 169/240
Epoch 170/240
Epoch 171/240
Epoch 172/240
Epoch 173/240
Epoch 174/240
Epoch 175/240
Epoch 176/240
Epoch 177/240
Epoch 178/240
Epoch 179/240
Epoch 180/240
Epoch 181/240
Epoch 182/240
Epoch 183/240
Epoch 184/240
Epoch 185/240
Epoch 186/240
Epoch 187/240
Epoch 188/240
Epoch 189/240
Epoch 190/240
Epoch 191/240
Epoch 192/240
Epoch 193/240
Epoch 194/240
Epoch 195/240
Epoch 196/240
Epoch 197/240
Epoch 198/240
Epoch 199/240
Epoch 200/240
Epoch 201/240
Epoch 202/240
Epoch 203/240
Epoch 204/240
Epoch 205/240
Epoch 206/240
Epoch 207/240
Epoch 208/240
Epoch 209/240
Epoch 210/240
Epoch 211/240
Epoch 212/240
Epoch 213/240
Epoch 214/240
Epoch 215/240
Epoch 216/240
Epoch 217/240
Epoch 218/240
Epoch 219/240
Epoch 220/240
Epoch 221/240
Epoch 222/240
Epoch 223/240
Epoch 224/240
Epoch 225/240
Epoch 226/240
Epoch 227/240
Epoch 228/240
Epoch 229/240
Epoch 230/240
Epoch 231/240
Epoch 232/240
Epoch 233/240
Epoch 234/240
Epoch 

<keras.callbacks.History at 0x7fd10fa69898>

In [None]:
# Only for the final run without a validation set: no checkpoint callback is
# active there, so the fitted model has to be saved explicitly.
filepath_full = "model_weights_inc2_full_reg01_ensem.hdf5"
model.save(filepath=filepath_full)

In [None]:
# From the validation run above, find the number of epochs (80+) after which
# the validation metric stops improving, then train for that many epochs in
# one full go and save the model.

from keras.models import load_model
import pandas as pd
# if not using val
file_path_use = filepath_full
# if using val
#file_path_use = file_path
inf_model = load_model(file_path_use)

# These are the samples the model was fitted on, so the numbers below are
# training metrics, not test metrics.
score = inf_model.evaluate([X_full,inc_full], Y_full, verbose=1)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

# for hold out submission to train ensemble
# The hold-out rows were removed from train_data, so they must be taken from
# train_data_whole. Indexing train_data with nums would return rows the model
# was trained on instead of the held-out ones.
hold_out_train = train_data_whole.loc[nums]

X_sub = get_scaled_imgs(hold_out_train)
# Same convention as for the training frame: a missing angle ('na') becomes 0,
# and the column is turned into a plain float array for the model.
# NOTE(review): the model was fitted only on rows with angle > 0, so
# predictions for hold-out rows without an angle are out of distribution -
# confirm whether they should be dropped or imputed instead.
inc_angle_t = pd.to_numeric(hold_out_train["inc_angle"], errors="coerce").fillna(0).values

predicted_test = inf_model.predict([X_sub, inc_angle_t])

print("len of pred test", len(predicted_test))
print("len of id", len(hold_out_train['id']))

# predict() returns shape (n, 1); flatten it so each id gets one probability
submission = pd.DataFrame({
    'id': hold_out_train['id'].values,
    'is_iceberg': predicted_test.ravel(),
}, columns=['id', 'is_iceberg'])
submission.to_csv('sub_full_inc2_ensem.csv', index=False)