In [1]:
import numpy as np # linear algebra
import scipy as scipy
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import dill

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

import cv2
from tqdm import tqdm


Using Theano backend.


In [2]:
# NOTE(review): dill.load can execute arbitrary code embedded in the file,
# so 'tiffs.pkl' must come from a trusted source.
with open('tiffs.pkl', 'rb') as pickle_stream:
    all_info = dill.load(pickle_stream)

# The loaded object is indexed positionally: 0/1 are the train/test inputs,
# 2/3 are the matching train/test labels.
x_train, x_test, y_train, y_test = (all_info[idx] for idx in range(4))

print(x_train.shape)

(35000, 32, 32, 4)


In [3]:
def recall(y_true, y_pred):
    """Recall over the whole batch: true positives / actual positives.

    The element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    each clipped to [0, 1] and rounded before being summed, so the counts
    are taken over every element of the tensors at once. ``K.epsilon()`` in
    the denominator avoids a division by zero when there are no positives.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, same shape as ``y_true``.

    Returns:
        A scalar backend tensor holding the recall.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(positive_mask) + K.epsilon())

In [4]:
def precision(y_true, y_pred):
    """Precision over the whole batch: true positives / predicted positives.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    each clipped to [0, 1] and rounded before being summed. ``K.epsilon()``
    in the denominator avoids a division by zero when nothing is predicted
    positive.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, same shape as ``y_true``.

    Returns:
        A scalar backend tensor holding the precision.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

In [5]:
def fbeta_score_keras(y_true, y_pred):
    """F-beta score (beta = 2) built from ``precision`` and ``recall``.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, same shape as ``y_true``.

    Returns:
        A scalar backend tensor: (1 + beta^2) * p * r / (beta^2 * p + r + eps).
    """
    beta = 2

    # There is deliberately no `if <tensor> == 0: return 0` guard here.
    # A Python `if` on a symbolic backend tensor is decided when the graph is
    # built, not per batch, so such a guard never sees the actual data.
    # It is also unnecessary: with no positives in y_true, `recall` is
    # 0 / (0 + eps) = 0, the numerator below is 0, and the epsilon keeps the
    # denominator positive, so the score is already 0.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    return (1 + bb) * (p * r) / (bb * p + r + K.epsilon())

In [6]:
from sklearn.metrics import fbeta_score


def fbeta_keras_online(y_true, y_pred, threshold_shift=-0.3):
    beta = 2

    # just in case of hipster activation at the final layer
    y_pred = K.clip(y_pred, 0, 1)

    # shifting the prediction threshold from .5 if needed
    y_pred_bin = K.round(y_pred + threshold_shift)

    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    fn = K.sum(K.round(K.clip(y_true - y_pred, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

In [12]:
def binary_crossentropy_with_fbeta(y_true, y_pred):
    """Binary cross-entropy plus a small penalty for a low F2 score.

    The penalty is ``penalty_const * (1 - fbeta_keras_online(y_true, y_pred))``
    and is added to the per-sample cross-entropy.

    Args:
        y_true: ground-truth tensor of 0/1 labels.
        y_pred: prediction tensor, same shape as ``y_true``.

    Returns:
        A backend tensor with the cross-entropy averaged over the last axis,
        shifted by the scalar F2 penalty.
    """
    penalty_const = 0.01
    penalty = penalty_const * (1 - fbeta_keras_online(y_true, y_pred))

    # Use the loss-level function rather than K.binary_crossentropy: the
    # backend function's positional order (output/target vs. target/output)
    # differs between Keras releases, whereas keras.losses always takes
    # (y_true, y_pred) and already averages over the last axis.
    return k.losses.binary_crossentropy(y_true, y_pred) + penalty

In [13]:
# Small CNN: two 3x3 convolutions, one max-pool, then a dense head.
# The input shape matches the per-sample shape of x_train printed above
# (32, 32, 4); the final layer has 17 independent sigmoid outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 4)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(17, activation='sigmoid'),
])

# Train against cross-entropy with the F2 penalty and report F2 as a metric.
model.compile(
    loss=binary_crossentropy_with_fbeta,
    optimizer='adam',
    metrics=[fbeta_keras_online],
)

model.fit(
    x_train,
    y_train,
    batch_size=1000,
    epochs=7,
    verbose=1,
    validation_data=(x_test, y_test),
)


Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
 2000/35000 [>.............................] - ETA: 172s - loss: 0.3084 - fbeta_keras_online: 0.5063

KeyboardInterrupt: 

In [14]:

from sklearn.metrics import fbeta_score

# Predict on the hold-out split and show labels next to raw probabilities.
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)

# Binarise the probabilities at 0.2 and report the sample-averaged F2 score.
p_valid_binary = np.array(p_valid) > 0.2
f2_valid = fbeta_score(y_test, p_valid_binary, beta=2, average='samples')
print(f2_valid)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.18805675  0.06228532  0.49662563 ...,  0.12711665  0.2604568
   0.0673587 ]
 [ 0.17606932  0.05910827  0.39082506 ...,  0.09011976  0.22563849
   0.07528482]
 [ 0.12793879  0.02293998  0.12827724 ...,  0.05184398  0.18000919
   0.0380621 ]
 ..., 
 [ 0.29548964  0.16975032  0.26524153 ...,  0.12266882  0.6056416
   0.16881254]
 [ 0.30097678  0.17888857  0.37886751 ...,  0.20620933  0.52923286
   0.25532603]
 [ 0.11430117  0.02697576  0.10273355 ...,  0.04685617  0.20211954
   0.04236749]]
0.743334196247


In [15]:
# Call fit again on the same `model` object for up to 7 more epochs,
# validating against the hold-out split.
model.fit(
    x_train,
    y_train,
    batch_size=1000,
    epochs=7,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7


KeyboardInterrupt: 

In [16]:
from sklearn.metrics import fbeta_score

# Predict on the hold-out split and show labels next to raw probabilities.
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)

# Binarise the probabilities at 0.2 and report the sample-averaged F2 score.
p_valid_binary = np.array(p_valid) > 0.2
f2_valid = fbeta_score(y_test, p_valid_binary, beta=2, average='samples')
print(f2_valid)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.2312353   0.02537001  0.48796815 ...,  0.0478692   0.27624384
   0.03611949]
 [ 0.13422211  0.01815431  0.30341125 ...,  0.02665851  0.26335654
   0.03648667]
 [ 0.29764172  0.0641996   0.53487962 ...,  0.06642032  0.42421669
   0.04690501]
 ..., 
 [ 0.33085829  0.03388953  0.14703441 ...,  0.04580366  0.6725229
   0.09434494]
 [ 0.30578449  0.0315675   0.15251234 ...,  0.03896739  0.65671033
   0.12435292]
 [ 0.17607771  0.01775476  0.20984675 ...,  0.03177514  0.43244749
   0.02760749]]
0.801121528337


In [17]:
# Call fit again on the same `model` object for 7 more epochs,
# validating against the hold-out split.
model.fit(
    x_train,
    y_train,
    batch_size=1000,
    epochs=7,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x10fdcebe0>

In [18]:
# Predict on the hold-out split and show labels next to raw probabilities.
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)

# Binarise the probabilities at 0.2 and report the sample-averaged F2 score.
# `fbeta_score` is the sklearn.metrics function imported in an earlier cell.
p_valid_binary = np.array(p_valid) > 0.2
f2_valid = fbeta_score(y_test, p_valid_binary, beta=2, average='samples')
print(f2_valid)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.19689874  0.01792444  0.54304975 ...,  0.0200441   0.26652539
   0.02287245]
 [ 0.16764882  0.00803599  0.36010656 ...,  0.01190144  0.29902935
   0.02725269]
 [ 0.1941321   0.25973311  0.75646967 ...,  0.03157576  0.43261868
   0.08985683]
 ..., 
 [ 0.31404099  0.00453185  0.13807026 ...,  0.00337426  0.71992868
   0.06464038]
 [ 0.17210431  0.00141903  0.15463065 ...,  0.00441624  0.55882019
   0.08544972]
 [ 0.06821376  0.00791778  0.12486854 ...,  0.00810282  0.21496125
   0.00638146]]
0.833643687431


In [19]:
# Call fit again on the same `model` object for 7 more epochs,
# validating against the hold-out split.
model.fit(
    x_train,
    y_train,
    batch_size=1000,
    epochs=7,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x119407da0>

In [20]:
# Predict on the hold-out split and show labels next to raw probabilities.
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)

# Binarise the probabilities at 0.2 and report the sample-averaged F2 score.
# `fbeta_score` is the sklearn.metrics function imported in an earlier cell.
p_valid_binary = np.array(p_valid) > 0.2
f2_valid = fbeta_score(y_test, p_valid_binary, beta=2, average='samples')
print(f2_valid)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[  1.31779805e-01   7.45426584e-03   6.87134206e-01 ...,   7.58733833e-03
    2.42215082e-01   1.08901327e-02]
 [  1.85924813e-01   3.95755703e-03   3.81921500e-01 ...,   6.60545425e-03
    4.06977832e-01   3.70894894e-02]
 [  1.31916136e-01   5.81686273e-02   8.88109028e-01 ...,   2.05938146e-03
    2.54274338e-01   1.52279064e-02]
 ..., 
 [  3.51471603e-01   3.24376771e-04   7.72729814e-02 ...,   2.99453008e-04
    7.69404709e-01   4.33638021e-02]
 [  1.57231107e-01   2.25267031e-05   1.30902454e-01 ...,   1.99996502e-04
    6.08606994e-01   5.99273965e-02]
 [  1.80830769e-02   3.27822665e-04   4.99162637e-02 ...,   1.84093940e-03
    1.64760411e-01   1.75900885e-03]]
0.848658690397
