In [1]:
import numpy as np # linear algebra
import scipy as scipy
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import dill

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

import cv2
from tqdm import tqdm


Using Theano backend.


In [2]:
# Load the pre-built arrays from disk.
# NOTE: dill (like pickle) can run arbitrary code while deserialising,
# so only load 'tiffs.pkl' if it comes from a trusted source.
with open('tiffs.pkl', 'rb') as pkl_file:
    all_info = dill.load(pkl_file)

# The first four entries are, in order: train inputs, test inputs,
# train labels, test labels.
x_train, x_test, y_train, y_test = (all_info[idx] for idx in range(4))

print(x_train.shape)

(35000, 32, 32, 4)


In [3]:
def recall(y_true, y_pred):
    """Recall over the whole batch, computed with Keras backend ops.

    Both tensors are clipped to [0, 1] and rounded, so predictions are
    effectively binarised at 0.5 before counting.

    Returns true_positives / (actual_positives + epsilon); the epsilon
    keeps the division defined when there are no positive labels.
    """
    eps = K.epsilon()
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + eps)

In [4]:
def precision(y_true, y_pred):
    """Precision over the whole batch, computed with Keras backend ops.

    Predictions are clipped to [0, 1] and rounded (i.e. binarised at 0.5).

    Returns true_positives / (predicted_positives + epsilon); the epsilon
    keeps the division defined when nothing is predicted positive.
    """
    def _count(tensor):
        # Number of entries that round to 1 after clipping into [0, 1].
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    n_hits = _count(y_true * y_pred)
    n_flagged = _count(y_pred)
    return n_hits / (n_flagged + K.epsilon())

In [5]:
def fbeta_score_keras(y_true, y_pred, beta=2):
    """Batch-wise F-beta score built from `precision` and `recall`.

    Args:
        y_true: ground-truth label tensor.
        y_pred: predicted score tensor (binarised at 0.5 by the helpers).
        beta: weight of recall relative to precision (default 2, i.e. F2).

    Returns:
        (1 + beta^2) * p * r / (beta^2 * p + r + epsilon).

    When the batch contains no positive labels, recall is 0 and the
    epsilon in the denominator makes the result exactly 0. No explicit
    special case is needed; a Python-level `if tensor == 0` cannot
    branch on runtime values of a graph-backend tensor anyway.
    """
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    return (1 + bb) * (p * r) / (bb * p + r + K.epsilon())

In [7]:
from sklearn.metrics import fbeta_score


def fbeta_keras_online(y_true, y_pred, threshold_shift=-0.3, beta=2):
    """Batch-wise F-beta score with an adjustable decision threshold.

    Args:
        y_true: ground-truth label tensor (0/1 values).
        y_pred: predicted score tensor; clipped into [0, 1] first.
        threshold_shift: added to the scores before rounding, so the
            effective cut-off is ``0.5 - threshold_shift`` (0.8 by default).
        beta: weight of recall relative to precision (default 2, i.e. F2).

    Returns:
        (1 + beta^2) * P * R / (beta^2 * P + R + epsilon), where P and R
        are computed from the thresholded predictions.
    """
    # just in case the final layer's activation is not bounded to [0, 1]
    y_pred = K.clip(y_pred, 0, 1)

    # Shift the threshold away from .5 if needed. The outer clip keeps the
    # result in {0, 1} even when |threshold_shift| > 0.5, where rounding
    # alone could give -1 or 2.
    y_pred_bin = K.clip(K.round(y_pred + threshold_shift), 0, 1)

    # epsilon on tp keeps precision/recall defined when nothing is hit
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    # False negatives must use the same thresholded predictions as tp/fp;
    # using the raw scores would apply a different (0.5) cut-off here.
    fn = K.sum(K.round(K.clip(y_true - y_pred_bin, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

In [12]:
def binary_crossentropy_with_fbeta(y_true, y_pred):
    """Binary cross-entropy loss plus a small (1 - F-beta) penalty.

    Args:
        y_true: ground-truth label tensor.
        y_pred: predicted probability tensor.

    Returns:
        Per-sample mean binary cross-entropy (averaged over the last axis)
        with ``penalty_const * (1 - fbeta_keras_online(y_true, y_pred))``
        added to every sample.
    """
    # Weight of the F-beta term relative to the cross-entropy term.
    penalty_const = 0.0001
    # NOTE(review): the F-beta term is built from K.round, so it presumably
    # contributes no useful gradient and only offsets the reported loss;
    # confirm before relying on it to steer training.
    penalty = penalty_const * (1 - fbeta_keras_online(y_true, y_pred))
    # Pass by keyword: the positional order of K.binary_crossentropy differs
    # between Keras releases ((output, target) vs. (target, output)), and a
    # positional call silently swaps labels and predictions on the other one.
    cross_entropy = K.binary_crossentropy(target=y_true, output=y_pred)
    return K.mean(cross_entropy, axis=-1) + penalty

In [13]:
# Small CNN for 32x32 inputs with 4 channels: two conv layers, max-pooling,
# dropout, one dense hidden layer and 17 independent sigmoid outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 4)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(17, activation='sigmoid'),
])

# Train against the custom cross-entropy + F-beta loss with default Adam.
model.compile(optimizer='adam',
              loss=binary_crossentropy_with_fbeta,
              metrics=['accuracy'])

model.fit(x=x_train, y=y_train,
          validation_data=(x_test, y_test),
          epochs=7, batch_size=1000, verbose=1)

from sklearn.metrics import fbeta_score

# Score the held-out set: show labels and raw probabilities, then the
# sample-averaged F2 with predictions cut at 0.2.
p_valid = model.predict(x_test, batch_size=128)
for shown in (y_test, p_valid):
    print(shown)
print(fbeta_score(y_test, p_valid > 0.2, average='samples', beta=2))

Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.06847271  0.01043847  0.29897368 ...,  0.02965965  0.14259568
   0.02833995]
 [ 0.09815234  0.01568796  0.22894315 ...,  0.029207    0.28038764
   0.0408434 ]
 [ 0.26550749  0.05561415  0.39012483 ...,  0.04651165  0.33103365
   0.08838293]
 ..., 
 [ 0.33051199  0.0067965   0.05479564 ...,  0.00839201  0.74840111
   0.03780741]
 [ 0.29340854  0.01224715  0.11985572 ...,  0.01602243  0.676732
   0.08945021]
 [ 0.13815248  0.00819488  0.12058246 ...,  0.00179451  0.1579328
   0.01012986]]
0.814052006067


In [14]:
# Seven more epochs on the already-trained model, same settings as before.
model.fit(x=x_train, y=y_train,
          validation_data=(x_test, y_test),
          epochs=7, batch_size=1000, verbose=1)

Train on 35000 samples, validate on 5479 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x11be28630>

In [15]:
# Re-score the held-out set after the extra epochs (F2, threshold 0.2).
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)
val_f2 = fbeta_score(y_test, p_valid > 0.2, average='samples', beta=2)
print(val_f2)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.10676392  0.00423981  0.57201827 ...,  0.0203518   0.19874991
   0.01929451]
 [ 0.11797744  0.00628221  0.39176992 ...,  0.02416323  0.35258594
   0.03501657]
 [ 0.24952155  0.05486738  0.60402089 ...,  0.03145219  0.3392486
   0.05382715]
 ..., 
 [ 0.36395019  0.02095278  0.08963784 ...,  0.00548943  0.66093034
   0.09489788]
 [ 0.22619642  0.00615813  0.1436352  ...,  0.00339484  0.57740498
   0.06620781]
 [ 0.04945116  0.00196876  0.1287621  ...,  0.00085437  0.09784374
   0.0024065 ]]
0.830370032078


In [16]:
from keras.optimizers import Adam

# Recompile with a smaller learning rate (1e-4) and train four more epochs.
adam = Adam(lr=1e-4)
model.compile(optimizer=adam,
              loss=binary_crossentropy_with_fbeta,
              metrics=['accuracy'])

model.fit(x=x_train, y=y_train,
          validation_data=(x_test, y_test),
          epochs=4, batch_size=1000, verbose=1)


Train on 35000 samples, validate on 5479 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1168b9ef0>

In [17]:
# Held-out F2 after the first low-learning-rate round (threshold 0.2).
p_valid = model.predict(x_test, batch_size=128)
for shown in (y_test, p_valid):
    print(shown)
print(fbeta_score(y_test, p_valid > 0.2, average='samples', beta=2))

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.12862794  0.00399382  0.61349905 ...,  0.01715657  0.19441065
   0.01727507]
 [ 0.13428035  0.00783145  0.39315739 ...,  0.02850366  0.37531179
   0.04404324]
 [ 0.35418913  0.13530651  0.61096793 ...,  0.03656367  0.35547483
   0.08723994]
 ..., 
 [ 0.41582555  0.00623724  0.05014039 ...,  0.00197364  0.8029018
   0.06146602]
 [ 0.19326404  0.00182726  0.11283141 ...,  0.00122805  0.64084274
   0.05530868]
 [ 0.07061765  0.00300248  0.15460153 ...,  0.00136772  0.16161974
   0.00513008]]
0.843678244271


In [18]:
from keras.optimizers import Adam

# Same settings as the previous fine-tuning round: a fresh Adam optimizer
# at learning rate 1e-4, then four more epochs.
adam = Adam(lr=1e-4)
compile_kwargs = dict(loss=binary_crossentropy_with_fbeta,
                      optimizer=adam,
                      metrics=['accuracy'])
model.compile(**compile_kwargs)

model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=1000, epochs=4, verbose=1)


Train on 35000 samples, validate on 5479 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x11755e780>

In [19]:
# Held-out F2 after the second low-learning-rate round (threshold 0.2).
p_valid = model.predict(x_test, batch_size=128)
print(y_test)
print(p_valid)
is_positive = p_valid > 0.2
print(fbeta_score(y_test, is_positive, beta=2, average='samples'))

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.14767465  0.00520725  0.61837405 ...,  0.01714492  0.21554402
   0.01860865]
 [ 0.1472079   0.01019205  0.39849138 ...,  0.03245292  0.3943935
   0.05383373]
 [ 0.3559061   0.19416258  0.62946814 ...,  0.03721603  0.3249661
   0.10246442]
 ..., 
 [ 0.45915231  0.00909899  0.06510434 ...,  0.00286657  0.80963182
   0.08036362]
 [ 0.21001165  0.00250921  0.14867033 ...,  0.00166284  0.65772694
   0.08177613]
 [ 0.05048526  0.00229371  0.11570133 ...,  0.00105798  0.14404659
   0.00308428]]
0.84797659228


In [20]:
from keras.optimizers import Adam

# Drop the learning rate by another factor of ten (1e-5) for four epochs.
adam = Adam(lr=1e-5)
model.compile(optimizer=adam,
              metrics=['accuracy'],
              loss=binary_crossentropy_with_fbeta)

model.fit(x=x_train, y=y_train,
          batch_size=1000, epochs=4, verbose=1,
          validation_data=(x_test, y_test))


Train on 35000 samples, validate on 5479 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x11fb56588>

In [21]:
# Final held-out F2 after the 1e-5 learning-rate round (threshold 0.2).
p_valid = model.predict(x_test, batch_size=128)
for shown in (y_test, p_valid):
    print(shown)
final_f2 = fbeta_score(y_test, p_valid > 0.2, beta=2, average='samples')
print(final_f2)

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[ 0.1506494   0.0052235   0.62543684 ...,  0.01591036  0.21541731
   0.01841112]
 [ 0.15841334  0.01057222  0.39746171 ...,  0.03627377  0.39989465
   0.05637174]
 [ 0.35107708  0.19688928  0.63052875 ...,  0.03302256  0.32733831
   0.09991343]
 ..., 
 [ 0.47336614  0.00793939  0.05724641 ...,  0.00244323  0.81640249
   0.07375259]
 [ 0.217861    0.00186063  0.12753004 ...,  0.00137287  0.67238063
   0.07092487]
 [ 0.0537655   0.00230758  0.11045907 ...,  0.001137    0.15405549
   0.00335744]]
0.848078829556


Idea for later: try training with different loss functions and compare the resulting validation F2 scores.