In [1]:
import numpy as np # linear algebra
import scipy as scipy
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import dill

import keras as k
import cv2
from tqdm import tqdm


Using Theano backend.


In [2]:
with open('tiffs.pkl', 'rb') as in_strm:
    all_info = dill.load(in_strm)
x_train = np.array(all_info[0], dtype=np.float64)
x_test = np.array(all_info[1], dtype=np.float64)
y_train = all_info[2]
y_test = all_info[3]
label_map = all_info[4]
inv_label_map = all_info[5]
print(x_train.shape)

(35000, 32, 32, 4)


In [3]:
with open('basicFeatures.pkl', 'rb') as in_strm:
    features = dill.load(in_strm)

In [4]:
train_features = features[:8]
test_features = features[8:]

for i, train_feature in enumerate(train_features):
    train_feature = np.array(train_feature, dtype=np.float64)
    test_feature = np.array(test_features[i], dtype=np.float64)
    mean = np.mean(train_feature, axis = 0)
    train_feature -= mean # zero-center
    test_feature -= mean
    std = np.std(train_feature, axis = 0)
    train_feature /= std # normalize
    test_feature /= std
    train_features[i] = np.nan_to_num(train_feature)
    test_features[i] = np.nan_to_num(test_feature)
    
mean = np.mean(x_train, axis = 0)
x_train -= mean # zero-center
x_test -= mean
std = np.std(x_train, axis = 0)
x_train /= std # normalize
x_test /= std

#put into network
train_input = [x_train]
for train_feature in train_features:
    train_input.append(train_feature)

test_input = [x_test]
for test_feature in test_features:
    test_input.append(test_feature)

In [5]:
#creates unet block attached to previous_layer
#returns last layer of unet block
def create_unet_block(depth, width, drop_constant, act, previous_layer):
    prev = previous_layer
    toConcatLater = list()
    
    for cur_depth in range(depth):
        if cur_depth != 0:
            prev = MaxPooling2D(pool_size=(2, 2))(prev)
        conv_width = int(width * math.pow(2, cur_depth))
        conv1 = Conv2D(conv_width, (3, 3), activation=act, padding='same')(prev)
        conv2 = Conv2D(conv_width, (3, 3), activation=act, padding='same')(conv1)
        prev = conv2
        toConcatLater.append(conv2)
    prev = UpSampling2D(size=(2, 2))(prev)
    for cur_depth in reversed(range(depth - 1)):
        merged = concatenate([prev, toConcatLater[cur_depth]], axis=3)
        drop = Dropout(drop_constant)(merged)
        conv_width = int(width * math.pow(2, cur_depth))
        conv1 = Conv2D(conv_width, (3, 3), activation=act, padding='same')(drop)
        conv2 = Conv2D(conv_width, (3, 3), activation=act, padding='same')(conv1)
        up = UpSampling2D(size=(2, 2))(conv2)
        prev = up
    
    return prev

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras.layers.merge import concatenate
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
import math

#baseline part
original_input = Input(shape=(32, 32, 4))
unet_out = create_unet_block(5, 32, 0.4, 'relu', original_input)
flat1 = Flatten()(unet_out)

#mean
mean_input = Input(shape=(4,))

#std
std_input = Input(shape=(4,))

#var
var_input = Input(shape=(4,))

#moment
moment_input = Input(shape=(4,))

#entropy
entropy_input = Input(shape=(4,))

#dct
dct_input = Input(shape=(32, 32, 4))
flat2 = Flatten()(dct_input)

#cov
cov_input = Input(shape=(32, 32, 4))
flat3 = Flatten()(cov_input)

#auto
auto_input = Input(shape=(32, 32, 4))
flat4 = Flatten()(auto_input)

combine = concatenate([flat1, mean_input, std_input, var_input, 
                       moment_input, entropy_input, flat2, flat3, flat4])

dense8 = Dense(128, activation='relu')(combine)
dense9 = Dense(17, activation='sigmoid')(dense8)

model = Model(inputs=[original_input, mean_input, std_input, var_input, 
                      moment_input, entropy_input, dct_input, cov_input, auto_input], outputs=dense9)

model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
                    optimizer='adam',
                    metrics=['accuracy'])
model.summary()
model.fit(train_input, y_train, batch_size=128, epochs=6, verbose=1, validation_data=(test_input, y_test))

from sklearn.metrics import fbeta_score

p_valid = model.predict([x_test, test_edges], batch_size=128)
print(y_test)
print(p_valid)
print(fbeta_score(y_test, np.array(p_valid) > 0.2, beta=2, average='samples'))

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 32, 32, 4)     0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 32, 32, 32)    1184        input_1[0][0]                    
____________________________________________________________________________________________________
conv2d_2 (Conv2D)                (None, 32, 32, 32)    9248        conv2d_1[0][0]                   
____________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)   (None, 16, 16, 32)    0           conv2d_2[0][0]                   
___________________________________________________________________________________________

Total params: 26,201,873
Trainable params: 26,201,873
Non-trainable params: 0
____________________________________________________________________________________________________
Train on 35000 samples, validate on 5479 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
 2304/35000 [>.............................] - ETA: 3431s - loss: 0.1312 - acc: 0.9501

KeyboardInterrupt: 

In [8]:
from sklearn.metrics import fbeta_score

p_valid = model.predict(test_input, batch_size=128)
print(y_test)
print(p_valid)
print(fbeta_score(y_test, np.array(p_valid) > 0.2, beta=2, average='samples'))

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 0]]
[[  1.00000000e+00   0.00000000e+00   1.00000000e+00 ...,   1.00000000e+00
    1.00000000e+00   0.00000000e+00]
 [  8.34041089e-02   2.07189121e-03   5.72080731e-01 ...,   2.77819834e-03
    2.57823408e-01   3.80425416e-02]
 [  2.70822436e-01   3.12816083e-01   9.40233469e-01 ...,   7.87496334e-04
    1.60940826e-01   1.41964834e-02]
 ..., 
 [  3.78402740e-01   1.62808574e-04   4.52405065e-02 ...,   1.80279239e-05
    8.25769603e-01   2.51490846e-02]
 [  3.98403645e-01   5.51078643e-04   9.43133086e-02 ...,   3.14959325e-05
    7.95565546e-01   4.79608588e-02]
 [  9.27788690e-02   5.60433883e-03   4.90881264e-01 ...,   1.81447249e-02
    2.22984344e-01   2.02748459e-02]]
0.87463517651


In [9]:
model.save('unets/best_so_far_width_5_width_32.h5')

OSError: Unable to create file (Unable to open file: name = 'unets/best_so_far_width_5_width_32.h5', errno = 2, error message = 'no such file or directory', flags = 13, o_flags = 602)

In [None]:
from keras.models import load_model
model = load_model('unets/best_so_far_width_5_width_32.h5')
epochs_arr = [2, 5, 5]
learn_rates = [0.001, 0.0001, 0.00001]
for learn_rate, epochs in zip(learn_rates, epochs_arr):
    adam = Adam(lr=learn_rate)
    model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
                    optimizer=adam,
                    metrics=['accuracy'])
    model.fit(train_input, y_train, batch_size=128, epochs=6, verbose=1, validation_data=(test_input, y_test))
