In [None]:
import numpy as np 
import pandas as pd 
import os
import itertools

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Input, merge, UpSampling2D, Cropping2D, ZeroPadding2D, Reshape, core, Convolution2D, Conv2DTranspose
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from keras import optimizers
from keras import backend as K
from keras.optimizers import SGD
from keras.layers.merge import concatenate
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import load_img

from tensorflow.python.keras.callbacks import TensorBoard

from sklearn.metrics import fbeta_score
from sklearn.model_selection import train_test_split

from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

import preprocess
from model_00 import get_model
import pickle
import splitdata

import keras.backend as K
import tensorflow as tf

In [None]:
# Read the pickled list of training files from
# <splitdata.d_data>/<splitdata.f_train>.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on files produced by this project.
train_list_path = os.path.join(splitdata.d_data, splitdata.f_train)
with open(train_list_path, "rb") as fh:
    files = pickle.load(fh)

# An empty (or falsy) result means there is nothing to train on.
if not files:
    raise Exception("Could not load training files!")

print("Files: ", len(files))
print(files)

In [None]:
# Split the loaded file list into a training and a validation subset.
#
# A dedicated, seeded RandomState is used instead of np.random.shuffle so that
#   * the split is identical on every run (reproducible validation scores),
#   * `files` itself is not reordered, which makes this cell idempotent:
#     re-running it can never move a file from one subset to the other,
#   * NumPy's global random state (used for augmentation) is left untouched.
SPLIT_SEED = 42
VALID_FRACTION = 0.15

split_order = np.random.RandomState(SPLIT_SEED).permutation(len(files))
files_shuffled = [files[j] for j in split_order]

# Round up so that at least one file is held out for validation.
n_valid = int(np.ceil(len(files_shuffled) * VALID_FRACTION))
files_train = files_shuffled[n_valid:]
files_valid = files_shuffled[:n_valid]
print("Files train: ", len(files_train))
print("Files valid: ", len(files_valid))

In [None]:
def augment_imgarr(x, i):
    """Apply one of seven fixed augmentations to an image-like array.

    Parameters
    ----------
    x : numpy.ndarray
        Array whose first two axes are the image plane. The transpose
        branch uses ``axes=(1, 0, 2)``, so a 3-D array is expected.
    i : int
        Selects the augmentation:

        * 0-3 -> rotate by ``i * 90`` degrees in the plane of the first
          two axes (0 leaves the array unchanged),
        * 4   -> flip along axis 0,
        * 5   -> flip along axis 1,
        * anything else -> swap the first two axes (transpose).

    Returns
    -------
    numpy.ndarray
        The transformed array.
    """
    # The previous test `i % 4 == 0` only matched 0 and 4, both of which are
    # identity rotations; it also shadowed the `i == 4` flip below.
    if 0 <= i < 4:
        return np.rot90(x, k=i)
    if i == 4:
        return np.flip(x, 0)
    if i == 5:
        return np.flip(x, 1)
    return np.transpose(x, axes=(1, 0, 2))

In [None]:
def calc_weights(y):
    """Build a per-pixel weight map that balances ones against zeros.

    Parameters
    ----------
    y : numpy.ndarray
        Mask with values 0 and 1 whose first two axes are height and width
        (presumably shaped (H, W, 1) -- confirm against
        ``preprocess.get_ndarray_from_csv``).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``y``. Pixels equal to 1 receive
        ``1 - n_ones / n_pixels`` and pixels equal to 0 receive
        ``n_ones / n_pixels``, so the rarer class gets the larger weight.
    """
    n_ones = np.sum(y)
    # Height * width rather than width ** 2, so non-square masks are
    # normalised correctly; for square masks the value is unchanged.
    n_total = y.shape[0] * y.shape[1]
    w_ones = 1 - n_ones / n_total
    w_zeros = 1 - w_ones
    # Linear map: y == 1 -> w_ones, y == 0 -> w_zeros.
    w = y * (w_ones - w_zeros) + w_zeros
    return w

def dataGenerator(files, batches=1, weights=False, img_size=2084):
    """Endlessly yield randomly augmented batches built from ``files``.

    Parameters
    ----------
    files : iterable
        Entries that unpack into three values; the first is the image path
        passed to ``load_img`` and the second the CSV path passed to
        ``preprocess.get_ndarray_from_csv``. The third value is ignored.
        The entries are cycled forever in the given order.
    batches : int, default 1
        Number of samples per yielded batch.
    weights : bool, default False
        If True, yield ``(X, W)`` where ``W`` is the ``calc_weights`` map of
        each augmented mask; otherwise yield ``(X, Y)`` with the masks.
    img_size : int, default 2084
        Side length of the (square) images. Images must be square because
        the rotation / transpose augmentations swap the two image axes.

    Yields
    ------
    tuple of numpy.ndarray
        ``X`` with shape ``(batches, img_size, img_size, 3)`` and a second
        float32 array with shape ``(batches, img_size, img_size, 1)``.

    Raises
    ------
    ValueError
        If ``files`` is empty (raised on the first ``next()``).
    """
    files = list(files)
    if not files:
        # itertools.cycle over nothing would surface as an opaque
        # RuntimeError from inside the generator.
        raise ValueError("dataGenerator needs at least one file entry")
    file_cycle = itertools.cycle(files)

    while True:
        # Fresh arrays for every batch: previously yielded batches may still
        # be held by the consumer.
        X = np.zeros((batches, img_size, img_size, 3), dtype=np.float32)
        Y = np.zeros((batches, img_size, img_size, 1), dtype=np.float32)
        if weights:
            W = np.zeros((batches, img_size, img_size, 1), dtype=np.float32)
        for i in range(batches):
            f_png, f_csv, _ = next(file_cycle)
            x = np.array(load_img(f_png))
            y = preprocess.get_ndarray_from_csv(f_csv, img_size, img_size)
            x = x / 255
            # randint's upper bound is exclusive: idx is one of 0..6.
            idx = np.random.randint(0, 7)
            # Image and mask get the same transformation so they stay aligned.
            X[i] = augment_imgarr(x, idx)
            y = augment_imgarr(y, idx)
            Y[i] = y
            if weights:
                W[i] = calc_weights(y)
        if weights:
            # NOTE(review): the weight map is yielded *instead of* the mask,
            # so a consumer that expects (inputs, targets) will use W as the
            # target -- confirm this is intended.
            yield X, W
        else:
            yield X, Y

In [None]:
# Preview two augmented samples from the training generator.
# Only element 0 of each batch is displayed, so a batch size of 1 suffices;
# a batch of 20 would allocate 20 x 2084 x 2084 x 3 float32 values (about
# 1 GB) for X alone on every draw.
# With weights=True the generator's second output is the weight map, which is
# what is shown next to the image here (despite the name Y).
dg = dataGenerator(files_train, batches=1, weights=True)
for i in range(2):
    X, Y = next(dg)
    print(X.shape, Y.shape)
    preprocess.show_ndimg(X[0], Y[0])
    

In [None]:
# Build the network for 2084 x 2084 inputs and show its layer summary.
model = get_model(n_kernels=1, img_height=2084, img_width=2084)
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

In [None]:
# Optimiser: SGD with Nesterov momentum and a small learning-rate decay.
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.98, nesterov=True)
model.compile(loss="binary_crossentropy", optimizer=sgd, metrics=["accuracy"])

# Currently disabled:
#K.get_session().run(tf.global_variables_initializer())
#tensorboard = TensorBoard(log_dir="logs")

# Stop once val_loss has not improved for two epochs, and keep the weights
# of the best epoch (by val_loss) in model01.h5.
early_stopping = EarlyStopping(monitor="val_loss", patience=2, verbose=1)
best_checkpoint = ModelCheckpoint(
    "model01.h5",
    monitor="val_loss",
    save_best_only=True,
    verbose=1,
)
callbacks = [early_stopping, best_checkpoint]

# Endless batch generators: 10 samples per training batch, 3 per validation
# batch.
train_generator = dataGenerator(files_train, batches=10, weights=True)
valid_generator = dataGenerator(files_valid, batches=3, weights=True)

In [None]:
# Train from the generators.
#   steps_per_epoch  - number of generator batches drawn before a training
#                      epoch is considered finished
#   validation_steps - number of validation batches drawn when evaluating at
#                      the end of each epoch
#   max_queue_size   - maximum size of the generator queue
# The number of samples per mini batch is fixed by the `batches` argument
# given to dataGenerator when the generators are created.
fit_options = dict(
    generator=train_generator,
    steps_per_epoch=1,
    epochs=10,
    verbose=2,
    callbacks=callbacks,
    validation_data=valid_generator,
    validation_steps=1,
    max_queue_size=2,
)
hist = model.fit_generator(**fit_options)

In [None]:
# Per-epoch losses recorded in the training history.
training_loss = hist.history['loss']
test_loss = hist.history['val_loss']  # loss on the validation generator
epoch_count = range(1, len(training_loss) + 1)

# Training loss as a dashed red line, validation loss as a solid blue line.
plt.plot(epoch_count, training_loss, 'r--', label='Training Loss')
plt.plot(epoch_count, test_loss, 'b-', label='Validation Loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# Read the pickled list of test files from
# <splitdata.d_data>/<splitdata.f_test>.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on files produced by this project.
with open(os.path.join(splitdata.d_data, splitdata.f_test), "rb") as file:
    files_test = pickle.load(file)
if not files_test:
    # The message previously said "training files", a copy-paste leftover
    # from the training-list cell.
    raise Exception("Could not load test files!")

In [None]:
# Load the test data and scale the inputs by 1/255, the same factor the
# training generator applies to its images.
# Loading and scaling live in one cell and the unscaled array keeps its own
# name, so re-running the cell can never divide by 255 twice.
X_test_raw, Y_test = preprocess.load_data(files_test)
X_test = X_test_raw / 255

In [None]:
# Predict in small batches. Passing batch_size=len(X_test) would push the
# whole test set of 2084 x 2084 images through the network at once, which
# needs memory proportional to the test-set size. The batch size only
# controls how the samples are chunked; raise it if memory allows.
PREDICT_BATCH_SIZE = 1
Y_pred = model.predict(X_test, batch_size=PREDICT_BATCH_SIZE, verbose=1)

In [None]:
# Inspect the raw (not yet rounded) network output for the first test sample.
first_prediction = Y_pred[0]
print(first_prediction)

In [None]:
# Round every predicted value to the nearest integer (zero decimals).
Y_pred = np.around(Y_pred, decimals=0)

In [None]:
# Display each test target next to the corresponding prediction.
n_predictions = len(Y_pred)
for sample_idx in range(n_predictions):
    target = Y_test[sample_idx]
    predicted = Y_pred[sample_idx]
    preprocess.show_ndimg(target, predicted)

In [None]:
# Print the sum of each predicted array, one line per test sample.
for predicted in Y_pred:
    print(np.sum(predicted))