In [None]:
import glob
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import random
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Conv2DTranspose, MaxPooling2D, BatchNormalization, Activation, concatenate, Input, GlobalAveragePooling2D
from tensorflow.keras import Model
import warnings
 
# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# raised by the libraries imported above are hidden as well.
warnings.filterwarnings("ignore")

In [None]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred):
    """Mean absolute error restricted to positions where the truth is >= 0.1.

    Both inputs are converted to NumPy arrays and flattened, so any matching
    shapes are accepted. Positions whose true value is below 0.1 are ignored.

    Returns the mean of ``|y_true - y_pred|`` over the kept positions
    (``nan`` when no position qualifies).
    """
    truth = np.asarray(y_true).ravel()
    guess = np.asarray(y_pred).ravel()

    # Only score the cells whose true value reaches the 0.1 threshold.
    keep = truth >= 0.1

    return np.abs(truth[keep] - guess[keep]).mean()

def fscore(y_true, y_pred):
    """F1 score of the binary "value >= 0.1" classification.

    Inputs are flattened; positions whose true value is negative are dropped
    from both arrays before thresholding. The remaining values are turned into
    0/1 labels with a 0.1 threshold and passed to ``f1_score``.
    """
    truth = np.asarray(y_true).ravel()
    guess = np.asarray(y_pred).ravel()

    # Negative truth values are excluded from the evaluation.
    valid = truth >= 0

    truth_labels = np.where(truth[valid] >= 0.1, 1, 0)
    guess_labels = np.where(guess[valid] >= 0.1, 1, 0)

    return f1_score(truth_labels, guess_labels)

def maeOverFscore(y_true, y_pred):
    """Return ``mae / (fscore + 1e-07)`` for the given truth and prediction.

    The small constant in the denominator avoids division by zero when the
    F1 score is 0.
    """
    error = mae(y_true, y_pred)
    denominator = fscore(y_true, y_pred) + 1e-07
    return error / denominator

def fscore_keras(y_true, y_pred):
    """Wrap ``fscore`` in ``tf.py_function`` so it returns a float32 tensor."""
    return tf.py_function(
        func=fscore,
        inp=[y_true, y_pred],
        Tout=tf.float32,
        name='fscore_keras',
    )

def maeOverFscore_keras(y_true, y_pred):
    """Wrap ``maeOverFscore`` in ``tf.py_function`` so it returns a float32 tensor."""
    return tf.py_function(
        func=maeOverFscore,
        inp=[y_true, y_pred],
        Tout=tf.float32,
        name='maeOverFscore_keras',
    )

In [None]:
# Fix the random seeds for reproducibility.
SEED = 32
# Seed NumPy, the stdlib `random` module and TensorFlow, in that order.
for _seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
    _seed_fn(SEED)

In [None]:
def trainGenerator(train_path='inputs/train/', rot_path='inputs/rot/'):
    """Yield ``(feature, target)`` pairs from the ``.npy`` samples of two directories.

    Files of ``train_path`` are visited first, then files of ``rot_path``;
    within each directory the names are sorted so the sample order is the same
    on every run and every file system (``os.listdir`` alone returns entries
    in arbitrary order).

    Args:
        train_path: directory holding the training ``.npy`` files.
        rot_path: directory holding the additional ``.npy`` files.

    Yields:
        feature: array of shape (40, 40, 11) made of channels 0-9 of the file
            followed by the terrain code derived from channel 9.
        target: channel 14 of the file reshaped to (40, 40, 1).

    Samples whose last channel sums to a negative value are skipped.
    """
    all_files = []
    for directory in (train_path, rot_path):
        # Sort explicitly: the order returned by os.listdir is not defined.
        for name in sorted(os.listdir(directory)):
            all_files.append(os.path.join(directory, name))

    for file in all_files:
        npy = np.load(file)

        # Skip the sample when the last channel sums to a negative value
        # (presumably a missing-value sentinel in the target - TODO confirm).
        if npy[:, :, -1].sum() < 0:
            continue

        target = npy[:, :, 14].reshape(40, 40, 1)

        # Coarse code from channel 9: value / 100 truncated to an int8.
        terrain = (npy[:, :, 9] / 100).astype('int8').reshape(40, 40, 1)

        # Channels 0-9 plus the terrain code as the 11th channel.
        feature = np.concatenate((npy[:, :, :10], terrain), axis=-1)

        yield (feature, target)
        
# Stream the generator's (feature, target) pairs as float32 tensors of fixed shape.
train_dataset = tf.data.Dataset.from_generator(
    trainGenerator,
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([40, 40, 11]), tf.TensorShape([40, 40, 1])),
)

In [None]:
# Batch 256 samples at a time and keep one batch prefetched.
# NOTE: this rebinds `train_dataset`, so running the cell twice batches twice.
train_dataset = train_dataset.batch(batch_size=256).prefetch(buffer_size=1)

In [None]:
test_path = 'inputs/test'
# Sorted so the prediction order is deterministic.
test_files = sorted(glob.glob(test_path + '/*'))

X_test = []

for file in tqdm(test_files, desc = 'test'):
    npy = np.load(file)
    # Coarse code from channel 9: value / 100 truncated to an int8.
    terrain = np.reshape((npy[:,:,9] / 100).astype('int8'), (40, 40, 1))
    # Append the code as an extra trailing channel.
    npy = np.dstack((npy, terrain))
    # Keep channels 0-9 and index 14 (the appended code when the file has
    # 14 channels - TODO confirm against the data).
    X_test.append(np.take(npy, list(range(10)) + [14], axis=-1))

X_test = np.array(X_test)

In [None]:
def build_model(input_layer, start_neurons):
    """Build a small encoder/decoder CNN with skip connections.

    Args:
        input_layer: Keras input tensor the network is applied to.
        start_neurons: number of filters in the first level; the second level
            uses twice and the bottleneck four times as many.

    Returns:
        The output tensor of a final 1x1 convolution with one filter and
        ReLU activation.

    Two 2x2 max-poolings halve the spatial size twice and two stride-2
    transposed convolutions restore it (40 -> 20 -> 10 -> 20 -> 40 for a
    40 x 40 input). Layers are created in the same order as a straight-line
    definition would create them.
    """

    def conv_pair(tensor, filters):
        # Two consecutive 3x3 ReLU convolutions with "same" padding.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation="relu", padding="same")(tensor)
        return tensor

    def shrink(tensor):
        # Batch-norm, halve the resolution, then dropout.
        tensor = BatchNormalization()(tensor)
        tensor = MaxPooling2D((2, 2))(tensor)
        return Dropout(0.25)(tensor)

    def expand(tensor, skip, filters):
        # Double the resolution, merge with the encoder features, refine.
        upsampled = Conv2DTranspose(filters, (3, 3), strides=(2, 2), padding="same")(tensor)
        merged = concatenate([upsampled, skip])
        merged = Dropout(0.25)(merged)
        merged = conv_pair(merged, filters)
        return BatchNormalization()(merged)

    # Encoder level 1 (pooling halves the resolution).
    conv1 = conv_pair(input_layer, start_neurons * 1)
    pool1 = shrink(conv1)

    # Encoder level 2 (pooling halves the resolution again).
    conv2 = conv_pair(pool1, start_neurons * 2)
    pool2 = shrink(conv2)

    # Bottleneck at the lowest resolution.
    convm = Conv2D(start_neurons * 4, (3, 3), activation="relu", padding="same")(pool2)

    # Decoder level 2, using the level-2 encoder features as skip connection.
    uconv2 = expand(convm, conv2, start_neurons * 2)

    # Decoder level 1, using the level-1 encoder features as skip connection.
    uconv1 = expand(uconv2, conv1, start_neurons * 1)
    uconv1 = Dropout(0.25)(uconv1)

    # Single-channel, non-negative output map.
    return Conv2D(1, (1,1), padding="same", activation='relu')(uconv1)

# 40 x 40 input with 11 channels, fed through the network with 32 base filters.
input_layer = Input(shape=(40, 40, 11))
output_layer = build_model(input_layer, start_neurons=32)

In [None]:
# Tie the input and output tensors together into a trainable Keras model.
model = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Optimise mean absolute error with Adam and also report it as a metric.
model.compile(optimizer="adam", loss="mae", metrics=["mae"])

In [None]:
# First training run: 300 passes over the batched training dataset.
model_history = model.fit(x=train_dataset, epochs=300, verbose=1)

In [None]:
# pred = model.predict(X_test)

In [None]:
# Display the object returned by the model.fit call above.
model_history

In [None]:
%%time
import pickle
# Takes about 20 seconds.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# NOTE(review): the absolute D:/ paths tie this cell to one machine.
with open('D:/inputs/24/train.pickle', 'rb') as train_file:
    train = pickle.load(train_file)
with open('D:/inputs/24/train_y.pickle', 'rb') as target_file:
    train_y = pickle.load(target_file)
# Give the targets an explicit trailing channel axis: (n, 40, 40, 1).
train_y = train_y.reshape(train_y.shape[0], 40, 40, 1)

In [None]:
# Build the 11-channel model input from the pickled training array and score
# the model on it.
# Coarse code from channel 9: value / 100 truncated to an int8.
terrain = (train[:,:,:,9] / 100).astype('int8').reshape(len(train), 40,40,1)
# Keep channels 0-9 and append the terrain code as the 11th channel. Selecting
# the channels explicitly (rather than concatenating and then picking a
# hard-coded index 14) does not depend on how many channels `train` has, so
# the cell can be re-run after `train` has already been reduced to 11
# channels; it also avoids copying the channels that are discarded anyway.
train = np.concatenate((train[:,:,:,:10], terrain), axis=-1)
score = maeOverFscore(train_y, model.predict(train))

In [None]:
# Display the MAE / F1 score computed in the previous cell.
score

In [None]:
# Persist the model after the 300-epoch run.
model.save(filepath='allrot300.h5')

In [None]:
# Continue training for 100 more epochs.
# NOTE: this replaces the history object of the 300-epoch run.
model_history = model.fit(x=train_dataset, epochs=100, verbose=1)

In [None]:
# Re-evaluate MAE / F1 on the pickled training set after the extra epochs.
score = maeOverFscore(y_true=train_y, y_pred=model.predict(x=train))
score

In [None]:
# Predict the test set and write the submission file.
pred = model.predict(x=X_test)
submission = pd.read_csv('inputs/sample_submission.csv')
# Each prediction map is flattened to 1600 values and written into every
# column after the first one.
submission.iloc[:, 1:] = pred.reshape((-1, 1600))
submission.to_csv('1.374.csv', index=False)