In [None]:
# Mount Google Drive at /gdrive; the data, checkpoint and submission paths
# used throughout this notebook all live under '/gdrive/My Drive'.
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.layers import Input, Conv2D, Add, BatchNormalization, concatenate, MaxPool2D, Flatten, Dense
from tensorflow.keras.models import load_model, Model
from sklearn.model_selection import train_test_split, KFold

In [None]:
# Build the training set by stacking the base arrays and the '_180' arrays
# along the sample axis. Every array is cast to float32 as it is loaded, and
# no temporary names are left behind in the notebook namespace.
x_train = np.concatenate(
    [
        np.load(path).astype('float32')
        for path in (
            '/gdrive/My Drive/data/x_train.npy',
            '/gdrive/My Drive/data/x_train_180.npy',
        )
    ],
    axis=0,
)
y_train = np.concatenate(
    [
        np.load(path).astype('float32')
        for path in (
            '/gdrive/My Drive/data/y_train.npy',
            '/gdrive/My Drive/data/y_train_180.npy',
        )
    ],
    axis=0,
)

In [None]:
# Array that is later passed to model.predict to build the submission;
# cast to float32 after loading.
test = np.load('/gdrive/My Drive/data/test.npy')
test = test.astype('float32')

In [None]:
# Keep only the first 35000 samples of the inputs and of the targets.
x_train = x_train[:35000]
y_train = y_train[:35000]

# Show both shapes as the cell output.
x_train.shape, y_train.shape

((35000, 40, 40, 10), (35000, 40, 40, 1))

In [None]:
# Keep only the first 10 channels of the test array.
test = test[:, :, :, :10]

# Hold out 2.5% of the samples; the remaining samples are what the k-fold
# training below receives.
# NOTE: this cell rebinds x_train / y_train, so running it a second time
# without re-running the cells above splits the already-reduced training set
# again.
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.025, random_state=7777)

# Display the four shapes; the recorded output of this cell is this tuple,
# but the expression producing it was missing from the cell.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((34125, 40, 40, 10), (34125, 40, 40, 1), (875, 40, 40, 10), (875, 40, 40, 1))

In [None]:
def mae_over_fscore(y_true, y_pred):
    '''
    Return the masked MAE divided by the thresholded F1 score.

    y_true: actual values, laid out like sample_submission.csv
    y_pred: predicted values, laid out like sample_submission.csv
    '''
    # Flatten both inputs to 1-D arrays.
    truth = np.array(y_true).reshape(1, -1)[0]
    guess = np.array(y_pred).reshape(1, -1)[0]

    # Pixels whose actual value is at least 0.1.
    at_least_point_one = truth >= 0.1

    # Pixels whose actual value is not missing (the original comment treats
    # values failing the ">= 0" test as missing).
    not_missing = truth >= 0

    # MAE is computed only over the pixels whose actual value is >= 0.1.
    abs_error = np.abs(truth[at_least_point_one] - guess[at_least_point_one])
    mae = np.mean(abs_error)

    # For the F1 score, binarise the non-missing pixels at the 0.1 threshold.
    truth_labels = np.where(truth[not_missing] >= 0.1, 1, 0)
    guess_labels = np.where(guess[not_missing] >= 0.1, 1, 0)
    f_score = f1_score(truth_labels, guess_labels)

    # A small constant (1e-07) guards against an F1 score of exactly 0.
    return mae / (f_score + 1e-07)

In [None]:
def mae(y_true, y_pred):
    """Mean absolute error over the elements whose true value is >= 0.1.

    Both inputs are converted to NumPy arrays and flattened; elements whose
    true value is below 0.1 are ignored entirely.
    """
    truth = np.array(y_true).reshape(1, -1)[0]
    guess = np.array(y_pred).reshape(1, -1)[0]

    # Boolean mask selecting the elements that take part in the average.
    selected = truth >= 0.1

    abs_error = np.abs(truth[selected] - guess[selected])
    return np.mean(abs_error)

def fscore(y_true, y_pred):
    """F1 score of the 0.1-thresholded prediction against the 0.1-thresholded truth.

    Both inputs are flattened. Elements whose true value is negative are
    dropped before thresholding; the surviving elements are labelled 1 when
    the value is >= 0.1 and 0 otherwise.
    """
    truth = np.array(y_true).reshape(1, -1)[0]
    guess = np.array(y_pred).reshape(1, -1)[0]

    # Keep only the positions whose true value passes the ">= 0" test.
    keep = truth >= 0

    truth_labels = np.where(truth[keep] >= 0.1, 1, 0)
    guess_labels = np.where(guess[keep] >= 0.1, 1, 0)

    return f1_score(truth_labels, guess_labels)

def maeOverFscore(y_true, y_pred):
    """Return mae(y_true, y_pred) divided by fscore(y_true, y_pred) + 1e-07."""
    error = mae(y_true, y_pred)
    f_value = fscore(y_true, y_pred)
    # The 1e-07 term keeps the division defined when the F1 score is 0.
    return error / (f_value + 1e-07)

def fscore_keras(y_true, y_pred):
    """Run the NumPy-based fscore through tf.py_function, returning a float32 tensor.

    Used as a Keras metric when the model is compiled.
    """
    return tf.py_function(func=fscore, inp=[y_true, y_pred], Tout=tf.float32, name='fscore_keras')

def score(y_true, y_pred):
    """Run the NumPy-based maeOverFscore through tf.py_function, returning a float32 tensor.

    Used as a Keras metric when the model is compiled; the checkpoint
    callback monitors its validation value.
    """
    return tf.py_function(func=maeOverFscore, inp=[y_true, y_pred], Tout=tf.float32, name='custom_mse')

In [None]:
def create_model(input_shape=None):
    """Build the (uncompiled) fully convolutional model.

    Architecture: an input BatchNormalization, then three stages of
    [Conv2D 3x3 -> Conv2D 3x3 -> BatchNormalization] with 64, 128 and 256
    filters, then a 1x1 convolution with a single filter and ReLU activation.
    Every convolution uses padding='same' and stride 1 and there is no
    pooling, so the output keeps the spatial size of the input.

    Args:
        input_shape: Shape of one sample, without the batch dimension. When
            omitted, the shape is taken from the module-level ``x_train``
            array (``x_train.shape[1:]``), which is what this function
            always did before the parameter existed.

    Returns:
        A ``tensorflow.keras.models.Model`` mapping the input to a
        single-channel output.
    """
    if input_shape is None:
        # Backward-compatible default: size the input from the global array.
        input_shape = x_train.shape[1:]

    inputs = Input(input_shape)

    x = BatchNormalization()(inputs)
    for filters in (64, 128, 256):
        x = Conv2D(filters, kernel_size=3, padding='same', activation='relu')(x)
        x = Conv2D(filters, kernel_size=3, padding='same', activation='relu')(x)
        x = BatchNormalization()(x)

    # 1x1 convolution collapses the feature maps to one channel; ReLU keeps
    # the output non-negative.
    outputs = Conv2D(1, kernel_size=1, strides=1, padding='same', activation='relu')(x)

    return Model(inputs, outputs)

In [None]:
def train_model(x_data, y_data, k, model_dir='/gdrive/My Drive/models', epochs=50, batch_size=128):
    """Train one model per fold of a shuffled k-fold split and checkpoint each one.

    For every fold a fresh model from ``create_model()`` is compiled with MAE
    loss and the Adam optimizer, and trained with two callbacks:
    ReduceLROnPlateau on ``val_loss`` and a ModelCheckpoint that keeps only
    the epoch with the lowest ``val_score``.

    Args:
        x_data: Input array, indexed along its first axis by the fold indices.
            ``create_model()`` sizes its input layer from the module-level
            ``x_train``, so ``x_data`` must have the same per-sample shape.
        y_data: Target array, indexed the same way as ``x_data``.
        k: Number of folds (and therefore of trained models).
        model_dir: Directory the checkpoints are written to, as
            ``model<fold>.h5``. Created if it does not exist.
        epochs: Number of training epochs per fold.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        List with the checkpoint path used for each fold, in fold order.
    """
    # ModelCheckpoint does not create missing directories; do it up front so
    # a missing folder does not abort training at the first save.
    os.makedirs(model_dir, exist_ok=True)

    # Shuffled split with a fixed seed so the folds are reproducible.
    k_fold = KFold(n_splits=k, shuffle=True, random_state=7777)

    checkpoint_paths = []
    for model_number, (train_idx, val_idx) in enumerate(k_fold.split(x_data, y_data)):
        x_fold, y_fold = x_data[train_idx], y_data[train_idx]
        x_val, y_val = x_data[val_idx], y_data[val_idx]

        # Drop Keras' global state left over from the previous fold so that
        # building k models in a loop does not keep accumulating memory.
        tf.keras.backend.clear_session()

        model = create_model()
        model.compile(loss='mae', optimizer='adam', metrics=[score, fscore_keras])

        checkpoint_path = os.path.join(model_dir, 'model' + str(model_number) + '.h5')
        callbacks_list = [
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                patience=3,
                factor=0.75
            ),
            tf.keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_path,
                monitor='val_score',
                # `score` is MAE divided by F1, so lower is better; state it
                # explicitly instead of relying on name-based inference.
                mode='min',
                save_best_only=True
            )
        ]

        model.fit(
            x_fold, y_fold,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_val, y_val),
            callbacks=callbacks_list,
        )
        checkpoint_paths.append(checkpoint_path)

        # Release this fold's copies before the next fold allocates its own.
        del x_fold, y_fold, x_val, y_val

    return checkpoint_paths

In [None]:
k = 4
models = []

# Train one model per fold; the checkpoint callback inside train_model
# writes the best epoch of each fold to Drive.
train_model(x_train, y_train, k)

# Reload the checkpoints. The custom metrics the models were compiled with
# must be supplied by name so the saved models can be deserialized.
custom_metrics = {'score':score,'fscore_keras':fscore_keras}
for n in range(k):
    checkpoint_path = '/gdrive/My Drive/models/model' + str(n) + '.h5'
    model = load_model(checkpoint_path, custom_objects=custom_metrics)
    models.append(model)

Epoch 1/50

In [None]:
# Evaluate every fold model on the held-out split, printing each model's
# metric right after its predictions are computed.
preds = []
for model in models:
    fold_pred = model.predict(x_test)
    preds.append(fold_pred)
    print(mae_over_fscore(y_test, fold_pred))

# Metric of the ensemble: the element-wise average of the fold predictions.
pred = sum(preds) / len(preds)
print(mae_over_fscore(y_test, pred))

In [None]:
# Predict the test array with every fold model, then average the
# predictions element-wise to obtain the ensemble output.
preds = [model.predict(test) for model in models]

pred = sum(preds) / len(preds)

In [None]:
# Fill the sample submission with the ensemble prediction and save it.
submission = pd.read_csv('/gdrive/My Drive/sample_submission.csv')

# Every column except the first receives the prediction, flattened to 1600
# values per row (presumably the 40 x 40 output map of each sample - confirm
# against the submission's column count).
submission.iloc[:, 1:] = pred.reshape(-1, 1600)

submission.to_csv('/gdrive/My Drive/Decon_submission4.csv', index=False)

# Preview the result. A notebook only renders the last expression of a cell,
# so the preview has to come after the file is written; the bare `submission`
# in the middle of the cell displayed nothing.
submission.head()