In [0]:
# Mount Google Drive at /gdrive (Colab-specific); the later cells read and
# write their data files under '/gdrive/My Drive/...'.
from google.colab import drive
drive.mount('/gdrive')


In [0]:

import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.layers import Input, Conv2D, Add, BatchNormalization, concatenate
from tensorflow.keras.models import Model

from sklearn.model_selection import KFold

In [0]:

# Load the training inputs and targets saved as .npy arrays on the mounted Drive.
# NOTE(review): array shapes are not visible here; build_model() uses
# x_train.shape[1:] as the per-sample input shape.
x_train = np.load('/gdrive/My Drive/data_dacon/data/x_train.npy')
y_train = np.load('/gdrive/My Drive/data_dacon/data/y_train.npy')


In [0]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred):
    """Mean absolute error restricted to targets that are at least 0.1.

    Both inputs are converted to NumPy arrays and flattened. Positions whose
    flattened ``y_true`` value is below 0.1 are left out of the average.

    Args:
        y_true: array-like of ground-truth values.
        y_pred: array-like of predictions with the same number of elements.

    Returns:
        Mean of ``|y_true - y_pred|`` over the selected positions.
    """
    truth = np.array(y_true).reshape(1, -1)[0]
    guess = np.array(y_pred).reshape(1, -1)[0]

    # Only positions whose target reaches the 0.1 threshold are scored.
    keep = truth >= 0.1
    abs_err = np.abs(truth[keep] - guess[keep])

    return np.mean(abs_err)

def fscore(y_true, y_pred):
    """F1 score after binarizing both inputs at the 0.1 threshold.

    Both inputs are converted to NumPy arrays and flattened. Positions whose
    true value is negative are dropped from both arrays first (presumably
    negative targets encode missing values -- verify against the data).
    The remaining values become class 1 when ``>= 0.1`` and class 0 otherwise.

    Args:
        y_true: array-like of ground-truth values.
        y_pred: array-like of predictions with the same number of elements.

    Returns:
        The value of ``sklearn.metrics.f1_score`` on the binarized arrays.
    """
    truth = np.array(y_true).reshape(1, -1)[0]
    guess = np.array(y_pred).reshape(1, -1)[0]

    # Mask of positions with a non-negative target.
    valid = truth >= 0

    truth_label = np.where(truth[valid] >= 0.1, 1, 0)
    guess_label = np.where(guess[valid] >= 0.1, 1, 0)

    return f1_score(truth_label, guess_label)

def maeOverFscore(y_true, y_pred):
    """Divide the thresholded MAE by the F1 score.

    Args:
        y_true: array-like of ground-truth values.
        y_pred: array-like of predictions.

    Returns:
        ``mae(y_true, y_pred) / (fscore(y_true, y_pred) + 1e-07)``.
    """
    error = mae(y_true, y_pred)
    f1 = fscore(y_true, y_pred)

    # The 1e-07 term keeps the division defined when the F1 score is 0.
    return error / (f1 + 1e-07)

def fscore_keras(y_true, y_pred):
    """Wrap ``fscore`` in ``tf.py_function`` so it can be passed as a Keras metric.

    Args:
        y_true: tensor of ground-truth values.
        y_pred: tensor of predictions.

    Returns:
        The ``tf.float32`` output of the py_function op named 'fscore_keras'.
    """
    return tf.py_function(
        func=fscore,
        inp=[y_true, y_pred],
        Tout=tf.float32,
        name='fscore_keras',
    )

def score(y_true, y_pred):
    """Wrap ``maeOverFscore`` in ``tf.py_function`` so it can be passed as a Keras metric.

    Args:
        y_true: tensor of ground-truth values.
        y_pred: tensor of predictions.

    Returns:
        The ``tf.float32`` output of the py_function op named 'custom_mse'.
    """
    return tf.py_function(
        func=maeOverFscore,
        inp=[y_true, y_pred],
        Tout=tf.float32,
        name='custom_mse',
    )

In [0]:
def build_model(input_shape=None):
  """Build the (uncompiled) convolutional Keras model.

  Layer sequence:
    BatchNorm -> Conv2D(32, 1x1) -> BatchNorm -> Conv2D(16, 2x2)
    -> concatenate both conv outputs on axis 3 -> BatchNorm -> Conv2D(1, 1x1)
  Every convolution uses stride 1, 'same' padding and a ReLU activation.

  Args:
    input_shape: shape of a single sample (no batch dimension). When None,
      the shape is taken from the notebook-level ``x_train`` array
      (``x_train.shape[1:]``), which is what this function always did before
      the parameter existed.

  Returns:
    A ``tensorflow.keras.models.Model`` mapping the input to the
    single-filter convolution output.
  """
  if input_shape is None:
    # Backward-compatible default: size the network from the global training data.
    input_shape = x_train.shape[1:]

  inputs = Input(input_shape)
  bn = BatchNormalization()(inputs)
  conv0 = Conv2D(32, kernel_size=1, strides=1, padding='same', activation='relu')(bn)

  bn = BatchNormalization()(conv0)
  conv = Conv2D(16, kernel_size=2, strides=1, padding='same', activation='relu')(bn)
  # Skip connection: stack the 1x1 features with the 2x2 features on axis 3.
  conc = concatenate([conv0, conv], axis=3)

  bn = BatchNormalization()(conc)

  outputs = Conv2D(1, kernel_size=1, strides=1, padding='same', activation='relu')(bn)

  model = Model(inputs, outputs)

  return model

In [0]:
def train_model(x_data, y_data, k, test):
    """Train one model per K-fold split and predict ``test`` with each of them.

    The data is split with a shuffled ``KFold`` (fixed ``random_state=7676``).
    For every fold a fresh model from ``build_model()`` is compiled with MAE
    loss, the Adam optimizer and the ``score`` / ``fscore_keras`` metrics,
    trained for 50 epochs with batch size 128 using the held-out part of the
    fold as validation data, and then used to predict ``test``.

    Args:
        x_data: array of training inputs, indexable by an integer index array.
        y_data: array of training targets aligned with ``x_data``.
        k: number of folds.
        test: inputs to predict with every fold's model.

    Returns:
        List with ``k`` prediction arrays, one per fold, in fold order.
    """
    k_fold = KFold(n_splits=k, shuffle=True, random_state=7676)

    predicts = []

    for train_idx, val_idx in k_fold.split(x_data):
        # Fold-local names chosen so they do not shadow the notebook-level
        # x_train / y_train arrays.
        x_fold_train, y_fold_train = x_data[train_idx], y_data[train_idx]
        x_fold_val, y_fold_val = x_data[val_idx], y_data[val_idx]

        # Release the Keras global state left behind by the previous fold's
        # model; without this, memory use grows with every model built in the loop.
        tf.keras.backend.clear_session()

        model = build_model()
        model.compile(loss='mae', optimizer='adam', metrics=[score, fscore_keras])
        model.fit(
            x_fold_train,
            y_fold_train,
            epochs=50,
            batch_size=128,
            validation_data=(x_fold_val, y_fold_val),
        )
        predicts.append(model.predict(test))

    return predicts

In [0]:
# Load the sample submission (used as the output template) and the test inputs.
submission = pd.read_csv('/gdrive/My Drive/data_dacon/data/sample_submission.csv')
test = np.load('/gdrive/My Drive/data_dacon/data/newtest.npy')
# Keep only the first 10 entries along the fourth axis.
# NOTE(review): presumably this matches the channel count of x_train, which
# build_model() uses for the input shape -- confirm.
test = test[:,:,:,:10]

In [0]:
# Train 5 cross-validation models; each one contributes a prediction for `test`.
predicts = train_model(x_train, y_train, 5,test)

# Element-wise average of the per-fold predictions.
predict = sum(predicts)/len(predicts)

# Reshape the averaged prediction to rows of 1600 values and write them into
# every column after the first, then save the result as the submission CSV.
# NOTE(review): 1600 is presumably the number of output pixels per sample
# (e.g. 40 x 40) -- confirm against the submission template.
submission.iloc[:,1:] = predict.reshape(-1,1600)
submission.to_csv('/gdrive/My Drive/data_dacon/data/submission.csv', index=False)