In [2]:
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

import tensorflow.keras as keras
import tensorflow.keras.backend as K

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten,MaxPooling2D,BatchNormalization,Lambda, AveragePooling2D, Dropout, SpatialDropout2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

  from ._conv import register_converters as _register_converters


In [None]:
train = pd.read_csv('/kaggle/input/dacon-crash-prediction/train_features.csv')
test = pd.read_csv('/kaggle/input/dacon-crash-prediction/test_features.csv')
train_target = pd.read_csv('/kaggle/input/dacon-crash-prediction/train_target.csv')

In [None]:
def JB(y):
    """Return the running (cumulative) sum of ``y`` as a list.

    The first output equals the first input; every later output is the
    previous output plus the current input. An empty input yields ``[]``.
    """
    totals = []
    for value in y:
        if totals:
            totals.append(totals[-1] + value)
        else:
            # First element: nothing to accumulate onto yet.
            totals.append(value)
    return totals

def JB_applying(train):
    """Add the per-id cumulative sum of each sensor column.

    For every sensor column ``S1``..``S4`` a column ``<S>_JB`` is written that
    holds the running sum of that sensor within each ``id`` group, in row order.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1``..``S4``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        # One vectorised grouped cumsum replaces the per-id loop and the
        # chained assignment `train[col][mask] = ...`, which pandas does not
        # guarantee to write back (SettingWithCopy / copy-on-write).
        # NOTE: unlike a plain Python running sum, cumsum does not propagate a
        # NaN to the following rows of the group.
        train[S + '_JB'] = train.groupby('id')[S].cumsum()
    return train

def JB2_applying(train):
    """Add the per-id cumulative sum of each ``<S>_JB`` column.

    For every sensor ``S1``..``S4`` a column ``<S>_JB2`` is written that holds
    the running sum of ``<S>_JB`` within each ``id`` group, in row order.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1_JB``..``S4_JB``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        # Grouped cumsum instead of chained assignment `train[col][mask] = ...`,
        # which pandas does not guarantee to write back.
        train[S + '_JB2'] = train.groupby('id')[S + '_JB'].cumsum()
    return train

def JB3_applying(train):
    """Add the per-id cumulative sum of each ``<S>_JB2`` column.

    For every sensor ``S1``..``S4`` a column ``<S>_JB3`` is written that holds
    the running sum of ``<S>_JB2`` within each ``id`` group, in row order.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1_JB2``..``S4_JB2``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        # Grouped cumsum instead of chained assignment `train[col][mask] = ...`,
        # which pandas does not guarantee to write back.
        train[S + '_JB3'] = train.groupby('id')[S + '_JB2'].cumsum()
    return train

def new_var(train):
    """Add pairwise sensor differences ``S1-S2`` and ``S3-S4`` in place.

    Returns the same DataFrame that was passed in.
    """
    first_pair_gap = train['S1'].sub(train['S2'])
    second_pair_gap = train['S3'].sub(train['S4'])
    train['S1-S2'] = first_pair_gap
    train['S3-S4'] = second_pair_gap
    return train

def arr_time(train):
    """Add a 0/1 "signal has arrived" flag column ``<S>_AT`` per sensor.

    For each id and sensor the number of zero-valued samples ``z`` is counted.
    The flag is 0 for the first ``z - 1`` rows of that id and 1 for the rest,
    i.e. it switches on one row before the end of the zero run.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1``..``S4``. Modified in place.
        NOTE(review): flags are generated in ascending-id order and written
        positionally, so rows are assumed to be grouped by ascending ``id``,
        and zeros are assumed to be the leading samples - confirm.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    ids = train['id']
    for S in ['S1', 'S2', 'S3', 'S4']:
        values = []
        for id_num in sorted(ids.unique()):
            signal = train.loc[ids == id_num, S]
            n_steps = len(signal)  # 375 in the competition data
            n_zero = n_steps - int((signal != 0).sum())
            # Clamp at 0: with no zero sample the previous `[0] * (z - 1)`
            # expression produced one flag too many (376 instead of 375).
            n_before = max(n_zero - 1, 0)
            values += [0] * n_before + [1] * (n_steps - n_before)
        train[S + '_AT'] = values
    return train

def AT_diff(train):
    """Add ``S1-S2_AT`` and ``S3-S4_AT``: differences of the ``<S>_AT`` flags.

    Requires the ``S1_AT``..``S4_AT`` columns. Modifies ``train`` in place and
    returns it.
    """
    first_pair_gap = train['S1_AT'].sub(train['S2_AT'])
    second_pair_gap = train['S3_AT'].sub(train['S4_AT'])
    train['S1-S2_AT'] = first_pair_gap
    train['S3-S4_AT'] = second_pair_gap
    return train

def getting_convex(train, id, S, desc=True, is_abs=False):
    """Find the turning points of one sensor series of one id.

    The series is walked sample by sample (the value before the first sample
    is taken as 0). A point is recorded when

    * the series stops changing after having moved (enters a plateau), or
    * the direction of change flips sign.

    Each recorded point is ``(previous sample value, index of current sample)``.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and column ``S``.
    id : value compared against ``train['id']``.
    S : str
        Column to analyse.
    desc : bool, default True
        Sort by value descending; ``desc == False`` sorts ascending.
    is_abs : bool, default False
        Store ``abs(value)`` instead of the signed value.

    Returns
    -------
    list of (value, index) tuples, sorted by value.
    """
    signal = train.loc[train['id'] == id, S]
    vex = []
    prev_value = 0
    prev_trend = 0
    for idx, value in enumerate(signal):
        diff = value - prev_value
        if diff > 0:
            trend = 1
        elif diff == 0:
            trend = 0
        else:
            trend = -1

        entered_plateau = trend == 0 and prev_trend != 0
        reversed_direction = trend * prev_trend < 0
        if entered_plateau or reversed_direction:
            # is_abs is now honoured for plateau points too; previously only
            # direction reversals were converted to absolute values.
            vex.append((abs(prev_value) if is_abs else prev_value, idx))

        prev_value = value
        prev_trend = trend

    # Sort once, in the requested direction (key negation keeps tie order).
    if desc == False:  # noqa: E712 - keep the original equality semantics
        return sorted(vex, key=lambda point: point[0])
    return sorted(vex, key=lambda point: -point[0])


def checking_convex(train):
    """Add a 0/1 column ``<S>_convex`` marking turning points of S1..S4.

    For every id the indices returned by ``getting_convex`` are set to 1 in a
    zero vector as long as that id's series; the vectors are concatenated in
    ``train['id'].unique()`` order and written positionally.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1``..``S4``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        value = []
        for id in train['id'].unique():
            n_steps = int((train['id'] == id).sum())  # 375 in the competition data
            tmp = np.zeros(n_steps, dtype=int)
            turning_idx = [point[1] for point in getting_convex(train, id, S)]
            # Guard the empty case: indexing with an empty (float) array
            # raises IndexError for series without any turning point.
            if turning_idx:
                tmp[turning_idx] = 1
            value += list(tmp)
        train[S + '_convex'] = value
    return train

def checking_convex_JB(train):
    """Add a 0/1 column ``<S>_JB_convex`` marking turning points of S1_JB..S4_JB.

    For every id the indices returned by ``getting_convex`` are set to 1 in a
    zero vector as long as that id's series; the vectors are concatenated in
    ``train['id'].unique()`` order and written positionally.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``id`` and ``S1_JB``..``S4_JB``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for S in ['S1_JB', 'S2_JB', 'S3_JB', 'S4_JB']:
        value = []
        for id in train['id'].unique():
            n_steps = int((train['id'] == id).sum())  # 375 in the competition data
            tmp = np.zeros(n_steps, dtype=int)
            turning_idx = [point[1] for point in getting_convex(train, id, S)]
            # Guard the empty case: indexing with an empty (float) array
            # raises IndexError for series without any turning point.
            if turning_idx:
                tmp[turning_idx] = 1
            value += list(tmp)
        train[S + '_convex'] = value
    return train


def getting_conflict(train, id, S):  # Set all of these points to 1?
    """Return turning points whose value has the same sign as the previous one.

    The turning points from ``getting_convex`` are ordered by index; a point
    is kept when the product of its value and the preceding turning point's
    value is positive. The first turning point is never kept (its predecessor
    is taken as 0).

    Returns a list of ``(index, value)`` tuples in index order.
    """
    by_time = sorted(getting_convex(train, id, S), key=lambda point: point[1])
    # Value preceding each turning point; 0 stands in before the first one.
    preceding = [0] + [value for value, _ in by_time[:-1]]
    return [
        (t, value)
        for prev, (value, t) in zip(preceding, by_time)
        if prev * value > 0
    ]

def checking_conflict(train):
    """Add a 0/1 column ``<S>_conflict`` marking the points from ``getting_conflict``.

    Each id contributes 375 flags; the flags are concatenated in
    ``train['id'].unique()`` order and written positionally. Modifies ``train``
    in place and returns it.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        flags = []
        for id in train['id'].unique():
            hit_idx = [pair[0] for pair in getting_conflict(train, id, S)]
            if hit_idx:
                per_id = np.array([0]*375)
                per_id[np.array(hit_idx)] = 1
            else:
                # No conflict for this id: all-zero block.
                per_id = [0]*375
            flags += list(per_id)
        train[S +'_conflict'] = flags
    return train


def first_conflict_time(train):
    """Add a 0/1 column ``<S>_Fir_Conf_T`` that is 1 before the first conflict.

    With first conflict index ``t`` (from ``getting_conflict``) an id gets
    ``t - 1`` ones followed by ``376 - t`` zeros; ids without any conflict get
    375 ones. Blocks are concatenated in ``train['id'].unique()`` order.
    Modifies ``train`` in place and returns it.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        flags = []
        for id in train['id'].unique():
            conflicts = getting_conflict(train, id, S)
            if conflicts:
                first_t = conflicts[0][0]
                flags.extend([1]*(first_t-1) + [0]*(376 - first_t))
            else:
                flags.extend([1]*375)
        train[S +'_Fir_Conf_T'] = flags
    return train

def getting_trend(train, id, S):
    """Return the step-wise direction of column ``S`` for one id.

    Each sample is compared with the previous one (0 before the first sample):
    1 for an increase, 0 for no change, -1 otherwise.
    """
    directions = []
    last_seen = 0
    for current in train[train['id']==id][S]:
        step = current - last_seen
        directions.append(1 if step > 0 else (0 if step == 0 else -1))
        last_seen = current
    return directions

def checking_trend(train):
    """Add a column ``<S>_trend`` holding ``getting_trend`` for every id.

    Per-id results are concatenated in ``train['id'].unique()`` order and
    written positionally. Modifies ``train`` in place and returns it.
    """
    for S in ['S1', 'S2', 'S3', 'S4']:
        train[S + '_trend'] = [
            step
            for id in train['id'].unique()
            for step in getting_trend(train, id, S)
        ]
    return train

### Result 1

In [None]:
# Feature Engineering
sample_submission = pd.read_csv('/kaggle/input/dacon-crash-prediction/sample_submission.csv')

# The feature helpers add columns in place, so work on copies: the raw
# `train` / `test` frames stay untouched and re-running this cell (or a later
# section) does not inherit this section's feature columns.
master = JB_applying(train.copy())
master = JB2_applying(master)
master = arr_time(master)
master = checking_convex(master)

test_master = JB_applying(test.copy())
test_master = JB2_applying(test_master)
test_master = arr_time(test_master)
test_master = checking_convex(test_master)

# reshape to (samples, time steps, features, 1); the first column is dropped
# (id column omitted)
master_np = np.reshape(np.array(master)[:,1:], (2800, 375, master.shape[1]-1, 1))
test_master_np = np.reshape(np.array(test_master)[:,1:], (700, 375, test_master.shape[1]-1, 1))

X_train = master_np.copy()
Y_train = np.array(train_target)[:,1:]  # first column of the target file dropped
X_test = test_master_np.copy()

# Shapes and NaN counts as a sanity check
print(X_train.shape, np.isnan(X_train).sum().sum())
print(Y_train.shape, np.isnan(Y_train).sum().sum())
print(X_test.shape, np.isnan(X_test).sum().sum())

def my_loss(y_true, y_pred):
    """Mean squared relative error over all outputs.

    Computes ``mean(((y_pred - y_true) / (y_true + 1e-6)) ** 2)``; the small
    offset keeps the denominator non-zero when ``y_true`` is exactly 0.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error))

def my_loss_E1(y_true, y_pred):
    """Squared error restricted to the first two outputs, scaled by 1/2e4.

    Outputs 2 and 3 are zeroed by the mask, but the mean is still taken over
    all four columns.
    """
    column_mask = np.array([1,1,0,0])
    masked_sq_error = K.square(y_true-y_pred)*column_mask
    return K.mean(masked_sq_error)/2e+04

def my_loss_E2_M(y_true, y_pred):
    """Squared relative error restricted to output index 2.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,1,0]))

def my_loss_E2_V(y_true, y_pred):
    """Squared relative error restricted to output index 3.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,0,1]))

def set_model(target_idx):  # 0:x,y, 1:m, 2:v
    """Build and compile the CNN for one target group.

    Six Conv2D(3x1, valid) -> BatchNormalization -> MaxPooling2D(2x1) stages
    with 16, 32, 64, 128, 256 and 512 filters are followed by dense layers of
    128, 64, 32 and 16 units and a 4-unit linear output. The input shape is
    taken from the global ``X_train``. With 375 time steps the time axis
    shrinks 375 -> 186 -> 92 -> 45 -> 21 -> 9 -> 3; the feature axis is
    untouched because kernels and pools have width 1.

    ``target_idx`` selects the loss: 0 -> ``my_loss_E1``, 1 -> ``my_loss_E2_M``,
    anything else -> ``my_loss_E2_V``.
    """
    activation = 'elu'
    padding = 'valid'
    nf = 16
    fs = (3,1)
    input_shape = X_train.shape[1:4]

    model = Sequential()
    for stage, n_filters in enumerate((nf, nf*2, nf*4, nf*8, nf*16, nf*32)):
        conv_kwargs = dict(padding=padding, activation=activation)
        if stage == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(n_filters, fs, **conv_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 1)))

    model.add(Flatten())
    for units in (128, 64, 32, 16):
        model.add(Dense(units, activation ='elu'))  # only the output size is given
    model.add(Dense(4))

    optimizer = keras.optimizers.Adam()

    if target_idx==0:
        loss_fn = my_loss_E1
    elif target_idx==1:
        loss_fn = my_loss_E2_M
    else:
        loss_fn = my_loss_E2_V
    model.compile(loss=loss_fn, optimizer=optimizer)
    return model

def train(model, X, Y, is_val=False, epochs=150):
    """Fit ``model`` and checkpoint the best epoch to ``best_m.hdf5``.

    NOTE(review): this function shares its name with the raw ``train``
    DataFrame loaded earlier in the notebook; once this cell has run, ``train``
    refers to this function.

    Parameters
    ----------
    model : compiled Keras model.
    X, Y : training inputs and targets; 20% is used as validation split.
    is_val : bool, default False
        When ``is_val == False`` the train/validation loss curves are plotted.
    epochs : int, default 150
        Number of training epochs.

    Returns
    -------
    The fitted ``model`` (last-epoch weights; the best weights are on disk).
    """
    MODEL_SAVE_FOLDER_PATH = './model/'
    # Kept from the original cell; nothing in this function writes into it.
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    # Overwrite best_m.hdf5 only when the validation loss improves.
    best_save = ModelCheckpoint('best_m.hdf5', save_best_only=True, monitor='val_loss', mode='min')

    # Both former branches ran the identical fit; only the plot is conditional.
    history = model.fit(X, Y,
                  epochs=epochs,
                  batch_size=256,
                  shuffle=True,
                  validation_split=0.2,
                  verbose = 0,
                  callbacks=[best_save])

    if is_val == False:
        fig, loss_ax = plt.subplots()
        loss_ax.plot(history.history['loss'], 'y', label='train loss')
        loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.set_ylabel('loss')
        loss_ax.legend(loc='upper left')
        plt.show()

    return model

def load_best_model(target_idx):
    """Load the checkpoint ``best_m.hdf5`` and print its loss on the training set.

    Parameters
    ----------
    target_idx : int
        Target group the checkpoint was trained for (0: x,y, 1: m, 2: v).
        Kept for interface compatibility; every custom loss is registered, so
        the checkpoint resolves whichever loss it was compiled with.

    Returns
    -------
    The loaded Keras model.
    """
    # Map every loss name to the function of the same name. Previously all
    # names were mapped to `my_loss`, so the reloaded model (and the score
    # printed below) used a different loss than the one it was trained with.
    custom_objects = {
        'my_loss_E1': my_loss_E1,
        'my_loss_E2_M': my_loss_E2_M,
        'my_loss_E2_V': my_loss_E2_V,
    }
    model = load_model('best_m.hdf5', custom_objects=custom_objects)

    # Evaluated on the global X_train / Y_train (includes the validation part).
    score = model.evaluate(X_train, Y_train, verbose=0)
    print('loss:', score)

    return model

# Bagging: train n_model ensembles, each on a random 80% subsample, and
# average their test predictions.
n_model = 35
n_samples = X_train.shape[0]
n_bag = (n_samples * 4) // 5  # 80% of the samples per bag

# Accumulate predictions in a float array instead of adding onto whatever
# placeholder values the sample submission happens to contain.
pred_sum = np.zeros((X_test.shape[0], 4))

for i in range(n_model):
    # Random subsample without replacement. `train_test_split` was used here
    # but never imported; a numpy permutation gives the same kind of split.
    idx = np.random.permutation(n_samples)[:n_bag]
    X_train_bag = X_train[idx,:]
    Y_train_bag = Y_train[idx,:]

    for target_idx in [0,1,2]: # adjust the training order here
        model = set_model(target_idx)
        train(model, X_train_bag, Y_train_bag)
        best_model = load_best_model(target_idx)
        pred = best_model.predict(X_test)

        if target_idx == 0: # x, y model
            pred_sum[:, 0] += pred[:,0]
            pred_sum[:, 1] += pred[:,1]

        elif target_idx == 1: # m model
            pred_sum[:, 2] += pred[:,2]

        elif target_idx == 2: # v model
            pred_sum[:, 3] += pred[:,3]

# NOTE(review): assumes submission columns 1-4 are X, Y, M, V, as the mixed
# positional / label access of the previous version implied - confirm.
sample_submission[['X', 'Y', 'M', 'V']] = pred_sum/n_model

result1 = sample_submission.copy()

### Result 2

In [None]:
# Feature Engineering
sample_submission = pd.read_csv('/kaggle/input/dacon-crash-prediction/sample_submission.csv')

# Reload the raw features for this section: the name `train` is rebound to the
# training function by an earlier cell, and the feature helpers add columns in
# place, so previously used frames may already carry other sections' features.
train_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/train_features.csv')
test_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/test_features.csv')

# Applying each transform once is enough; a second pass recomputes the same columns.
master = JB2_applying(JB_applying(train_features))
test_master = JB2_applying(JB_applying(test_features))

# reshape to (samples, time steps, features, 1); the first column is dropped
# (id column omitted)
master_np = np.reshape(np.array(master)[:,1:], (2800, 375, master.shape[1]-1, 1))
test_master_np = np.reshape(np.array(test_master)[:,1:], (700, 375, test_master.shape[1]-1, 1))

X_train = master_np.copy()
Y_train = np.array(train_target)[:,1:]  # first column of the target file dropped
X_test = test_master_np.copy()

# Shapes and NaN counts as a sanity check
print(X_train.shape, np.isnan(X_train).sum().sum())
print(Y_train.shape, np.isnan(Y_train).sum().sum())
print(X_test.shape, np.isnan(X_test).sum().sum())

def my_loss(y_true, y_pred):
    """Mean squared relative error over all outputs.

    Computes ``mean(((y_pred - y_true) / (y_true + 1e-6)) ** 2)``; the small
    offset keeps the denominator non-zero when ``y_true`` is exactly 0.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error))

def my_loss_E1(y_true, y_pred):
    """Squared error restricted to the first two outputs, scaled by 1/2e4.

    Outputs 2 and 3 are zeroed by the mask, but the mean is still taken over
    all four columns.
    """
    column_mask = np.array([1,1,0,0])
    masked_sq_error = K.square(y_true-y_pred)*column_mask
    return K.mean(masked_sq_error)/2e+04

def my_loss_E2_M(y_true, y_pred):
    """Squared relative error restricted to output index 2.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,1,0]))

def my_loss_E2_V(y_true, y_pred):
    """Squared relative error restricted to output index 3.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,0,1]))

def set_model(target_idx):  # 0:x,y, 1:m, 2:v
    """Build and compile the CNN for one target group.

    Six Conv2D(3x1, valid) -> BatchNormalization -> MaxPooling2D(2x1) stages
    with 16, 32, 64, 128, 256 and 512 filters are followed by dense layers of
    128, 64, 32 and 16 units and a 4-unit linear output. The input shape is
    taken from the global ``X_train``. With 375 time steps the time axis
    shrinks 375 -> 186 -> 92 -> 45 -> 21 -> 9 -> 3; the feature axis is
    untouched because kernels and pools have width 1.

    ``target_idx`` selects the loss: 0 -> ``my_loss_E1``, 1 -> ``my_loss_E2_M``,
    anything else -> ``my_loss_E2_V``.
    """
    activation = 'elu'
    padding = 'valid'
    nf = 16
    fs = (3,1)
    input_shape = X_train.shape[1:4]

    model = Sequential()
    for stage, n_filters in enumerate((nf, nf*2, nf*4, nf*8, nf*16, nf*32)):
        conv_kwargs = dict(padding=padding, activation=activation)
        if stage == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(n_filters, fs, **conv_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 1)))

    model.add(Flatten())
    for units in (128, 64, 32, 16):
        model.add(Dense(units, activation ='elu'))  # only the output size is given
    model.add(Dense(4))

    optimizer = keras.optimizers.Adam()

    if target_idx==0:
        loss_fn = my_loss_E1
    elif target_idx==1:
        loss_fn = my_loss_E2_M
    else:
        loss_fn = my_loss_E2_V
    model.compile(loss=loss_fn, optimizer=optimizer)
    return model

def train(model, X, Y, is_val=False, epochs=100):
    """Fit ``model`` and checkpoint the best epoch to ``best_m.hdf5``.

    NOTE(review): this function shares its name with the raw ``train``
    DataFrame loaded earlier in the notebook; once this cell has run, ``train``
    refers to this function.

    Parameters
    ----------
    model : compiled Keras model.
    X, Y : training inputs and targets; 20% is used as validation split.
    is_val : bool, default False
        When ``is_val == False`` the train/validation loss curves are plotted.
    epochs : int, default 100
        Number of training epochs.

    Returns
    -------
    The fitted ``model`` (last-epoch weights; the best weights are on disk).
    """
    MODEL_SAVE_FOLDER_PATH = './model/'
    # Kept from the original cell; nothing in this function writes into it.
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    # Overwrite best_m.hdf5 only when the validation loss improves.
    best_save = ModelCheckpoint('best_m.hdf5', save_best_only=True, monitor='val_loss', mode='min')

    # Both former branches ran the identical fit; only the plot is conditional.
    history = model.fit(X, Y,
                  epochs=epochs,
                  batch_size=256,
                  shuffle=True,
                  validation_split=0.2,
                  verbose = 0,
                  callbacks=[best_save])

    if is_val == False:
        fig, loss_ax = plt.subplots()
        loss_ax.plot(history.history['loss'], 'y', label='train loss')
        loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.set_ylabel('loss')
        loss_ax.legend(loc='upper left')
        plt.show()

    return model

def load_best_model(target_idx):
    """Load the checkpoint ``best_m.hdf5`` and print its loss on the training set.

    Parameters
    ----------
    target_idx : int
        Target group the checkpoint was trained for (0: x,y, 1: m, 2: v).
        Kept for interface compatibility; every custom loss is registered, so
        the checkpoint resolves whichever loss it was compiled with.

    Returns
    -------
    The loaded Keras model.
    """
    # Map every loss name to the function of the same name. Previously all
    # names were mapped to `my_loss`, so the reloaded model (and the score
    # printed below) used a different loss than the one it was trained with.
    custom_objects = {
        'my_loss_E1': my_loss_E1,
        'my_loss_E2_M': my_loss_E2_M,
        'my_loss_E2_V': my_loss_E2_V,
    }
    model = load_model('best_m.hdf5', custom_objects=custom_objects)

    # Evaluated on the global X_train / Y_train (includes the validation part).
    score = model.evaluate(X_train, Y_train, verbose=0)
    print('loss:', score)

    return model

# Train one model per target group and copy its predictions into the submission.
# target_idx -> list of (submission column position, prediction column)
RESULT2_COLUMN_MAP = {
    0: [(1, 0), (2, 1)],  # x, y model
    1: [(3, 2)],          # m model
    2: [(4, 3)],          # v model
}

for target_idx in [0,1,2]: # adjust the training order here
    model = set_model(target_idx)
    train(model,X_train, Y_train)
    best_model = load_best_model(target_idx)
    pred = best_model.predict(X_test)

    for submission_col, pred_col in RESULT2_COLUMN_MAP[target_idx]:
        sample_submission.iloc[:,submission_col] = pred[:,pred_col]

result2 = sample_submission.copy()

### Result 3

In [None]:
# Feature Engineering
sample_submission = pd.read_csv('/kaggle/input/dacon-crash-prediction/sample_submission.csv')

# Reload the raw features for this section: the name `train` is rebound to the
# training function by an earlier cell, and the feature helpers add columns in
# place, so previously used frames may already carry other sections' features.
train_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/train_features.csv')
test_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/test_features.csv')

master = JB_applying(train_features)
master = JB2_applying(master)
master = arr_time(master)

test_master = JB_applying(test_features)
test_master = JB2_applying(test_master)
test_master = arr_time(test_master)

# reshape to (samples, time steps, features, 1); the first column is dropped
# (id column omitted)
master_np = np.reshape(np.array(master)[:,1:], (2800, 375, master.shape[1]-1, 1))
test_master_np = np.reshape(np.array(test_master)[:,1:], (700, 375, test_master.shape[1]-1, 1))

X_train = master_np.copy()
Y_train = np.array(train_target)[:,1:]  # first column of the target file dropped
X_test = test_master_np.copy()

# Shapes and NaN counts as a sanity check
print(X_train.shape, np.isnan(X_train).sum().sum())
print(Y_train.shape, np.isnan(Y_train).sum().sum())
print(X_test.shape, np.isnan(X_test).sum().sum())

def my_loss(y_true, y_pred):
    """Mean squared relative error over all outputs.

    Computes ``mean(((y_pred - y_true) / (y_true + 1e-6)) ** 2)``; the small
    offset keeps the denominator non-zero when ``y_true`` is exactly 0.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error))

def my_loss_E1(y_true, y_pred):
    """Squared error restricted to the first two outputs, scaled by 1/2e4.

    Outputs 2 and 3 are zeroed by the mask, but the mean is still taken over
    all four columns.
    """
    column_mask = np.array([1,1,0,0])
    masked_sq_error = K.square(y_true-y_pred)*column_mask
    return K.mean(masked_sq_error)/2e+04

def my_loss_E2_M(y_true, y_pred):
    """Squared relative error restricted to output index 2.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,1,0]))

def my_loss_E2_V(y_true, y_pred):
    """Squared relative error restricted to output index 3.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,0,1]))

def set_model(target_idx):  # 0:x,y, 1:m, 2:v
    """Build and compile the CNN for one target group.

    Six Conv2D(3x1, valid) -> BatchNormalization -> MaxPooling2D(2x1) stages
    with 16, 32, 64, 128, 256 and 512 filters are followed by dense layers of
    128, 64, 32 and 16 units and a 4-unit linear output. The input shape is
    taken from the global ``X_train``. With 375 time steps the time axis
    shrinks 375 -> 186 -> 92 -> 45 -> 21 -> 9 -> 3; the feature axis is
    untouched because kernels and pools have width 1.

    ``target_idx`` selects the loss: 0 -> ``my_loss_E1``, 1 -> ``my_loss_E2_M``,
    anything else -> ``my_loss_E2_V``.
    """
    activation = 'elu'
    padding = 'valid'
    nf = 16
    fs = (3,1)
    input_shape = X_train.shape[1:4]

    model = Sequential()
    for stage, n_filters in enumerate((nf, nf*2, nf*4, nf*8, nf*16, nf*32)):
        conv_kwargs = dict(padding=padding, activation=activation)
        if stage == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(n_filters, fs, **conv_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 1)))

    model.add(Flatten())
    for units in (128, 64, 32, 16):
        model.add(Dense(units, activation ='elu'))  # only the output size is given
    model.add(Dense(4))

    optimizer = keras.optimizers.Adam()

    if target_idx==0:
        loss_fn = my_loss_E1
    elif target_idx==1:
        loss_fn = my_loss_E2_M
    else:
        loss_fn = my_loss_E2_V
    model.compile(loss=loss_fn, optimizer=optimizer)
    return model

def train(model, X, Y, is_val=False, epochs=100):
    """Fit ``model`` and checkpoint the best epoch to ``best_m.hdf5``.

    NOTE(review): this function shares its name with the raw ``train``
    DataFrame loaded earlier in the notebook; once this cell has run, ``train``
    refers to this function.

    Parameters
    ----------
    model : compiled Keras model.
    X, Y : training inputs and targets; 20% is used as validation split.
    is_val : bool, default False
        When ``is_val == False`` the train/validation loss curves are plotted.
    epochs : int, default 100
        Number of training epochs.

    Returns
    -------
    The fitted ``model`` (last-epoch weights; the best weights are on disk).
    """
    MODEL_SAVE_FOLDER_PATH = './model/'
    # Kept from the original cell; nothing in this function writes into it.
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    # Overwrite best_m.hdf5 only when the validation loss improves.
    best_save = ModelCheckpoint('best_m.hdf5', save_best_only=True, monitor='val_loss', mode='min')

    # Both former branches ran the identical fit; only the plot is conditional.
    history = model.fit(X, Y,
                  epochs=epochs,
                  batch_size=256,
                  shuffle=True,
                  validation_split=0.2,
                  verbose = 0,
                  callbacks=[best_save])

    if is_val == False:
        fig, loss_ax = plt.subplots()
        loss_ax.plot(history.history['loss'], 'y', label='train loss')
        loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.set_ylabel('loss')
        loss_ax.legend(loc='upper left')
        plt.show()

    return model

def load_best_model(target_idx):
    """Load the checkpoint ``best_m.hdf5`` and print its loss on the training set.

    Parameters
    ----------
    target_idx : int
        Target group the checkpoint was trained for (0: x,y, 1: m, 2: v).
        Kept for interface compatibility; every custom loss is registered, so
        the checkpoint resolves whichever loss it was compiled with.

    Returns
    -------
    The loaded Keras model.
    """
    # Map every loss name to the function of the same name. Previously all
    # names were mapped to `my_loss`, so the reloaded model (and the score
    # printed below) used a different loss than the one it was trained with.
    custom_objects = {
        'my_loss_E1': my_loss_E1,
        'my_loss_E2_M': my_loss_E2_M,
        'my_loss_E2_V': my_loss_E2_V,
    }
    model = load_model('best_m.hdf5', custom_objects=custom_objects)

    # Evaluated on the global X_train / Y_train (includes the validation part).
    score = model.evaluate(X_train, Y_train, verbose=0)
    print('loss:', score)

    return model

# Train one model per target group and copy its predictions into the submission.
# target_idx -> list of (submission column position, prediction column)
RESULT3_COLUMN_MAP = {
    0: [(1, 0), (2, 1)],  # x, y model
    1: [(3, 2)],          # m model
    2: [(4, 3)],          # v model
}

for target_idx in [0,1,2]: # adjust the training order here
    model = set_model(target_idx)
    train(model,X_train, Y_train)
    best_model = load_best_model(target_idx)
    pred = best_model.predict(X_test)

    for submission_col, pred_col in RESULT3_COLUMN_MAP[target_idx]:
        sample_submission.iloc[:,submission_col] = pred[:,pred_col]

result3 = sample_submission.copy()

### Result 4

In [None]:
# Feature Engineering
sample_submission = pd.read_csv('/kaggle/input/dacon-crash-prediction/sample_submission.csv')

# Reload the raw features for this section: the name `train` is rebound to the
# training function by an earlier cell, and the feature helpers add columns in
# place, so previously used frames may already carry other sections' features.
train_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/train_features.csv')
test_features = pd.read_csv('/kaggle/input/dacon-crash-prediction/test_features.csv')

master = JB_applying(train_features)
master = JB2_applying(master)
master = arr_time(master)
master = checking_convex(master)
master = AT_diff(master)

test_master = JB_applying(test_features)
test_master = JB2_applying(test_master)
test_master = arr_time(test_master)
test_master = checking_convex(test_master)
test_master = AT_diff(test_master)

# reshape to (samples, time steps, features, 1); the first column is dropped
# (id column omitted)
master_np = np.reshape(np.array(master)[:,1:], (2800, 375, master.shape[1]-1, 1))
test_master_np = np.reshape(np.array(test_master)[:,1:], (700, 375, test_master.shape[1]-1, 1))

X_train = master_np.copy()
Y_train = np.array(train_target)[:,1:]  # first column of the target file dropped
X_test = test_master_np.copy()

# Shapes and NaN counts as a sanity check
print(X_train.shape, np.isnan(X_train).sum().sum())
print(Y_train.shape, np.isnan(Y_train).sum().sum())
print(X_test.shape, np.isnan(X_test).sum().sum())

def my_loss(y_true, y_pred):
    """Mean squared relative error over all outputs.

    Computes ``mean(((y_pred - y_true) / (y_true + 1e-6)) ** 2)``; the small
    offset keeps the denominator non-zero when ``y_true`` is exactly 0.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error))

def my_loss_E1(y_true, y_pred):
    """Squared error restricted to the first two outputs, scaled by 1/2e4.

    Outputs 2 and 3 are zeroed by the mask, but the mean is still taken over
    all four columns.
    """
    column_mask = np.array([1,1,0,0])
    masked_sq_error = K.square(y_true-y_pred)*column_mask
    return K.mean(masked_sq_error)/2e+04

def my_loss_E2_M(y_true, y_pred):
    """Squared relative error restricted to output index 2.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,1,0]))

def my_loss_E2_V(y_true, y_pred):
    """Squared relative error restricted to output index 3.

    The relative error is ``(y_pred - y_true) / (y_true + 1e-6)``. All other
    outputs are zeroed by the mask, but the mean is still taken over all four
    columns.
    """
    # Plain tensor arithmetic: no need to build a Keras Lambda layer on every
    # loss evaluation just to divide two tensors.
    relative_error = (y_pred - y_true) / (y_true + 0.000001)
    return K.mean(K.square(relative_error)*np.array([0,0,0,1]))

def set_model(target_idx):  # 0:x,y, 1:m, 2:v
    """Build and compile the CNN for one target group.

    Six Conv2D(3x1, valid) -> BatchNormalization -> MaxPooling2D(2x1) stages
    with 16, 32, 64, 128, 256 and 512 filters are followed by dense layers of
    128, 64, 32 and 16 units and a 4-unit linear output. The input shape is
    taken from the global ``X_train``. With 375 time steps the time axis
    shrinks 375 -> 186 -> 92 -> 45 -> 21 -> 9 -> 3; the feature axis is
    untouched because kernels and pools have width 1.

    ``target_idx`` selects the loss: 0 -> ``my_loss_E1``, 1 -> ``my_loss_E2_M``,
    anything else -> ``my_loss_E2_V``.
    """
    activation = 'elu'
    padding = 'valid'
    nf = 16
    fs = (3,1)
    input_shape = X_train.shape[1:4]

    model = Sequential()
    for stage, n_filters in enumerate((nf, nf*2, nf*4, nf*8, nf*16, nf*32)):
        conv_kwargs = dict(padding=padding, activation=activation)
        if stage == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(n_filters, fs, **conv_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 1)))

    model.add(Flatten())
    for units in (128, 64, 32, 16):
        model.add(Dense(units, activation ='elu'))  # only the output size is given
    model.add(Dense(4))

    optimizer = keras.optimizers.Adam()

    if target_idx==0:
        loss_fn = my_loss_E1
    elif target_idx==1:
        loss_fn = my_loss_E2_M
    else:
        loss_fn = my_loss_E2_V
    model.compile(loss=loss_fn, optimizer=optimizer)
    return model

def train(model, X, Y, is_val=False, epochs=150):
    """Fit ``model`` and checkpoint the best epoch to ``best_m.hdf5``.

    NOTE(review): this function shares its name with the raw ``train``
    DataFrame loaded earlier in the notebook; once this cell has run, ``train``
    refers to this function.

    Parameters
    ----------
    model : compiled Keras model.
    X, Y : training inputs and targets; 20% is used as validation split.
    is_val : bool, default False
        When ``is_val == False`` the train/validation loss curves are plotted.
    epochs : int, default 150
        Number of training epochs.

    Returns
    -------
    The fitted ``model`` (last-epoch weights; the best weights are on disk).
    """
    MODEL_SAVE_FOLDER_PATH = './model/'
    # Kept from the original cell; nothing in this function writes into it.
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    # Overwrite best_m.hdf5 only when the validation loss improves.
    best_save = ModelCheckpoint('best_m.hdf5', save_best_only=True, monitor='val_loss', mode='min')

    # Both former branches ran the identical fit; only the plot is conditional.
    history = model.fit(X, Y,
                  epochs=epochs,
                  batch_size=256,
                  shuffle=True,
                  validation_split=0.2,
                  verbose = 0,
                  callbacks=[best_save])

    if is_val == False:
        fig, loss_ax = plt.subplots()
        loss_ax.plot(history.history['loss'], 'y', label='train loss')
        loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.set_ylabel('loss')
        loss_ax.legend(loc='upper left')
        plt.show()

    return model

def load_best_model(target_idx):
    """Load the checkpoint ``best_m.hdf5`` and print its loss on the training set.

    Parameters
    ----------
    target_idx : int
        Target group the checkpoint was trained for (0: x,y, 1: m, 2: v).
        Kept for interface compatibility; every custom loss is registered, so
        the checkpoint resolves whichever loss it was compiled with.

    Returns
    -------
    The loaded Keras model.
    """
    # Map every loss name to the function of the same name. Previously all
    # names were mapped to `my_loss`, so the reloaded model (and the score
    # printed below) used a different loss than the one it was trained with.
    custom_objects = {
        'my_loss_E1': my_loss_E1,
        'my_loss_E2_M': my_loss_E2_M,
        'my_loss_E2_V': my_loss_E2_V,
    }
    model = load_model('best_m.hdf5', custom_objects=custom_objects)

    # Evaluated on the global X_train / Y_train (includes the validation part).
    score = model.evaluate(X_train, Y_train, verbose=0)
    print('loss:', score)

    return model

# Train one model per target group and copy its predictions into the submission.
# target_idx -> list of (submission column position, prediction column)
RESULT4_COLUMN_MAP = {
    0: [(1, 0), (2, 1)],  # x, y model
    1: [(3, 2)],          # m model
    2: [(4, 3)],          # v model
}

for target_idx in [0,1,2]: # adjust the training order here
    model = set_model(target_idx)
    train(model,X_train, Y_train)
    best_model = load_best_model(target_idx)
    pred = best_model.predict(X_test)

    for submission_col, pred_col in RESULT4_COLUMN_MAP[target_idx]:
        sample_submission.iloc[:,submission_col] = pred[:,pred_col]

result4 = sample_submission.copy()

In [None]:
sample_submission = pd.read_csv('/kaggle/input/dacon-crash-prediction/sample_submission.csv')

# Weighted blend of the four results (weights sum to 1.0).
# The column list is spelled out once: the previous inline lists were missing
# a comma ('M' 'V'), which Python concatenates to the non-existent column 'MV'.
target_cols = ['X', 'Y', 'M', 'V']
sample_submission[target_cols] = (
    result1[target_cols]*0.8
    + result2[target_cols]*0.07
    + result3[target_cols]*0.07
    + result4[target_cols]*0.06
)
sample_submission.to_csv('submission.csv', index = False)