In [1]:
import pandas as pd
import numpy as np
import json
import tensorflow.keras.layers as L
import tensorflow as tf
import plotly.express as px

## Define helper functions and useful vars

In [2]:
import os

# GPU selection for this process; the values are read by the CUDA runtime.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "1",
})

In [3]:
# Names of the five predicted targets. They are used, in this order, as the
# column labels of the per-sequence prediction DataFrames that are written to
# the submission files.
pred_cols = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

In [4]:
def gru_layer(hidden_dim, dropout):
    """Return a bidirectional GRU layer that emits the full output sequence.

    Args:
        hidden_dim: Number of units in each direction.
        dropout: Input dropout rate of the recurrent layer.
    """
    recurrent = tf.keras.layers.GRU(
        hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def lstm_layer(hidden_dim, dropout):
    """Return a bidirectional LSTM layer that emits the full output sequence.

    Args:
        hidden_dim: Number of units in each direction.
        dropout: Input dropout rate of the recurrent layer.
    """
    recurrent = tf.keras.layers.LSTM(
        hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def build_model(gru=False,seq_len=107, pred_len=68, dropout=0.25,
                embed_dim=128, hidden_dim=384):
    """Build and compile the embedding -> transformer -> 3x BiGRU regressor.

    The function reads the module-level vocabularies ``token2int0``,
    ``token2int1`` and ``token2int2`` and the ``TransformerBlock`` class, so
    those must be defined before it is called.

    Args:
        gru: Currently unused; the recurrent stack is always built with
            ``gru_layer``. NOTE(review): kept only so existing calls keep
            working; wiring it up would change the default architecture.
        seq_len: Length of the input sequences.
        pred_len: Number of leading positions for which outputs are produced.
        dropout: Dropout rate passed to every recurrent layer.
        embed_dim: Width of each token embedding and of the bpps projection.
        hidden_dim: Units per direction in each bidirectional GRU.

    Returns:
        A compiled ``tf.keras.Model`` with inputs ``[tokens, bpps]`` of shapes
        ``(batch, seq_len, 3)`` and ``(batch, seq_len, 1)`` and two outputs,
        ``out1`` and ``out2``, each of shape ``(batch, pred_len, 5)``.
    """

    inputs = tf.keras.layers.Input(shape=(seq_len, 3))

    inputs_bpps = tf.keras.layers.Input(shape=(seq_len, 1))

    # One embedding table per input channel (last axis of ``inputs``).
    embed0 = tf.keras.layers.Embedding(input_dim=len(token2int0), output_dim=embed_dim)(inputs[:, :, 0])
    embed1 = tf.keras.layers.Embedding(input_dim=len(token2int1), output_dim=embed_dim)(inputs[:, :, 1])
    embed2 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 2])

    embed0 = tf.keras.layers.SpatialDropout1D(.2)(embed0)
    embed1 = tf.keras.layers.SpatialDropout1D(.2)(embed1)
    embed2 = tf.keras.layers.SpatialDropout1D(.2)(embed2)

    embed = tf.concat([embed0, embed1, embed2], axis=2)

    embed = tf.keras.layers.SpatialDropout1D(.2)(embed)

    # Project the scalar bpps feature to the same width as one embedding.
    bpps = tf.keras.layers.Dense(embed_dim, activation='linear')(inputs_bpps)

    embed = tf.concat([embed, bpps], axis=2)

    # Three embeddings plus the bpps projection are concatenated, so the
    # feature width is 4 * embed_dim (512 for the default embed_dim=128).
    # The transformer's residual connections need exactly this width; the
    # previous hard-coded 512 only worked for the default embed_dim.
    model_dim = 4 * embed_dim
    transformer_block = TransformerBlock(model_dim, 8, model_dim)
    embed = transformer_block(embed)

    hidden = gru_layer(hidden_dim, dropout)(embed)
    hidden = gru_layer(hidden_dim, dropout)(hidden)
    hidden = gru_layer(hidden_dim, dropout)(hidden)

    # Only the first pred_len positions of each sequence are predicted.
    truncated = hidden[:, :pred_len]

    # Two linear heads over the same features, trained with different losses.
    out1 = tf.keras.layers.Dense(5, activation='linear', name='out1')(truncated)
    out2 = tf.keras.layers.Dense(5, activation='linear', name='out2')(truncated)

    model = tf.keras.Model(inputs=[inputs, inputs_bpps], outputs=[out1, out2])

    adam = tf.optimizers.Adam()

    def MCRMSE(y_true, y_pred):
        """Per-sample RMSE over axis 1 for each target, averaged over targets."""
        colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)
        return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)

    # out1 is trained with MCRMSE (weight 0.7), out2 with MAE (weight 0.3).
    model.compile(optimizer = adam, loss={'out1': MCRMSE, 'out2': 'mae'}, loss_weights={'out1': 0.7, 'out2': 0.3})

    return model

In [5]:
# Single shared vocabulary covering structure, sequence and loop-type symbols.
token2int = {x: i for i, x in enumerate('().ACGUBEHIMSX')}


def preprocess_inputs(df, cols=('sequence', 'structure', 'predicted_loop_type')):
    """Encode string columns of ``df`` as an integer array.

    Every character of every string in ``cols`` is looked up in the
    module-level ``token2int`` table.

    Args:
        df: DataFrame whose ``cols`` hold equal-length strings.
        cols: Names of the columns to encode, in output channel order.

    Returns:
        Integer ``np.ndarray`` of shape ``(len(df), string_length, len(cols))``.

    Raises:
        KeyError: If a character is missing from ``token2int``.
    """
    # A plain comprehension replaces DataFrame.applymap, which is deprecated
    # in recent pandas releases; the resulting nested list is the same.
    encoded = [
        [[token2int[ch] for ch in seq] for seq in row]
        for row in df[list(cols)].values.tolist()
    ]
    # (rows, columns, positions) -> (rows, positions, columns)
    return np.transpose(np.array(encoded), (0, 2, 1))

## Load and preprocess data

In [6]:
# train/test are read as JSON-lines files (one record per line, lines=True);
# sample_df supplies the id_seqpos rows that the submissions are merged onto.
train = pd.read_json('../input//train.json', lines=True)
test = pd.read_json('../input//test.json', lines=True)
sample_df = pd.read_csv('../input//sample_submission.csv')

In [7]:
# Label columns read from the training frame to build train_labels.
# Same names and order as pred_cols.
target_cols = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

In [8]:
# Separate vocabularies, one per input channel. Their sizes set input_dim of
# the three Embedding layers created in build_model.
token2int0 = {'G': 0, 'A': 1, 'C': 2, 'U': 3}  # 'sequence' symbols
token2int1 = {'.': 0,  '(': 1, ')': 2}  # 'structure' symbols
token2int2 = {'E': 0, 'S': 1, 'H': 2, 'B': 3, 'X': 4, 'I': 5, 'M': 6}  # 'predicted_loop_type' symbols

def convert_seq(x, tmp_dict):
    """Return a list with each element of ``x`` looked up in ``tmp_dict``.

    Raises:
        KeyError: If an element of ``x`` is not a key of ``tmp_dict``.
    """
    encoded = []
    for symbol in x:
        encoded.append(tmp_dict[symbol])
    return encoded

# Replace each string column of `train` with its integer encoding, using the
# vocabulary that belongs to that column. This overwrites the columns in
# place, so the cell cannot be re-run without reloading `train`.
train['sequence'] = train['sequence'].apply(lambda seq: convert_seq(seq, token2int0))
train['structure'] = train['structure'].apply(lambda seq: convert_seq(seq, token2int1))
train['predicted_loop_type'] = train['predicted_loop_type'].apply(lambda seq: convert_seq(seq, token2int2))

# (samples, channels, positions) -> (samples, positions, channels)
train_inputs = np.array(
    train[['sequence', 'structure', 'predicted_loop_type']].values.tolist()
).transpose((0, 2, 1))

# Keep only the samples whose signal_to_noise exceeds 1, for inputs and labels alike.
train_inputs = train_inputs[train.signal_to_noise > 1]
train_labels = np.transpose(
    np.array(train.loc[train.signal_to_noise > 1, target_cols].values.tolist()),
    (0, 2, 1),
)

In [9]:
# For every training id, load its .npy array and take 1 minus the sum over axis 1.
# NOTE(review): presumably each file is a base-pair probability matrix, making
# this a per-position "unpaired" probability; confirm against the data source.
train_bpps = np.stack([1 - np.load(f'../input/bpps/{ele}.npy').sum(1) for ele in train['id']])
# Apply the same signal_to_noise filter as the inputs/labels and add a trailing
# feature axis to match the second model input.
train_bpps = train_bpps[train.signal_to_noise > 1][:, :, np.newaxis]

In [10]:
from sklearn.model_selection import KFold

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention over a sequence.

    Args:
        embed_dim: Width of the input and output features; must be divisible
            by ``num_heads``.
        num_heads: Number of attention heads.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...).

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        # Forward Layer kwargs so the layer can be rebuilt from get_config().
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(output, weights)`` of scaled dot-product attention."""
        score = tf.matmul(query, key, transpose_b=True)
        # Cast the scale to the score dtype so the division also works when
        # the layer runs with a non-float32 compute dtype.
        dim_key = tf.cast(tf.shape(key)[-1], score.dtype)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape ``(batch, seq, embed)`` to ``(batch, heads, seq, projection)``."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to ``inputs`` of shape ``(batch, seq_len, embed_dim)``."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(
            query, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(
            key, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(
            value, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config().copy()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config
    
class TransformerBlock(layers.Layer):
    """Self-attention followed by a feed-forward net, each with dropout,
    a residual connection and layer normalisation.

    Args:
        embed_dim: Feature width of the input; the residual additions require
            the input's last dimension to equal this value.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after attention and after the feed-forward net.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward Layer kwargs so the layer can be rebuilt from get_config().
        super(TransformerBlock, self).__init__(**kwargs)
        # Keep the constructor arguments for get_config().
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block; ``training`` is forwarded to both dropout layers."""
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config().copy()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config
    
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        maxlen: Number of positions in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(start=0, limit=seq_len, delta=1)
        # Position table is looked up first, then tokens (same order as before).
        position_vectors = self.pos_emb(position_ids)
        token_vectors = self.token_emb(x)
        return token_vectors + position_vectors

In [12]:
# Sanity check on the label array shape after the (0, 2, 1) transpose.
train_labels.shape

(2096, 68, 5)

In [13]:
# Check that a zeros_like buffer (used for the out-of-fold predictions) has the label shape.
np.zeros_like(train_labels).shape

(2096, 68, 5)

In [14]:
# 5-fold cross-validation: train one model per fold, store its out-of-fold
# predictions, predict both test splits and write one submission per fold.

# Imported once here instead of on every fold iteration.
from sklearn.metrics import mean_squared_error

FOLDS = KFold(n_splits=5, random_state=815, shuffle=True)

# Out-of-fold predictions of the `out1` head, filled one validation fold at a time.
oofs_pred = np.zeros_like(train_labels)
# Per-fold test predictions. The two splits have different sequence lengths,
# so they are collected in separate lists.
public_preds_array = []
private_preds_array = []


def _prepare_test_split(split_df):
    """Encode one test split in place and load its bpps feature.

    Args:
        split_df: Copy of the test rows of one sequence length. Its
            'sequence', 'structure' and 'predicted_loop_type' columns are
            overwritten with integer lists.

    Returns:
        Tuple ``(inputs, bpps)`` with shapes ``(n, seq_len, 3)`` and
        ``(n, seq_len, 1)``.
    """
    split_df['sequence'] = split_df['sequence'].apply(lambda x: [token2int0[ele] for ele in x])
    split_df['structure'] = split_df['structure'].apply(lambda x: [token2int1[ele] for ele in x])
    split_df['predicted_loop_type'] = split_df['predicted_loop_type'].apply(lambda x: [token2int2[ele] for ele in x])
    split_inputs = np.transpose(np.array(split_df[['sequence', 'structure', 'predicted_loop_type']].values.tolist()), (0, 2, 1))

    # Same feature as for training: 1 minus the sum over axis 1 of each array.
    split_bpps = np.stack([1 - np.load(f'../input/bpps/{ele}.npy').sum(1) for ele in split_df['id']])
    return split_inputs, split_bpps[:, :, np.newaxis]


# The test features do not depend on the fold, so they are built once here
# rather than being recomputed (and re-read from disk) inside the loop.
public_df = test.query("seq_length == 107").copy()
private_df = test.query("seq_length == 130").copy()
public_inputs, public_bpps = _prepare_test_split(public_df)
private_inputs, private_bpps = _prepare_test_split(private_df)

for i, (trn_idx, vld_idx) in enumerate(FOLDS.split(train_inputs)):
    trn_inputs = train_inputs[trn_idx]
    vld_inputs = train_inputs[vld_idx]

    trn_inputs_bpps = train_bpps[trn_idx]
    vld_inputs_bpps = train_bpps[vld_idx]

    trn_labels = train_labels[trn_idx]
    vld_labels = train_labels[vld_idx]

    model = build_model()
    model.summary()

    # NOTE(review): ModelCheckpoint is used without save_best_only, so the file
    # is rewritten every epoch and load_weights below restores the last epoch,
    # not the best validation epoch. Confirm this is intended.
    history = model.fit(
        [trn_inputs, trn_inputs_bpps], trn_labels,
        validation_data=([vld_inputs, vld_inputs_bpps], vld_labels),
        batch_size=32,
        epochs=120,
        callbacks=[
            tf.keras.callbacks.ReduceLROnPlateau(),
            tf.keras.callbacks.ModelCheckpoint('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_815.h5')
        ],
        verbose=2,
    )
    model.load_weights('./tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_815.h5')
    outputs, outputs2 = model.predict([vld_inputs, vld_inputs_bpps])
    oofs_pred[vld_idx] = outputs

    # Validation score: RMSE per target column, averaged over the columns.
    # The target is the LAST axis of (samples, positions, targets); indexing
    # axis 1 would score the first five sequence positions instead.
    errors = [
        np.sqrt(mean_squared_error(vld_labels[:, :, idx], outputs[:, :, idx]))
        for idx in range(vld_labels.shape[-1])
    ]
    final_error = np.mean(errors)
    print('#'*20, final_error)

    # Caveat: The prediction format requires the output to be the same length as the input,
    # although it's not the case for the training data.
    model_short = build_model(seq_len=107, pred_len=107)
    model_long = build_model(seq_len=130, pred_len=130)

    model_short.load_weights('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_815.h5')
    model_long.load_weights('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_815.h5')

    public_preds, outputs2 = model_short.predict([public_inputs, public_bpps])
    private_preds, outputs2 = model_long.predict([private_inputs,private_bpps])

    # Private predictions go to their own list (they were previously appended
    # to public_preds_array, mixing arrays of different lengths).
    public_preds_array.append(public_preds)
    private_preds_array.append(private_preds)

    print(public_preds.shape, private_preds.shape)

    # One DataFrame per test sequence, one row per position, keyed by id_seqpos.
    preds_ls = []

    for df, preds in [(public_df, public_preds), (private_df, private_preds)]:
        for idx, uid in enumerate(df.id):
            single_pred = preds[idx]

            single_df = pd.DataFrame(single_pred, columns=pred_cols)
            single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

            preds_ls.append(single_df)

    preds_df = pd.concat(preds_ls)

    submission = sample_df[['id_seqpos']].merge(preds_df, on=['id_seqpos'])
    submission.to_csv(f'submission_tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_815_{i}.csv', index=False)

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 107, 3)]     0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None, 107)]        0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_1 (Te [(None, 107)]        0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_2 (Te [(None, 107)]        0           input_1[0][0]                    
_______________________________________________________________________________________

Epoch 22/120
53/53 - 4s - loss: 0.1940 - out1_loss: 0.2144 - out2_loss: 0.1465 - val_loss: 0.2039 - val_out1_loss: 0.2263 - val_out2_loss: 0.1518
Epoch 23/120
53/53 - 4s - loss: 0.1920 - out1_loss: 0.2125 - out2_loss: 0.1442 - val_loss: 0.2040 - val_out1_loss: 0.2268 - val_out2_loss: 0.1507
Epoch 24/120
53/53 - 4s - loss: 0.1874 - out1_loss: 0.2078 - out2_loss: 0.1400 - val_loss: 0.2072 - val_out1_loss: 0.2312 - val_out2_loss: 0.1511
Epoch 25/120
53/53 - 4s - loss: 0.1851 - out1_loss: 0.2052 - out2_loss: 0.1383 - val_loss: 0.2010 - val_out1_loss: 0.2225 - val_out2_loss: 0.1509
Epoch 26/120
53/53 - 4s - loss: 0.1827 - out1_loss: 0.2023 - out2_loss: 0.1369 - val_loss: 0.2020 - val_out1_loss: 0.2240 - val_out2_loss: 0.1505
Epoch 27/120
53/53 - 4s - loss: 0.1803 - out1_loss: 0.1994 - out2_loss: 0.1357 - val_loss: 0.2020 - val_out1_loss: 0.2247 - val_out2_loss: 0.1490
Epoch 28/120
53/53 - 4s - loss: 0.1781 - out1_loss: 0.1971 - out2_loss: 0.1339 - val_loss: 0.1997 - val_out1_loss: 0.2222 - 

Epoch 79/120
53/53 - 4s - loss: 0.1163 - out1_loss: 0.1262 - out2_loss: 0.0934 - val_loss: 0.1890 - val_out1_loss: 0.2101 - val_out2_loss: 0.1400
Epoch 80/120
53/53 - 4s - loss: 0.1160 - out1_loss: 0.1258 - out2_loss: 0.0931 - val_loss: 0.1891 - val_out1_loss: 0.2102 - val_out2_loss: 0.1400
Epoch 81/120
53/53 - 4s - loss: 0.1156 - out1_loss: 0.1254 - out2_loss: 0.0929 - val_loss: 0.1892 - val_out1_loss: 0.2103 - val_out2_loss: 0.1400
Epoch 82/120
53/53 - 4s - loss: 0.1153 - out1_loss: 0.1250 - out2_loss: 0.0927 - val_loss: 0.1891 - val_out1_loss: 0.2102 - val_out2_loss: 0.1400
Epoch 83/120
53/53 - 4s - loss: 0.1149 - out1_loss: 0.1246 - out2_loss: 0.0925 - val_loss: 0.1888 - val_out1_loss: 0.2098 - val_out2_loss: 0.1398
Epoch 84/120
53/53 - 4s - loss: 0.1147 - out1_loss: 0.1244 - out2_loss: 0.0922 - val_loss: 0.1889 - val_out1_loss: 0.2100 - val_out2_loss: 0.1398
Epoch 85/120
53/53 - 5s - loss: 0.1144 - out1_loss: 0.1240 - out2_loss: 0.0922 - val_loss: 0.1890 - val_out1_loss: 0.2101 - 

53/53 - 7s - loss: 0.6524 - out1_loss: 0.7291 - out2_loss: 0.4733 - val_loss: 0.3638 - val_out1_loss: 0.3971 - val_out2_loss: 0.2859
Epoch 2/120
53/53 - 5s - loss: 0.3529 - out1_loss: 0.3919 - out2_loss: 0.2618 - val_loss: 0.3395 - val_out1_loss: 0.3793 - val_out2_loss: 0.2466
Epoch 3/120
53/53 - 4s - loss: 0.3245 - out1_loss: 0.3602 - out2_loss: 0.2414 - val_loss: 0.3103 - val_out1_loss: 0.3514 - val_out2_loss: 0.2145
Epoch 4/120
53/53 - 4s - loss: 0.3055 - out1_loss: 0.3383 - out2_loss: 0.2291 - val_loss: 0.2975 - val_out1_loss: 0.3325 - val_out2_loss: 0.2159
Epoch 5/120
53/53 - 4s - loss: 0.2891 - out1_loss: 0.3218 - out2_loss: 0.2126 - val_loss: 0.2733 - val_out1_loss: 0.3033 - val_out2_loss: 0.2031
Epoch 6/120
53/53 - 4s - loss: 0.2807 - out1_loss: 0.3121 - out2_loss: 0.2076 - val_loss: 0.2686 - val_out1_loss: 0.3007 - val_out2_loss: 0.1936
Epoch 7/120
53/53 - 4s - loss: 0.2685 - out1_loss: 0.2992 - out2_loss: 0.1969 - val_loss: 0.2558 - val_out1_loss: 0.2873 - val_out2_loss: 0.18

Epoch 58/120
53/53 - 4s - loss: 0.1375 - out1_loss: 0.1498 - out2_loss: 0.1088 - val_loss: 0.1911 - val_out1_loss: 0.2123 - val_out2_loss: 0.1415
Epoch 59/120
53/53 - 4s - loss: 0.1362 - out1_loss: 0.1486 - out2_loss: 0.1071 - val_loss: 0.1912 - val_out1_loss: 0.2129 - val_out2_loss: 0.1404
Epoch 60/120
53/53 - 4s - loss: 0.1358 - out1_loss: 0.1479 - out2_loss: 0.1074 - val_loss: 0.1901 - val_out1_loss: 0.2117 - val_out2_loss: 0.1398
Epoch 61/120
53/53 - 5s - loss: 0.1356 - out1_loss: 0.1478 - out2_loss: 0.1073 - val_loss: 0.1907 - val_out1_loss: 0.2127 - val_out2_loss: 0.1394
Epoch 62/120
53/53 - 4s - loss: 0.1346 - out1_loss: 0.1461 - out2_loss: 0.1076 - val_loss: 0.1914 - val_out1_loss: 0.2130 - val_out2_loss: 0.1409
Epoch 63/120
53/53 - 4s - loss: 0.1284 - out1_loss: 0.1398 - out2_loss: 0.1018 - val_loss: 0.1866 - val_out1_loss: 0.2079 - val_out2_loss: 0.1368
Epoch 64/120
53/53 - 5s - loss: 0.1259 - out1_loss: 0.1370 - out2_loss: 0.1000 - val_loss: 0.1861 - val_out1_loss: 0.2074 - 

53/53 - 5s - loss: 0.1183 - out1_loss: 0.1283 - out2_loss: 0.0949 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 115/120
53/53 - 4s - loss: 0.1186 - out1_loss: 0.1287 - out2_loss: 0.0950 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 116/120
53/53 - 4s - loss: 0.1186 - out1_loss: 0.1287 - out2_loss: 0.0950 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 117/120
53/53 - 4s - loss: 0.1183 - out1_loss: 0.1283 - out2_loss: 0.0949 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 118/120
53/53 - 5s - loss: 0.1183 - out1_loss: 0.1284 - out2_loss: 0.0949 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 119/120
53/53 - 5s - loss: 0.1184 - out1_loss: 0.1284 - out2_loss: 0.0949 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out2_loss: 0.1359
Epoch 120/120
53/53 - 4s - loss: 0.1185 - out1_loss: 0.1286 - out2_loss: 0.0951 - val_loss: 0.1857 - val_out1_loss: 0.2071 - val_out

Epoch 15/120
53/53 - 4s - loss: 0.2237 - out1_loss: 0.2488 - out2_loss: 0.1650 - val_loss: 0.2234 - val_out1_loss: 0.2470 - val_out2_loss: 0.1683
Epoch 16/120
53/53 - 4s - loss: 0.2199 - out1_loss: 0.2438 - out2_loss: 0.1642 - val_loss: 0.2204 - val_out1_loss: 0.2444 - val_out2_loss: 0.1645
Epoch 17/120
53/53 - 4s - loss: 0.2136 - out1_loss: 0.2365 - out2_loss: 0.1602 - val_loss: 0.2130 - val_out1_loss: 0.2372 - val_out2_loss: 0.1565
Epoch 18/120
53/53 - 4s - loss: 0.2099 - out1_loss: 0.2325 - out2_loss: 0.1571 - val_loss: 0.2116 - val_out1_loss: 0.2348 - val_out2_loss: 0.1573
Epoch 19/120
53/53 - 5s - loss: 0.2054 - out1_loss: 0.2278 - out2_loss: 0.1530 - val_loss: 0.2100 - val_out1_loss: 0.2339 - val_out2_loss: 0.1542
Epoch 20/120
53/53 - 4s - loss: 0.2036 - out1_loss: 0.2254 - out2_loss: 0.1528 - val_loss: 0.2115 - val_out1_loss: 0.2345 - val_out2_loss: 0.1579
Epoch 21/120
53/53 - 4s - loss: 0.2009 - out1_loss: 0.2225 - out2_loss: 0.1504 - val_loss: 0.2103 - val_out1_loss: 0.2327 - 

Epoch 72/120
53/53 - 4s - loss: 0.1247 - out1_loss: 0.1356 - out2_loss: 0.0992 - val_loss: 0.1918 - val_out1_loss: 0.2141 - val_out2_loss: 0.1397
Epoch 73/120
53/53 - 5s - loss: 0.1222 - out1_loss: 0.1328 - out2_loss: 0.0976 - val_loss: 0.1913 - val_out1_loss: 0.2136 - val_out2_loss: 0.1394
Epoch 74/120
53/53 - 4s - loss: 0.1209 - out1_loss: 0.1313 - out2_loss: 0.0968 - val_loss: 0.1911 - val_out1_loss: 0.2134 - val_out2_loss: 0.1391
Epoch 75/120
53/53 - 4s - loss: 0.1203 - out1_loss: 0.1306 - out2_loss: 0.0964 - val_loss: 0.1910 - val_out1_loss: 0.2132 - val_out2_loss: 0.1391
Epoch 76/120
53/53 - 4s - loss: 0.1198 - out1_loss: 0.1300 - out2_loss: 0.0961 - val_loss: 0.1910 - val_out1_loss: 0.2132 - val_out2_loss: 0.1390
Epoch 77/120
53/53 - 4s - loss: 0.1192 - out1_loss: 0.1293 - out2_loss: 0.0956 - val_loss: 0.1908 - val_out1_loss: 0.2131 - val_out2_loss: 0.1390
Epoch 78/120
53/53 - 4s - loss: 0.1190 - out1_loss: 0.1290 - out2_loss: 0.0955 - val_loss: 0.1908 - val_out1_loss: 0.2130 - 

53/53 - 6s - loss: 0.6952 - out1_loss: 0.7546 - out2_loss: 0.5567 - val_loss: 0.3678 - val_out1_loss: 0.4070 - val_out2_loss: 0.2763
Epoch 2/120
53/53 - 5s - loss: 0.3558 - out1_loss: 0.3935 - out2_loss: 0.2676 - val_loss: 0.3393 - val_out1_loss: 0.3771 - val_out2_loss: 0.2509
Epoch 3/120
53/53 - 4s - loss: 0.3278 - out1_loss: 0.3642 - out2_loss: 0.2428 - val_loss: 0.3024 - val_out1_loss: 0.3372 - val_out2_loss: 0.2212
Epoch 4/120
53/53 - 5s - loss: 0.3009 - out1_loss: 0.3345 - out2_loss: 0.2224 - val_loss: 0.2904 - val_out1_loss: 0.3254 - val_out2_loss: 0.2085
Epoch 5/120
53/53 - 4s - loss: 0.2883 - out1_loss: 0.3211 - out2_loss: 0.2117 - val_loss: 0.2761 - val_out1_loss: 0.3096 - val_out2_loss: 0.1980
Epoch 6/120
53/53 - 4s - loss: 0.2762 - out1_loss: 0.3068 - out2_loss: 0.2049 - val_loss: 0.2753 - val_out1_loss: 0.3059 - val_out2_loss: 0.2039
Epoch 7/120
53/53 - 5s - loss: 0.2675 - out1_loss: 0.2971 - out2_loss: 0.1985 - val_loss: 0.2580 - val_out1_loss: 0.2868 - val_out2_loss: 0.19

Epoch 58/120
53/53 - 4s - loss: 0.1370 - out1_loss: 0.1492 - out2_loss: 0.1087 - val_loss: 0.1957 - val_out1_loss: 0.2190 - val_out2_loss: 0.1414
Epoch 59/120
53/53 - 4s - loss: 0.1367 - out1_loss: 0.1488 - out2_loss: 0.1083 - val_loss: 0.1956 - val_out1_loss: 0.2186 - val_out2_loss: 0.1419
Epoch 60/120
53/53 - 4s - loss: 0.1353 - out1_loss: 0.1474 - out2_loss: 0.1073 - val_loss: 0.1940 - val_out1_loss: 0.2170 - val_out2_loss: 0.1405
Epoch 61/120
53/53 - 4s - loss: 0.1368 - out1_loss: 0.1489 - out2_loss: 0.1086 - val_loss: 0.2011 - val_out1_loss: 0.2241 - val_out2_loss: 0.1474
Epoch 62/120
53/53 - 4s - loss: 0.1351 - out1_loss: 0.1470 - out2_loss: 0.1072 - val_loss: 0.1940 - val_out1_loss: 0.2169 - val_out2_loss: 0.1406
Epoch 63/120
53/53 - 4s - loss: 0.1321 - out1_loss: 0.1438 - out2_loss: 0.1049 - val_loss: 0.1970 - val_out1_loss: 0.2205 - val_out2_loss: 0.1420
Epoch 64/120
53/53 - 4s - loss: 0.1329 - out1_loss: 0.1446 - out2_loss: 0.1054 - val_loss: 0.1941 - val_out1_loss: 0.2171 - 

53/53 - 4s - loss: 0.1074 - out1_loss: 0.1159 - out2_loss: 0.0878 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out2_loss: 0.1380
Epoch 115/120
53/53 - 5s - loss: 0.1074 - out1_loss: 0.1158 - out2_loss: 0.0877 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out2_loss: 0.1380
Epoch 116/120
53/53 - 4s - loss: 0.1073 - out1_loss: 0.1158 - out2_loss: 0.0876 - val_loss: 0.1910 - val_out1_loss: 0.2137 - val_out2_loss: 0.1379
Epoch 117/120
53/53 - 4s - loss: 0.1073 - out1_loss: 0.1157 - out2_loss: 0.0876 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out2_loss: 0.1380
Epoch 118/120
53/53 - 4s - loss: 0.1072 - out1_loss: 0.1156 - out2_loss: 0.0875 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out2_loss: 0.1379
Epoch 119/120
53/53 - 5s - loss: 0.1071 - out1_loss: 0.1156 - out2_loss: 0.0875 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out2_loss: 0.1379
Epoch 120/120
53/53 - 5s - loss: 0.1073 - out1_loss: 0.1157 - out2_loss: 0.0876 - val_loss: 0.1909 - val_out1_loss: 0.2136 - val_out

Epoch 15/120
53/53 - 5s - loss: 0.2220 - out1_loss: 0.2463 - out2_loss: 0.1653 - val_loss: 0.2233 - val_out1_loss: 0.2494 - val_out2_loss: 0.1622
Epoch 16/120
53/53 - 4s - loss: 0.2185 - out1_loss: 0.2422 - out2_loss: 0.1633 - val_loss: 0.2219 - val_out1_loss: 0.2454 - val_out2_loss: 0.1672
Epoch 17/120
53/53 - 4s - loss: 0.2104 - out1_loss: 0.2329 - out2_loss: 0.1577 - val_loss: 0.2143 - val_out1_loss: 0.2395 - val_out2_loss: 0.1554
Epoch 18/120
53/53 - 4s - loss: 0.2087 - out1_loss: 0.2313 - out2_loss: 0.1560 - val_loss: 0.2181 - val_out1_loss: 0.2452 - val_out2_loss: 0.1547
Epoch 19/120
53/53 - 4s - loss: 0.2059 - out1_loss: 0.2281 - out2_loss: 0.1543 - val_loss: 0.2137 - val_out1_loss: 0.2382 - val_out2_loss: 0.1564
Epoch 20/120
53/53 - 4s - loss: 0.2014 - out1_loss: 0.2233 - out2_loss: 0.1502 - val_loss: 0.2085 - val_out1_loss: 0.2323 - val_out2_loss: 0.1531
Epoch 21/120
53/53 - 4s - loss: 0.1982 - out1_loss: 0.2196 - out2_loss: 0.1484 - val_loss: 0.2092 - val_out1_loss: 0.2310 - 

Epoch 72/120
53/53 - 4s - loss: 0.1282 - out1_loss: 0.1397 - out2_loss: 0.1015 - val_loss: 0.1917 - val_out1_loss: 0.2145 - val_out2_loss: 0.1383
Epoch 73/120
53/53 - 4s - loss: 0.1277 - out1_loss: 0.1391 - out2_loss: 0.1013 - val_loss: 0.1917 - val_out1_loss: 0.2146 - val_out2_loss: 0.1382
Epoch 74/120
53/53 - 4s - loss: 0.1275 - out1_loss: 0.1388 - out2_loss: 0.1012 - val_loss: 0.1921 - val_out1_loss: 0.2150 - val_out2_loss: 0.1386
Epoch 75/120
53/53 - 4s - loss: 0.1275 - out1_loss: 0.1388 - out2_loss: 0.1011 - val_loss: 0.1919 - val_out1_loss: 0.2149 - val_out2_loss: 0.1384
Epoch 76/120
53/53 - 4s - loss: 0.1274 - out1_loss: 0.1387 - out2_loss: 0.1010 - val_loss: 0.1919 - val_out1_loss: 0.2148 - val_out2_loss: 0.1385
Epoch 77/120
53/53 - 4s - loss: 0.1270 - out1_loss: 0.1383 - out2_loss: 0.1006 - val_loss: 0.1918 - val_out1_loss: 0.2148 - val_out2_loss: 0.1383
Epoch 78/120
53/53 - 5s - loss: 0.1269 - out1_loss: 0.1381 - out2_loss: 0.1006 - val_loss: 0.1918 - val_out1_loss: 0.2146 - 

In [15]:
# for i, uid in enumerate(train.id):
#     single_pred = oofs_pred[i]

#     oof_df = pd.DataFrame(single_pred, columns=pred_cols)
#     oof_df['id_seqpos'] = [f'{uid}_{x}' for x in range(oof_df.shape[0])]

- GRU가 들어가면 좋은 게 long-term correlation이 있는 것 아닐까...꼬이고 하니까
- Positional encoding 넣으면 확 뛸거 같은디