In [1]:
import pandas as pd
import numpy as np
import json
import tensorflow.keras.layers as L
import tensorflow as tf
import plotly.express as px

## Define helper functions and useful vars

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
# This will tell us the columns we are predicting
pred_cols = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

In [4]:
def gru_layer(hidden_dim, dropout):
    return tf.keras.layers.Bidirectional(
                                tf.keras.layers.GRU(hidden_dim,
                                dropout=dropout,
                                return_sequences=True,
                                kernel_initializer = 'orthogonal'))

def lstm_layer(hidden_dim, dropout):
    return tf.keras.layers.Bidirectional(
                                tf.keras.layers.LSTM(hidden_dim,
                                dropout=dropout,
                                return_sequences=True,
                                kernel_initializer = 'orthogonal'))

def build_model(gru=False,seq_len=107, pred_len=68, dropout=0.25,
                embed_dim=128, hidden_dim=384):
    
    inputs = tf.keras.layers.Input(shape=(seq_len, 7))
    
    inputs_bpps = tf.keras.layers.Input(shape=(seq_len, 1))
    

    embed0 = tf.keras.layers.Embedding(input_dim=len(token2int0), output_dim=embed_dim)(inputs[:, :, 0])
    embed1 = tf.keras.layers.Embedding(input_dim=len(token2int1), output_dim=embed_dim)(inputs[:, :, 1])
    embed2 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 2])
    embed3 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 3])
    embed4 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 4])
    embed5 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 5])
    embed6 = tf.keras.layers.Embedding(input_dim=len(token2int2), output_dim=embed_dim)(inputs[:, :, 6])
    
    
    embed0 = tf.keras.layers.SpatialDropout1D(.2)(embed0)
    embed1 = tf.keras.layers.SpatialDropout1D(.2)(embed1)
    embed2 = tf.keras.layers.SpatialDropout1D(.2)(embed2)
    embed3 = tf.keras.layers.SpatialDropout1D(.2)(embed3)
    embed4 = tf.keras.layers.SpatialDropout1D(.2)(embed4)
    embed5 = tf.keras.layers.SpatialDropout1D(.2)(embed5)
    embed6 = tf.keras.layers.SpatialDropout1D(.2)(embed6)
    
    embed = tf.concat([embed0, embed1, embed2, embed3, embed4, embed5, embed6], axis=2)
    
    #reshaped = tf.reshape(
    #    embed, shape=(-1, embed.shape[1],  embed.shape[2] * embed.shape[3]))
    
    embed = tf.keras.layers.SpatialDropout1D(.2)(embed)
    
    bpps = tf.keras.layers.Dense(embed_dim, activation='linear')(inputs_bpps)
    
    embed = tf.concat([embed, bpps], axis=2)
    print(embed.shape)
    
    transformer_block = TransformerBlock(1024, 8, 1024)
    embed = transformer_block(embed)
    
    hidden = gru_layer(hidden_dim, dropout)(embed)
    hidden = gru_layer(hidden_dim, dropout)(hidden)
    hidden = gru_layer(hidden_dim, dropout)(hidden)

    
    #only making predictions on the first part of each sequence
    truncated = hidden[:, :pred_len]
    
    out1 = tf.keras.layers.Dense(5, activation='linear', name='out1')(truncated)
    out2 = tf.keras.layers.Dense(5, activation='linear', name='out2')(truncated)

    model = tf.keras.Model(inputs=[inputs, inputs_bpps], outputs=[out1, out2])

    #some optimizers
    adam = tf.optimizers.Adam()
    def MCRMSE(y_true, y_pred):
        colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)
        return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)
    
    model.compile(optimizer = adam, loss={'out1': MCRMSE, 'out2': 'mae'}, loss_weights={'out1': 0.7, 'out2': 0.3})
    
    return model

In [5]:
token2int = {x:i for i, x in enumerate('().ACGUBEHIMSX')}

def preprocess_inputs(df, cols=['sequence', 'structure', 'predicted_loop_type']):
    return np.transpose(
        np.array(
            df[cols]
            .applymap(lambda seq: [token2int[x] for x in seq])
            .values
            .tolist()
        ),
        (0, 2, 1)
    )

## Load and preprocess data

In [6]:
train = pd.read_csv('../input/train_v1.csv')

In [7]:
train.head()

Unnamed: 0,index,id,sequence,structure,predicted_loop_type,signal_to_noise,SN_filter,seq_length,seq_scored,reactivity_error,...,deg_error_50C,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,structure_gamma0,structure_gamma1,structure_gamma2,structure_gamma3
0,0,id_001f94081,GGAAAAGCUCUAAUAACAGGAGACUAGGACUACGUAUUUCUAGGUA...,.....((((((.......)))).)).((.....((..((((((......,EEEEESSSSSSHHHHHHHSSSSBSSXSSIIIIISSIISSSSSSHHH...,6.894,1,107,68,"[0.1359, 0.20700000000000002, 0.1633, 0.1452, ...",...,"[0.2167, 0.34750000000000003, 0.188, 0.2124, 0...","[0.3297, 1.5693000000000001, 1.1227, 0.8686, 0...","[0.7556, 2.983, 0.2526, 1.3789, 0.637600000000...","[2.3375, 3.5060000000000002, 0.3008, 1.0108, 0...","[0.35810000000000003, 2.9683, 0.2589, 1.4552, ...","[0.6382, 3.4773, 0.9988, 1.3228, 0.78770000000...",.................................................,.......((((.......)))).((((((.((..)).))))))......,(.....(((((.......))))(((((((.((..)).)))))).)....,((....(((((..(...)))))(((((((.((..)).)))))).)....
1,1,id_0049f53ba,GGAAAAAGCGCGCGCGGUUAGCGCGCGCUUUUGCGCGCGCUGUACC...,.....(((((((((((((((((((((((....)))))))))).)))...,EEEEESSSSSSSSSSSSSSSSSSSSSSSHHHHSSSSSSSSSSBSSS...,0.193,0,107,68,"[2.8272, 2.8272, 2.8272, 4.7343, 2.5676, 2.567...",...,"[15.4857, 7.9596, 13.3957, 5.8777, 5.8777, 5.8...","[0.0, 0.0, 0.0, 2.2965, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.947, 4.4523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.8511, 4.0426, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[7.6692, 0.0, 10.9561, 0.0, 0.0, 0.0, 0.0, 0.0...",.....(((((((((((((((((((((((....)))))))))).)))...,.....(((((((((((((((((((((((....)))))))))).)))...,..(..(((((((((((((((((((((((....)))))))))).)))...,(((..(((((((((((((((((((((((....)))))))))).)))...
2,2,id_006f36f57,GGAAAGUGCUCAGAUAAGCUAAGCUCGAAUAGCAAUCGAAUAGAAU...,.....((((.((.....((((.(((.....)))..((((......)...,EEEEESSSSISSIIIIISSSSMSSSHHHHHSSSMMSSSSHHHHHHS...,8.8,1,107,68,"[0.0931, 0.13290000000000002, 0.11280000000000...",...,"[0.14980000000000002, 0.1761, 0.1517, 0.116700...","[0.44820000000000004, 1.4822, 1.1819, 0.743400...","[0.2504, 1.4021, 0.9804, 0.49670000000000003, ...","[2.243, 2.9361, 1.0553, 0.721, 0.6396000000000...","[0.5163, 1.6823000000000001, 1.0426, 0.7902, 0...","[0.9501000000000001, 1.7974999999999999, 1.499...",...................................((((......)...,.....(((((.((.....)).)))((((...((..((((......)...,(....(((((.((.....)).)((((((.(.((.(((((......)...,((...(((((.((.....)).)))((((.(.((.(((((.(...))...
3,3,id_0082d463b,GGAAAAGCGCGCGCGCGCGCGCGAAAAAGCGCGCGCGCGCGCGCGC...,......((((((((((((((((......))))))))))))))))((...,EEEEEESSSSSSSSSSSSSSSSHHHHHHSSSSSSSSSSSSSSSSSS...,0.104,0,107,68,"[3.5229, 6.0748, 3.0374, 3.0374, 3.0374, 3.037...",...,"[15.3995, 8.1124, 7.7824, 7.7824, 7.7824, 7.78...","[0.0, 2.2399, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, -0.5083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[3.4248, 6.8128, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, -0.8365, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[7.6692, -1.3223, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",.................................................,......(((((((((((((((((......)))))))))))))))))...,((....(((((((((((((((((......)))))))))))))))))...,((....(((((((((((((((((......)))))))))))))))))...
4,4,id_0087940f4,GGAAAAUAUAUAAUAUAUUAUAUAAAUAUAUUAUAGAAGUAUAAUA...,.....(((((((.((((((((((((.(((((((((....)))))))...,EEEEESSSSSSSBSSSSSSSSSSSSBSSSSSSSSSHHHHSSSSSSS...,0.423,0,107,68,"[1.665, 2.1728, 2.0041, 1.2405, 0.620200000000...",...,"[1.3285, 3.6173, 1.3057, 1.3021, 1.1507, 1.150...","[0.8267, 2.6577, 2.8481, 0.40090000000000003, ...","[2.1058, 3.138, 2.5437000000000003, 1.0932, 0....","[4.7366, 4.6243, 1.2068, 1.1538, 0.0, 0.0, 0.7...","[2.2052, 1.7947000000000002, 0.7457, 3.1233, 0...","[0.0, 5.1198, -0.3551, -0.3518, 0.0, 0.0, 0.0,...",......(((((..(((((((((((.((((((((((....)))))))...,.....((((((.((((((((((((.((((((((((....)))))))...,.....((((((.((((((((((((.((((((((((....)))))))...,((...((((((.((((((((((((.((((((((((....)))))))...


In [8]:
train_target = pd.read_json('../input//train.json', lines=True)
#test = pd.read_json('../input//test.json', lines=True)
train = pd.read_csv('../input/train_v1.csv')
test = pd.read_csv('../input/test_v1.csv')
sample_df = pd.read_csv('../input//sample_submission.csv')

In [9]:
train.head()

Unnamed: 0,index,id,sequence,structure,predicted_loop_type,signal_to_noise,SN_filter,seq_length,seq_scored,reactivity_error,...,deg_error_50C,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,structure_gamma0,structure_gamma1,structure_gamma2,structure_gamma3
0,0,id_001f94081,GGAAAAGCUCUAAUAACAGGAGACUAGGACUACGUAUUUCUAGGUA...,.....((((((.......)))).)).((.....((..((((((......,EEEEESSSSSSHHHHHHHSSSSBSSXSSIIIIISSIISSSSSSHHH...,6.894,1,107,68,"[0.1359, 0.20700000000000002, 0.1633, 0.1452, ...",...,"[0.2167, 0.34750000000000003, 0.188, 0.2124, 0...","[0.3297, 1.5693000000000001, 1.1227, 0.8686, 0...","[0.7556, 2.983, 0.2526, 1.3789, 0.637600000000...","[2.3375, 3.5060000000000002, 0.3008, 1.0108, 0...","[0.35810000000000003, 2.9683, 0.2589, 1.4552, ...","[0.6382, 3.4773, 0.9988, 1.3228, 0.78770000000...",.................................................,.......((((.......)))).((((((.((..)).))))))......,(.....(((((.......))))(((((((.((..)).)))))).)....,((....(((((..(...)))))(((((((.((..)).)))))).)....
1,1,id_0049f53ba,GGAAAAAGCGCGCGCGGUUAGCGCGCGCUUUUGCGCGCGCUGUACC...,.....(((((((((((((((((((((((....)))))))))).)))...,EEEEESSSSSSSSSSSSSSSSSSSSSSSHHHHSSSSSSSSSSBSSS...,0.193,0,107,68,"[2.8272, 2.8272, 2.8272, 4.7343, 2.5676, 2.567...",...,"[15.4857, 7.9596, 13.3957, 5.8777, 5.8777, 5.8...","[0.0, 0.0, 0.0, 2.2965, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.947, 4.4523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.8511, 4.0426, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[7.6692, 0.0, 10.9561, 0.0, 0.0, 0.0, 0.0, 0.0...",.....(((((((((((((((((((((((....)))))))))).)))...,.....(((((((((((((((((((((((....)))))))))).)))...,..(..(((((((((((((((((((((((....)))))))))).)))...,(((..(((((((((((((((((((((((....)))))))))).)))...
2,2,id_006f36f57,GGAAAGUGCUCAGAUAAGCUAAGCUCGAAUAGCAAUCGAAUAGAAU...,.....((((.((.....((((.(((.....)))..((((......)...,EEEEESSSSISSIIIIISSSSMSSSHHHHHSSSMMSSSSHHHHHHS...,8.8,1,107,68,"[0.0931, 0.13290000000000002, 0.11280000000000...",...,"[0.14980000000000002, 0.1761, 0.1517, 0.116700...","[0.44820000000000004, 1.4822, 1.1819, 0.743400...","[0.2504, 1.4021, 0.9804, 0.49670000000000003, ...","[2.243, 2.9361, 1.0553, 0.721, 0.6396000000000...","[0.5163, 1.6823000000000001, 1.0426, 0.7902, 0...","[0.9501000000000001, 1.7974999999999999, 1.499...",...................................((((......)...,.....(((((.((.....)).)))((((...((..((((......)...,(....(((((.((.....)).)((((((.(.((.(((((......)...,((...(((((.((.....)).)))((((.(.((.(((((.(...))...
3,3,id_0082d463b,GGAAAAGCGCGCGCGCGCGCGCGAAAAAGCGCGCGCGCGCGCGCGC...,......((((((((((((((((......))))))))))))))))((...,EEEEEESSSSSSSSSSSSSSSSHHHHHHSSSSSSSSSSSSSSSSSS...,0.104,0,107,68,"[3.5229, 6.0748, 3.0374, 3.0374, 3.0374, 3.037...",...,"[15.3995, 8.1124, 7.7824, 7.7824, 7.7824, 7.78...","[0.0, 2.2399, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, -0.5083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[3.4248, 6.8128, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, -0.8365, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[7.6692, -1.3223, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",.................................................,......(((((((((((((((((......)))))))))))))))))...,((....(((((((((((((((((......)))))))))))))))))...,((....(((((((((((((((((......)))))))))))))))))...
4,4,id_0087940f4,GGAAAAUAUAUAAUAUAUUAUAUAAAUAUAUUAUAGAAGUAUAAUA...,.....(((((((.((((((((((((.(((((((((....)))))))...,EEEEESSSSSSSBSSSSSSSSSSSSBSSSSSSSSSHHHHSSSSSSS...,0.423,0,107,68,"[1.665, 2.1728, 2.0041, 1.2405, 0.620200000000...",...,"[1.3285, 3.6173, 1.3057, 1.3021, 1.1507, 1.150...","[0.8267, 2.6577, 2.8481, 0.40090000000000003, ...","[2.1058, 3.138, 2.5437000000000003, 1.0932, 0....","[4.7366, 4.6243, 1.2068, 1.1538, 0.0, 0.0, 0.7...","[2.2052, 1.7947000000000002, 0.7457, 3.1233, 0...","[0.0, 5.1198, -0.3551, -0.3518, 0.0, 0.0, 0.0,...",......(((((..(((((((((((.((((((((((....)))))))...,.....((((((.((((((((((((.((((((((((....)))))))...,.....((((((.((((((((((((.((((((((((....)))))))...,((...((((((.((((((((((((.((((((((((....)))))))...


In [10]:
#target columns
target_cols = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

In [11]:
token2int0 = {'G': 0, 'A': 1, 'C': 2, 'U': 3}
token2int1 = {'.': 0,  '(': 1, ')': 2}
token2int2 = {'E': 0, 'S': 1, 'H': 2, 'B': 3, 'X': 4, 'I': 5, 'M': 6}

def convert_seq(x, tmp_dict):
    return [tmp_dict[ele] for ele in x]

train['sequence'] = train['sequence'].apply(lambda x: [token2int0[ele] for ele in x])
train['structure'] = train['structure'].apply(lambda x: [token2int1[ele] for ele in x])
train['structure_gamma0'] = train['structure_gamma0'].apply(lambda x: [token2int1[ele] for ele in x])
train['structure_gamma1'] = train['structure_gamma1'].apply(lambda x: [token2int1[ele] for ele in x])
train['structure_gamma2'] = train['structure_gamma2'].apply(lambda x: [token2int1[ele] for ele in x])
train['structure_gamma3'] = train['structure_gamma3'].apply(lambda x: [token2int1[ele] for ele in x])
train['predicted_loop_type'] = train['predicted_loop_type'].apply(lambda x: [token2int2[ele] for ele in x])

train_inputs = np.transpose(np.array(train[['sequence', 'structure', 'structure_gamma0', 'structure_gamma1', 
                                            'structure_gamma2', 'structure_gamma3','predicted_loop_type']].values.tolist()), (0, 2, 1))

In [12]:
train_inputs = train_inputs[train.signal_to_noise > 1]

In [13]:
train_inputs.shape

(2096, 107, 7)

In [14]:
train_labels = np.array(train_target[train_target.signal_to_noise > 1][target_cols].values.tolist()).transpose((0, 2, 1))

In [15]:
train_labels.shape

(2096, 68, 5)

In [16]:
train_bpps = np.stack([1 - np.load(f'../input/bpps/{ele}.npy').sum(1) for ele in train['id']])
train_bpps = train_bpps[train.signal_to_noise > 1][:, :, np.newaxis]

In [17]:
from sklearn.model_selection import KFold

In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(
            query, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(
            key, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(
            value, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output
    
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    
    def get_config(self):
        config = super().get_config().copy()
        return config
    
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [19]:
train_labels.shape

(2096, 68, 5)

In [20]:
np.zeros_like(train_labels).shape

(2096, 68, 5)

In [21]:
FOLDS = KFold(n_splits=5, random_state=815, shuffle=True)

oofs_pred = np.zeros_like(train_labels)
public_preds_array = []
public_preds_array = []

for i, (trn_idx, vld_idx) in enumerate(FOLDS.split(train_inputs)):
    trn_inputs = train_inputs[trn_idx]
    vld_inputs = train_inputs[vld_idx]
    
    trn_inputs_bpps = train_bpps[trn_idx]
    vld_inputs_bpps = train_bpps[vld_idx]

    trn_labels = train_labels[trn_idx]
    vld_labels = train_labels[vld_idx]

    model = build_model()
    model.summary()

    history = model.fit(
        [trn_inputs, trn_inputs_bpps], trn_labels, 
        validation_data=([vld_inputs, vld_inputs_bpps], vld_labels),
        batch_size=32,
        epochs=120,
        callbacks=[
            tf.keras.callbacks.ReduceLROnPlateau(),
            tf.keras.callbacks.ModelCheckpoint('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_newStruc_815.h5')
        ],
        verbose=2,
    )
    model.load_weights('./tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_newStruc_815.h5')
    outputs, outputs2 = model.predict([vld_inputs, vld_inputs_bpps])
    oofs_pred[vld_idx] = outputs
    
    from sklearn.metrics import mean_squared_error
    errors = []
    for idx in range(5):
         errors.append(np.sqrt(mean_squared_error(vld_labels[:, idx], outputs[:, idx])))
    final_error = np.mean(errors)
    print('#'*20, final_error)

    public_df = test.query("seq_length == 107").copy()
    private_df = test.query("seq_length == 130").copy()
    
    public_df['sequence'] = public_df['sequence'].apply(lambda x: [token2int0[ele] for ele in x])
    public_df['structure'] = public_df['structure'].apply(lambda x: [token2int1[ele] for ele in x])
    public_df['structure_gamma0'] = public_df['structure_gamma0'].apply(lambda x: [token2int1[ele] for ele in x])
    public_df['structure_gamma1'] = public_df['structure_gamma1'].apply(lambda x: [token2int1[ele] for ele in x])
    public_df['structure_gamma2'] = public_df['structure_gamma2'].apply(lambda x: [token2int1[ele] for ele in x])
    public_df['structure_gamma3'] = public_df['structure_gamma3'].apply(lambda x: [token2int1[ele] for ele in x])
    public_df['predicted_loop_type'] = public_df['predicted_loop_type'].apply(lambda x: [token2int2[ele] for ele in x])
    public_inputs = np.transpose(np.array(public_df[['sequence', 'structure', 'structure_gamma0', 'structure_gamma1', 
                                            'structure_gamma2', 'structure_gamma3','predicted_loop_type']].values.tolist()), (0, 2, 1))
    

    private_df['sequence'] = private_df['sequence'].apply(lambda x: [token2int0[ele] for ele in x])
    private_df['structure'] = private_df['structure'].apply(lambda x: [token2int1[ele] for ele in x])
    private_df['structure_gamma0'] = private_df['structure_gamma0'].apply(lambda x: [token2int1[ele] for ele in x])
    private_df['structure_gamma1'] = private_df['structure_gamma1'].apply(lambda x: [token2int1[ele] for ele in x])
    private_df['structure_gamma2'] = private_df['structure_gamma2'].apply(lambda x: [token2int1[ele] for ele in x])
    private_df['structure_gamma3'] = private_df['structure_gamma3'].apply(lambda x: [token2int1[ele] for ele in x])
    private_df['predicted_loop_type'] = private_df['predicted_loop_type'].apply(lambda x: [token2int2[ele] for ele in x])
    private_inputs = np.transpose(np.array(private_df[['sequence', 'structure', 'structure_gamma0', 'structure_gamma1', 
                                            'structure_gamma2', 'structure_gamma3','predicted_loop_type']].values.tolist()), (0, 2, 1))

    public_bpps = np.stack([1 - np.load(f'../input/bpps/{ele}.npy').sum(1) for ele in public_df['id']])
    public_bpps = public_bpps[:, :, np.newaxis]
    
    private_bpps = np.stack([1 - np.load(f'../input/bpps/{ele}.npy').sum(1) for ele in private_df['id']])
    private_bpps = private_bpps[:, :, np.newaxis] 

    # Caveat: The prediction format requires the output to be the same length as the input,
    # although it's not the case for the training data.
    model_short = build_model(seq_len=107, pred_len=107)
    model_long = build_model(seq_len=130, pred_len=130)

    model_short.load_weights('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_newStruc_815.h5')
    model_long.load_weights('tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_newStruc_815.h5')

    public_preds, outputs2 = model_short.predict([public_inputs, public_bpps])
    private_preds, outputs2 = model_long.predict([private_inputs,private_bpps])
    
    public_preds_array.append(public_preds)
    public_preds_array.append(private_preds)

    print(public_preds.shape, private_preds.shape)

    preds_ls = []

    for df, preds in [(public_df, public_preds), (private_df, private_preds)]:
        for idx, uid in enumerate(df.id):
            single_pred = preds[idx]

            single_df = pd.DataFrame(single_pred, columns=pred_cols)
            single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

            preds_ls.append(single_df)

    preds_df = pd.concat(preds_ls)

    submission = sample_df[['id_seqpos']].merge(preds_df, on=['id_seqpos'])
    submission.to_csv(f'submission_tf_simple_lstm_large_noise_more_epochs_bpps_large_new_loss_transformer_threeEmbedding_gru_twoloss_newStruc_815_{i}.csv', index=False)

(None, 107, 1024)
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 107, 7)]     0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None, 107)]        0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_1 (Te [(None, 107)]        0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_2 (Te [(None, 107)]        0           input_1[0][0]                    
_____________________________________________________________________

Epoch 3/120
53/53 - 5s - loss: 0.3804 - out1_loss: 0.4201 - out2_loss: 0.2876 - val_loss: 0.3576 - val_out1_loss: 0.3942 - val_out2_loss: 0.2722
Epoch 4/120
53/53 - 5s - loss: 0.3496 - out1_loss: 0.3879 - out2_loss: 0.2601 - val_loss: 0.3403 - val_out1_loss: 0.3787 - val_out2_loss: 0.2508
Epoch 5/120
53/53 - 5s - loss: 0.3325 - out1_loss: 0.3692 - out2_loss: 0.2467 - val_loss: 0.3192 - val_out1_loss: 0.3520 - val_out2_loss: 0.2427
Epoch 6/120
53/53 - 5s - loss: 0.3161 - out1_loss: 0.3518 - out2_loss: 0.2326 - val_loss: 0.3043 - val_out1_loss: 0.3393 - val_out2_loss: 0.2225
Epoch 7/120
53/53 - 5s - loss: 0.3048 - out1_loss: 0.3405 - out2_loss: 0.2214 - val_loss: 0.2887 - val_out1_loss: 0.3222 - val_out2_loss: 0.2105
Epoch 8/120
53/53 - 5s - loss: 0.2918 - out1_loss: 0.3257 - out2_loss: 0.2127 - val_loss: 0.2819 - val_out1_loss: 0.3148 - val_out2_loss: 0.2053
Epoch 9/120
53/53 - 5s - loss: 0.2846 - out1_loss: 0.3178 - out2_loss: 0.2072 - val_loss: 0.2698 - val_out1_loss: 0.3006 - val_out

Epoch 60/120
53/53 - 6s - loss: 0.1468 - out1_loss: 0.1606 - out2_loss: 0.1145 - val_loss: 0.1962 - val_out1_loss: 0.2176 - val_out2_loss: 0.1462
Epoch 61/120
53/53 - 6s - loss: 0.1457 - out1_loss: 0.1595 - out2_loss: 0.1138 - val_loss: 0.1954 - val_out1_loss: 0.2167 - val_out2_loss: 0.1459
Epoch 62/120
53/53 - 6s - loss: 0.1483 - out1_loss: 0.1619 - out2_loss: 0.1165 - val_loss: 0.1966 - val_out1_loss: 0.2184 - val_out2_loss: 0.1457
Epoch 63/120
53/53 - 6s - loss: 0.1448 - out1_loss: 0.1584 - out2_loss: 0.1131 - val_loss: 0.1953 - val_out1_loss: 0.2169 - val_out2_loss: 0.1449
Epoch 64/120
53/53 - 6s - loss: 0.1436 - out1_loss: 0.1570 - out2_loss: 0.1123 - val_loss: 0.1970 - val_out1_loss: 0.2180 - val_out2_loss: 0.1480
Epoch 65/120
53/53 - 6s - loss: 0.1437 - out1_loss: 0.1570 - out2_loss: 0.1128 - val_loss: 0.1948 - val_out1_loss: 0.2165 - val_out2_loss: 0.1442
Epoch 66/120
53/53 - 6s - loss: 0.1447 - out1_loss: 0.1573 - out2_loss: 0.1153 - val_loss: 0.1996 - val_out1_loss: 0.2189 - 

53/53 - 6s - loss: 0.1164 - out1_loss: 0.1260 - out2_loss: 0.0939 - val_loss: 0.1911 - val_out1_loss: 0.2124 - val_out2_loss: 0.1414
Epoch 117/120
53/53 - 6s - loss: 0.1161 - out1_loss: 0.1257 - out2_loss: 0.0937 - val_loss: 0.1911 - val_out1_loss: 0.2124 - val_out2_loss: 0.1414
Epoch 118/120
53/53 - 6s - loss: 0.1160 - out1_loss: 0.1256 - out2_loss: 0.0937 - val_loss: 0.1911 - val_out1_loss: 0.2124 - val_out2_loss: 0.1414
Epoch 119/120
53/53 - 6s - loss: 0.1162 - out1_loss: 0.1258 - out2_loss: 0.0938 - val_loss: 0.1910 - val_out1_loss: 0.2123 - val_out2_loss: 0.1414
Epoch 120/120
53/53 - 7s - loss: 0.1160 - out1_loss: 0.1256 - out2_loss: 0.0937 - val_loss: 0.1910 - val_out1_loss: 0.2123 - val_out2_loss: 0.1413
#################### 0.3684601293738104
(None, 107, 1024)
(None, 130, 1024)
(629, 107, 5) (3005, 130, 5)
(None, 107, 1024)
Model: "functional_7"
__________________________________________________________________________________________________
Layer (type)                    Out

53/53 - 8s - loss: 0.7298 - out1_loss: 0.7889 - out2_loss: 0.5919 - val_loss: 0.3870 - val_out1_loss: 0.4265 - val_out2_loss: 0.2949
Epoch 2/120
53/53 - 6s - loss: 0.3889 - out1_loss: 0.4291 - out2_loss: 0.2950 - val_loss: 0.3881 - val_out1_loss: 0.4296 - val_out2_loss: 0.2911
Epoch 3/120
53/53 - 6s - loss: 0.3844 - out1_loss: 0.4230 - out2_loss: 0.2942 - val_loss: 0.3768 - val_out1_loss: 0.4134 - val_out2_loss: 0.2912
Epoch 4/120
53/53 - 6s - loss: 0.3790 - out1_loss: 0.4171 - out2_loss: 0.2900 - val_loss: 0.3747 - val_out1_loss: 0.4133 - val_out2_loss: 0.2846
Epoch 5/120
53/53 - 6s - loss: 0.3771 - out1_loss: 0.4159 - out2_loss: 0.2865 - val_loss: 0.3810 - val_out1_loss: 0.4212 - val_out2_loss: 0.2872
Epoch 6/120
53/53 - 6s - loss: 0.3777 - out1_loss: 0.4168 - out2_loss: 0.2866 - val_loss: 0.3766 - val_out1_loss: 0.4152 - val_out2_loss: 0.2866
Epoch 7/120
53/53 - 6s - loss: 0.3682 - out1_loss: 0.4072 - out2_loss: 0.2773 - val_loss: 0.3655 - val_out1_loss: 0.4062 - val_out2_loss: 0.27

Epoch 58/120
53/53 - 5s - loss: 0.1522 - out1_loss: 0.1668 - out2_loss: 0.1183 - val_loss: 0.1934 - val_out1_loss: 0.2154 - val_out2_loss: 0.1422
Epoch 59/120
53/53 - 6s - loss: 0.1516 - out1_loss: 0.1658 - out2_loss: 0.1185 - val_loss: 0.1930 - val_out1_loss: 0.2146 - val_out2_loss: 0.1425
Epoch 60/120
53/53 - 6s - loss: 0.1505 - out1_loss: 0.1645 - out2_loss: 0.1178 - val_loss: 0.1923 - val_out1_loss: 0.2139 - val_out2_loss: 0.1419
Epoch 61/120
53/53 - 6s - loss: 0.1500 - out1_loss: 0.1640 - out2_loss: 0.1173 - val_loss: 0.1947 - val_out1_loss: 0.2164 - val_out2_loss: 0.1441
Epoch 62/120
53/53 - 6s - loss: 0.1488 - out1_loss: 0.1623 - out2_loss: 0.1173 - val_loss: 0.1942 - val_out1_loss: 0.2164 - val_out2_loss: 0.1422
Epoch 63/120
53/53 - 6s - loss: 0.1479 - out1_loss: 0.1616 - out2_loss: 0.1159 - val_loss: 0.1935 - val_out1_loss: 0.2148 - val_out2_loss: 0.1436
Epoch 64/120
53/53 - 6s - loss: 0.1478 - out1_loss: 0.1611 - out2_loss: 0.1167 - val_loss: 0.1935 - val_out1_loss: 0.2150 - 

53/53 - 6s - loss: 0.1211 - out1_loss: 0.1312 - out2_loss: 0.0976 - val_loss: 0.1875 - val_out1_loss: 0.2090 - val_out2_loss: 0.1373
Epoch 115/120
53/53 - 6s - loss: 0.1213 - out1_loss: 0.1314 - out2_loss: 0.0976 - val_loss: 0.1875 - val_out1_loss: 0.2090 - val_out2_loss: 0.1373
Epoch 116/120
53/53 - 6s - loss: 0.1211 - out1_loss: 0.1312 - out2_loss: 0.0976 - val_loss: 0.1875 - val_out1_loss: 0.2090 - val_out2_loss: 0.1373
Epoch 117/120
53/53 - 6s - loss: 0.1209 - out1_loss: 0.1310 - out2_loss: 0.0975 - val_loss: 0.1875 - val_out1_loss: 0.2090 - val_out2_loss: 0.1373
Epoch 118/120
53/53 - 6s - loss: 0.1210 - out1_loss: 0.1311 - out2_loss: 0.0975 - val_loss: 0.1874 - val_out1_loss: 0.2089 - val_out2_loss: 0.1373
Epoch 119/120
53/53 - 6s - loss: 0.1210 - out1_loss: 0.1311 - out2_loss: 0.0975 - val_loss: 0.1874 - val_out1_loss: 0.2089 - val_out2_loss: 0.1373
Epoch 120/120
53/53 - 6s - loss: 0.1211 - out1_loss: 0.1312 - out2_loss: 0.0975 - val_loss: 0.1874 - val_out1_loss: 0.2089 - val_out

53/53 - 7s - loss: 0.7350 - out1_loss: 0.8129 - out2_loss: 0.5533 - val_loss: 0.3949 - val_out1_loss: 0.4351 - val_out2_loss: 0.3009
Epoch 2/120
53/53 - 6s - loss: 0.3855 - out1_loss: 0.4243 - out2_loss: 0.2951 - val_loss: 0.3878 - val_out1_loss: 0.4263 - val_out2_loss: 0.2979
Epoch 3/120
53/53 - 6s - loss: 0.3815 - out1_loss: 0.4194 - out2_loss: 0.2931 - val_loss: 0.3858 - val_out1_loss: 0.4252 - val_out2_loss: 0.2938
Epoch 4/120
53/53 - 6s - loss: 0.3778 - out1_loss: 0.4161 - out2_loss: 0.2885 - val_loss: 0.3806 - val_out1_loss: 0.4181 - val_out2_loss: 0.2933
Epoch 5/120
53/53 - 6s - loss: 0.3769 - out1_loss: 0.4152 - out2_loss: 0.2877 - val_loss: 0.3842 - val_out1_loss: 0.4232 - val_out2_loss: 0.2932
Epoch 6/120
53/53 - 6s - loss: 0.3764 - out1_loss: 0.4149 - out2_loss: 0.2867 - val_loss: 0.3792 - val_out1_loss: 0.4179 - val_out2_loss: 0.2890
Epoch 7/120
53/53 - 6s - loss: 0.3745 - out1_loss: 0.4133 - out2_loss: 0.2837 - val_loss: 0.3836 - val_out1_loss: 0.4226 - val_out2_loss: 0.29

Epoch 58/120
53/53 - 5s - loss: 0.1584 - out1_loss: 0.1738 - out2_loss: 0.1225 - val_loss: 0.1986 - val_out1_loss: 0.2209 - val_out2_loss: 0.1467
Epoch 59/120
53/53 - 6s - loss: 0.1549 - out1_loss: 0.1697 - out2_loss: 0.1206 - val_loss: 0.1985 - val_out1_loss: 0.2208 - val_out2_loss: 0.1463
Epoch 60/120
53/53 - 6s - loss: 0.1533 - out1_loss: 0.1674 - out2_loss: 0.1204 - val_loss: 0.2004 - val_out1_loss: 0.2221 - val_out2_loss: 0.1498
Epoch 61/120
53/53 - 5s - loss: 0.1542 - out1_loss: 0.1681 - out2_loss: 0.1219 - val_loss: 0.1971 - val_out1_loss: 0.2192 - val_out2_loss: 0.1457
Epoch 62/120
53/53 - 5s - loss: 0.1535 - out1_loss: 0.1681 - out2_loss: 0.1194 - val_loss: 0.1989 - val_out1_loss: 0.2206 - val_out2_loss: 0.1484
Epoch 63/120
53/53 - 6s - loss: 0.1536 - out1_loss: 0.1674 - out2_loss: 0.1214 - val_loss: 0.1994 - val_out1_loss: 0.2212 - val_out2_loss: 0.1485
Epoch 64/120
53/53 - 6s - loss: 0.1501 - out1_loss: 0.1641 - out2_loss: 0.1173 - val_loss: 0.1985 - val_out1_loss: 0.2213 - 

53/53 - 6s - loss: 0.1226 - out1_loss: 0.1328 - out2_loss: 0.0986 - val_loss: 0.1940 - val_out1_loss: 0.2164 - val_out2_loss: 0.1416
Epoch 115/120
53/53 - 6s - loss: 0.1226 - out1_loss: 0.1328 - out2_loss: 0.0986 - val_loss: 0.1940 - val_out1_loss: 0.2164 - val_out2_loss: 0.1416
Epoch 116/120
53/53 - 6s - loss: 0.1225 - out1_loss: 0.1328 - out2_loss: 0.0986 - val_loss: 0.1939 - val_out1_loss: 0.2164 - val_out2_loss: 0.1416
Epoch 117/120
53/53 - 6s - loss: 0.1225 - out1_loss: 0.1328 - out2_loss: 0.0986 - val_loss: 0.1940 - val_out1_loss: 0.2164 - val_out2_loss: 0.1417
Epoch 118/120
53/53 - 6s - loss: 0.1223 - out1_loss: 0.1325 - out2_loss: 0.0984 - val_loss: 0.1939 - val_out1_loss: 0.2164 - val_out2_loss: 0.1417
Epoch 119/120
53/53 - 6s - loss: 0.1225 - out1_loss: 0.1327 - out2_loss: 0.0986 - val_loss: 0.1939 - val_out1_loss: 0.2163 - val_out2_loss: 0.1416
Epoch 120/120
53/53 - 6s - loss: 0.1222 - out1_loss: 0.1324 - out2_loss: 0.0985 - val_loss: 0.1939 - val_out1_loss: 0.2163 - val_out

53/53 - 8s - loss: 0.7339 - out1_loss: 0.8156 - out2_loss: 0.5433 - val_loss: 0.3939 - val_out1_loss: 0.4343 - val_out2_loss: 0.2997
Epoch 2/120
53/53 - 6s - loss: 0.3885 - out1_loss: 0.4284 - out2_loss: 0.2952 - val_loss: 0.3872 - val_out1_loss: 0.4297 - val_out2_loss: 0.2880
Epoch 3/120
53/53 - 6s - loss: 0.3814 - out1_loss: 0.4205 - out2_loss: 0.2902 - val_loss: 0.3805 - val_out1_loss: 0.4205 - val_out2_loss: 0.2874
Epoch 4/120
53/53 - 6s - loss: 0.3785 - out1_loss: 0.4168 - out2_loss: 0.2892 - val_loss: 0.3779 - val_out1_loss: 0.4173 - val_out2_loss: 0.2859
Epoch 5/120
53/53 - 6s - loss: 0.3789 - out1_loss: 0.4171 - out2_loss: 0.2897 - val_loss: 0.3758 - val_out1_loss: 0.4148 - val_out2_loss: 0.2848
Epoch 6/120
53/53 - 7s - loss: 0.3765 - out1_loss: 0.4143 - out2_loss: 0.2883 - val_loss: 0.3752 - val_out1_loss: 0.4146 - val_out2_loss: 0.2835
Epoch 7/120
53/53 - 6s - loss: 0.3769 - out1_loss: 0.4155 - out2_loss: 0.2869 - val_loss: 0.3893 - val_out1_loss: 0.4271 - val_out2_loss: 0.30

Epoch 58/120
53/53 - 6s - loss: 0.1598 - out1_loss: 0.1750 - out2_loss: 0.1244 - val_loss: 0.2012 - val_out1_loss: 0.2242 - val_out2_loss: 0.1474
Epoch 59/120
53/53 - 6s - loss: 0.1584 - out1_loss: 0.1736 - out2_loss: 0.1229 - val_loss: 0.2004 - val_out1_loss: 0.2242 - val_out2_loss: 0.1449
Epoch 60/120
53/53 - 6s - loss: 0.1601 - out1_loss: 0.1752 - out2_loss: 0.1246 - val_loss: 0.2004 - val_out1_loss: 0.2229 - val_out2_loss: 0.1479
Epoch 61/120
53/53 - 6s - loss: 0.1565 - out1_loss: 0.1714 - out2_loss: 0.1219 - val_loss: 0.1998 - val_out1_loss: 0.2226 - val_out2_loss: 0.1467
Epoch 62/120
53/53 - 6s - loss: 0.1554 - out1_loss: 0.1701 - out2_loss: 0.1209 - val_loss: 0.1999 - val_out1_loss: 0.2226 - val_out2_loss: 0.1472
Epoch 63/120
53/53 - 6s - loss: 0.1563 - out1_loss: 0.1708 - out2_loss: 0.1225 - val_loss: 0.1989 - val_out1_loss: 0.2220 - val_out2_loss: 0.1451
Epoch 64/120
53/53 - 6s - loss: 0.1533 - out1_loss: 0.1677 - out2_loss: 0.1197 - val_loss: 0.2030 - val_out1_loss: 0.2268 - 

53/53 - 6s - loss: 0.1291 - out1_loss: 0.1401 - out2_loss: 0.1033 - val_loss: 0.1944 - val_out1_loss: 0.2173 - val_out2_loss: 0.1412
Epoch 115/120
53/53 - 5s - loss: 0.1292 - out1_loss: 0.1403 - out2_loss: 0.1034 - val_loss: 0.1944 - val_out1_loss: 0.2172 - val_out2_loss: 0.1412
Epoch 116/120
53/53 - 5s - loss: 0.1291 - out1_loss: 0.1402 - out2_loss: 0.1033 - val_loss: 0.1944 - val_out1_loss: 0.2173 - val_out2_loss: 0.1412
Epoch 117/120
53/53 - 5s - loss: 0.1291 - out1_loss: 0.1401 - out2_loss: 0.1033 - val_loss: 0.1944 - val_out1_loss: 0.2172 - val_out2_loss: 0.1412
Epoch 118/120
53/53 - 6s - loss: 0.1291 - out1_loss: 0.1402 - out2_loss: 0.1032 - val_loss: 0.1944 - val_out1_loss: 0.2173 - val_out2_loss: 0.1412
Epoch 119/120
53/53 - 5s - loss: 0.1289 - out1_loss: 0.1399 - out2_loss: 0.1031 - val_loss: 0.1944 - val_out1_loss: 0.2173 - val_out2_loss: 0.1412
Epoch 120/120
53/53 - 5s - loss: 0.1289 - out1_loss: 0.1400 - out2_loss: 0.1032 - val_loss: 0.1945 - val_out1_loss: 0.2173 - val_out

53/53 - 7s - loss: 0.7274 - out1_loss: 0.7748 - out2_loss: 0.6170 - val_loss: 0.3914 - val_out1_loss: 0.4318 - val_out2_loss: 0.2972
Epoch 2/120
53/53 - 5s - loss: 0.3865 - out1_loss: 0.4258 - out2_loss: 0.2948 - val_loss: 0.3847 - val_out1_loss: 0.4253 - val_out2_loss: 0.2898
Epoch 3/120
53/53 - 5s - loss: 0.3812 - out1_loss: 0.4194 - out2_loss: 0.2919 - val_loss: 0.3775 - val_out1_loss: 0.4164 - val_out2_loss: 0.2865
Epoch 4/120
53/53 - 5s - loss: 0.3790 - out1_loss: 0.4171 - out2_loss: 0.2901 - val_loss: 0.3877 - val_out1_loss: 0.4303 - val_out2_loss: 0.2885
Epoch 5/120
53/53 - 5s - loss: 0.3776 - out1_loss: 0.4155 - out2_loss: 0.2891 - val_loss: 0.3776 - val_out1_loss: 0.4174 - val_out2_loss: 0.2848
Epoch 6/120
53/53 - 5s - loss: 0.3775 - out1_loss: 0.4163 - out2_loss: 0.2870 - val_loss: 0.3767 - val_out1_loss: 0.4161 - val_out2_loss: 0.2847
Epoch 7/120
53/53 - 5s - loss: 0.3780 - out1_loss: 0.4173 - out2_loss: 0.2864 - val_loss: 0.3764 - val_out1_loss: 0.4163 - val_out2_loss: 0.28

Epoch 58/120
53/53 - 5s - loss: 0.1571 - out1_loss: 0.1719 - out2_loss: 0.1226 - val_loss: 0.2022 - val_out1_loss: 0.2254 - val_out2_loss: 0.1481
Epoch 59/120
53/53 - 5s - loss: 0.1558 - out1_loss: 0.1706 - out2_loss: 0.1213 - val_loss: 0.2041 - val_out1_loss: 0.2260 - val_out2_loss: 0.1531
Epoch 60/120
53/53 - 5s - loss: 0.1549 - out1_loss: 0.1691 - out2_loss: 0.1218 - val_loss: 0.1992 - val_out1_loss: 0.2215 - val_out2_loss: 0.1470
Epoch 61/120
53/53 - 5s - loss: 0.1520 - out1_loss: 0.1662 - out2_loss: 0.1187 - val_loss: 0.1994 - val_out1_loss: 0.2218 - val_out2_loss: 0.1471
Epoch 62/120
53/53 - 5s - loss: 0.1517 - out1_loss: 0.1658 - out2_loss: 0.1186 - val_loss: 0.2003 - val_out1_loss: 0.2233 - val_out2_loss: 0.1464
Epoch 63/120
53/53 - 5s - loss: 0.1503 - out1_loss: 0.1644 - out2_loss: 0.1174 - val_loss: 0.1993 - val_out1_loss: 0.2227 - val_out2_loss: 0.1446
Epoch 64/120
53/53 - 5s - loss: 0.1508 - out1_loss: 0.1648 - out2_loss: 0.1181 - val_loss: 0.1991 - val_out1_loss: 0.2213 - 

53/53 - 5s - loss: 0.1196 - out1_loss: 0.1294 - out2_loss: 0.0968 - val_loss: 0.1936 - val_out1_loss: 0.2165 - val_out2_loss: 0.1400
Epoch 115/120
53/53 - 5s - loss: 0.1196 - out1_loss: 0.1294 - out2_loss: 0.0969 - val_loss: 0.1936 - val_out1_loss: 0.2165 - val_out2_loss: 0.1401
Epoch 116/120
53/53 - 5s - loss: 0.1194 - out1_loss: 0.1291 - out2_loss: 0.0968 - val_loss: 0.1934 - val_out1_loss: 0.2164 - val_out2_loss: 0.1399
Epoch 117/120
53/53 - 5s - loss: 0.1192 - out1_loss: 0.1289 - out2_loss: 0.0965 - val_loss: 0.1934 - val_out1_loss: 0.2163 - val_out2_loss: 0.1399
Epoch 118/120
53/53 - 5s - loss: 0.1191 - out1_loss: 0.1287 - out2_loss: 0.0966 - val_loss: 0.1934 - val_out1_loss: 0.2163 - val_out2_loss: 0.1399
Epoch 119/120
53/53 - 5s - loss: 0.1184 - out1_loss: 0.1280 - out2_loss: 0.0961 - val_loss: 0.1933 - val_out1_loss: 0.2163 - val_out2_loss: 0.1398
Epoch 120/120
53/53 - 5s - loss: 0.1184 - out1_loss: 0.1280 - out2_loss: 0.0960 - val_loss: 0.1933 - val_out1_loss: 0.2162 - val_out

In [22]:
# for i, uid in enumerate(train.id):
#     single_pred = oofs_pred[i]

#     oof_df = pd.DataFrame(single_pred, columns=pred_cols)
#     oof_df['id_seqpos'] = [f'{uid}_{x}' for x in range(oof_df.shape[0])]

- Gru 가 들어가면 좋은게 long term corelation 이 있는 것 아닐까...꼬이고 하니까
- Positional encoding 넣으면 확 뛸거 같은디