In [1]:

import warnings
warnings.filterwarnings('ignore')

#the basics
import pandas as pd, numpy as np
import math, json, gc, random, os, sys
from matplotlib import pyplot as plt
from tqdm import tqdm

#tensorflow deep learning basics
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L

#for model evaluation
from sklearn.model_selection import train_test_split, KFold

In [2]:
#get comp data
train = pd.read_json('/kaggle/input/stanford-covid-vaccine/train.json', lines=True)
test = pd.read_json('/kaggle/input/stanford-covid-vaccine/test.json', lines=True)
sample_sub = pd.read_csv('/kaggle/input/stanford-covid-vaccine/sample_submission.csv')

In [3]:
#target columns
target_cols = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

In [4]:
token2int = {x:i for i, x in enumerate('().ACGUBEHIMSX')}

In [5]:
def preprocess_inputs(df, cols=['sequence', 'structure', 'predicted_loop_type']):
    return np.transpose(
        np.array(
            df[cols]
            .applymap(lambda seq: [token2int[x] for x in seq])
            .values
            .tolist()
        ),
        (0, 2, 1)
    )

In [6]:
train_inputs = preprocess_inputs(train[train.signal_to_noise > 1])
train_labels = np.array(train[train.signal_to_noise > 1][target_cols].values.tolist()).transpose((0, 2, 1))

In [11]:
def MCRMSE(y_true, y_pred):
    colwise_mse = tf.reduce_mean(tf.square(y_true - y_pred), axis=1)
    return tf.reduce_mean(tf.sqrt(colwise_mse), axis=1)




In [13]:
def gru_layer(hidden_dim, dropout):
    return tf.keras.layers.Bidirectional(
                                tf.keras.layers.GRU(hidden_dim,
                                dropout=dropout,
                                return_sequences=True,
                                kernel_initializer = 'orthogonal'))

def lstm_layer(hidden_dim, dropout):
    return tf.keras.layers.Bidirectional(
                                tf.keras.layers.LSTM(hidden_dim,
                                dropout=dropout,
                                return_sequences=True,
                                kernel_initializer = 'orthogonal'))

def build_model(gru=1,seq_len=107, pred_len=68, dropout=0.5,
                embed_dim=75, hidden_dim=128):
    
    inputs = tf.keras.layers.Input(shape=(seq_len, 3))

    embed = tf.keras.layers.Embedding(input_dim=len(token2int), output_dim=embed_dim)(inputs)
    reshaped = tf.reshape(
        embed, shape=(-1, embed.shape[1],  embed.shape[2] * embed.shape[3]))
    
    reshaped = tf.keras.layers.SpatialDropout1D(.2)(reshaped)
    
    if gru==1:
        hidden = gru_layer(hidden_dim, dropout)(reshaped)
        hidden = gru_layer(hidden_dim, dropout)(hidden)
        hidden = gru_layer(hidden_dim, dropout)(hidden)
        
    elif gru==0:
        hidden = lstm_layer(hidden_dim, dropout)(reshaped)
        hidden = lstm_layer(hidden_dim, dropout)(hidden)
        hidden = lstm_layer(hidden_dim, dropout)(hidden)
        
    elif gru==3:
        hidden = gru_layer(hidden_dim, dropout)(reshaped)
        hidden = lstm_layer(hidden_dim, dropout)(hidden)
        hidden = lstm_layer(hidden_dim, dropout)(hidden)
        
    elif gru==4:
        hidden = lstm_layer(hidden_dim, dropout)(reshaped)
        hidden = gru_layer(hidden_dim, dropout)(hidden)
        hidden = gru_layer(hidden_dim, dropout)(hidden)
    elif gru==5:
        hidden = lstm_layer(hidden_dim, dropout)(reshaped)
        hidden = gru_layer(hidden_dim, dropout)(hidden)
        hidden = lstm_layer(hidden_dim, dropout)(reshaped)
    
    #only making predictions on the first part of each sequence
    truncated = hidden[:, :pred_len]
    
    out = tf.keras.layers.Dense(5, activation='linear')(truncated)

    model = tf.keras.Model(inputs=inputs, outputs=out)

    #some optimizers
    adam = tf.optimizers.Adam()
    radam = tfa.optimizers.RectifiedAdam()
    lookahead = tfa.optimizers.Lookahead(adam, sync_period=6)
    ranger = tfa.optimizers.Lookahead(radam, sync_period=6)
    
    model.compile(optimizer = adam, loss=MCRMSE)
    
    return model

# Training

**Create train/val split now so both models are trained and evaluated on the same samples:**

In [14]:
train_inputs, val_inputs, train_labels, val_labels = train_test_split(train_inputs, train_labels,
                                                                     test_size=.1, random_state=34)

In [15]:
if tf.config.list_physical_devices('GPU') is not None:
    print('Training on GPU')

Training on GPU


**We will use a simple learning rate callback for now:**

In [16]:
lr_callback = tf.keras.callbacks.ReduceLROnPlateau()

### 1. GRU

In [17]:
gru = build_model(gru=1)
sv_gru = tf.keras.callbacks.ModelCheckpoint('model_gru.h5')

history_gru = gru.fit(
    train_inputs, train_labels, 
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_gru],
    verbose = 2
)

print(f"Min training loss={min(history_gru.history['loss'])}, min validation loss={min(history_gru.history['val_loss'])}")

Epoch 1/100
27/27 - 3s - loss: 0.4683 - val_loss: 0.4208
Epoch 2/100
27/27 - 1s - loss: 0.4027 - val_loss: 0.3992
Epoch 3/100
27/27 - 1s - loss: 0.3868 - val_loss: 0.3834
Epoch 4/100
27/27 - 1s - loss: 0.3724 - val_loss: 0.3682
Epoch 5/100
27/27 - 1s - loss: 0.3583 - val_loss: 0.3572
Epoch 6/100
27/27 - 1s - loss: 0.3489 - val_loss: 0.3465
Epoch 7/100
27/27 - 1s - loss: 0.3413 - val_loss: 0.3411
Epoch 8/100
27/27 - 1s - loss: 0.3363 - val_loss: 0.3358
Epoch 9/100
27/27 - 1s - loss: 0.3324 - val_loss: 0.3358
Epoch 10/100
27/27 - 1s - loss: 0.3261 - val_loss: 0.3268
Epoch 11/100
27/27 - 1s - loss: 0.3223 - val_loss: 0.3206
Epoch 12/100
27/27 - 1s - loss: 0.3168 - val_loss: 0.3185
Epoch 13/100
27/27 - 1s - loss: 0.3128 - val_loss: 0.3114
Epoch 14/100
27/27 - 1s - loss: 0.3089 - val_loss: 0.3083
Epoch 15/100
27/27 - 1s - loss: 0.3052 - val_loss: 0.3043
Epoch 16/100
27/27 - 1s - loss: 0.3011 - val_loss: 0.3019
Epoch 17/100
27/27 - 1s - loss: 0.2995 - val_loss: 0.2976
Epoch 18/100
27/27 - 1s

### 2. LSTM

In [18]:
lstm = build_model(gru=0)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_lstm.h5')

history_lstm = lstm.fit(
    train_inputs, train_labels, 
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
27/27 - 3s - loss: 0.4911 - val_loss: 0.4415
Epoch 2/100
27/27 - 1s - loss: 0.4184 - val_loss: 0.4057
Epoch 3/100
27/27 - 1s - loss: 0.3892 - val_loss: 0.3872
Epoch 4/100
27/27 - 1s - loss: 0.3767 - val_loss: 0.3750
Epoch 5/100
27/27 - 1s - loss: 0.3649 - val_loss: 0.3610
Epoch 6/100
27/27 - 1s - loss: 0.3541 - val_loss: 0.3530
Epoch 7/100
27/27 - 1s - loss: 0.3470 - val_loss: 0.3469
Epoch 8/100
27/27 - 2s - loss: 0.3409 - val_loss: 0.3374
Epoch 9/100
27/27 - 2s - loss: 0.3342 - val_loss: 0.3358
Epoch 10/100
27/27 - 1s - loss: 0.3294 - val_loss: 0.3282
Epoch 11/100
27/27 - 1s - loss: 0.3249 - val_loss: 0.3209
Epoch 12/100
27/27 - 1s - loss: 0.3212 - val_loss: 0.3181
Epoch 13/100
27/27 - 1s - loss: 0.3163 - val_loss: 0.3127
Epoch 14/100
27/27 - 1s - loss: 0.3114 - val_loss: 0.3072
Epoch 15/100
27/27 - 2s - loss: 0.3078 - val_loss: 0.3058
Epoch 16/100
27/27 - 1s - loss: 0.3035 - val_loss: 0.3006
Epoch 17/100
27/27 - 1s - loss: 0.2995 - val_loss: 0.2957
Epoch 18/100
27/27 - 1s

# 3. Hyb1

In [19]:
lstm = build_model(gru=3)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb1.h5')

history_lstm = lstm.fit(
    train_inputs, train_labels, 
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
27/27 - 3s - loss: 0.4845 - val_loss: 0.4275
Epoch 2/100
27/27 - 1s - loss: 0.4052 - val_loss: 0.3995
Epoch 3/100
27/27 - 1s - loss: 0.3861 - val_loss: 0.3857
Epoch 4/100
27/27 - 1s - loss: 0.3746 - val_loss: 0.3783
Epoch 5/100
27/27 - 1s - loss: 0.3633 - val_loss: 0.3581
Epoch 6/100
27/27 - 1s - loss: 0.3487 - val_loss: 0.3454
Epoch 7/100
27/27 - 1s - loss: 0.3403 - val_loss: 0.3384
Epoch 8/100
27/27 - 1s - loss: 0.3322 - val_loss: 0.3305
Epoch 9/100
27/27 - 1s - loss: 0.3247 - val_loss: 0.3217
Epoch 10/100
27/27 - 1s - loss: 0.3196 - val_loss: 0.3184
Epoch 11/100
27/27 - 1s - loss: 0.3132 - val_loss: 0.3089
Epoch 12/100
27/27 - 1s - loss: 0.3083 - val_loss: 0.3067
Epoch 13/100
27/27 - 1s - loss: 0.3037 - val_loss: 0.3010
Epoch 14/100
27/27 - 2s - loss: 0.2989 - val_loss: 0.2968
Epoch 15/100
27/27 - 1s - loss: 0.2957 - val_loss: 0.2964
Epoch 16/100
27/27 - 1s - loss: 0.2924 - val_loss: 0.2911
Epoch 17/100
27/27 - 2s - loss: 0.2876 - val_loss: 0.2845
Epoch 18/100
27/27 - 1s

# 4. Hyb2

In [20]:
lstm = build_model(gru=4)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb2.h5')

history_lstm = lstm.fit(
    train_inputs, train_labels, 
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
27/27 - 3s - loss: 0.4759 - val_loss: 0.4198
Epoch 2/100
27/27 - 1s - loss: 0.4024 - val_loss: 0.3962
Epoch 3/100
27/27 - 1s - loss: 0.3856 - val_loss: 0.3822
Epoch 4/100
27/27 - 1s - loss: 0.3712 - val_loss: 0.3687
Epoch 5/100
27/27 - 1s - loss: 0.3595 - val_loss: 0.3612
Epoch 6/100
27/27 - 1s - loss: 0.3535 - val_loss: 0.3522
Epoch 7/100
27/27 - 1s - loss: 0.3469 - val_loss: 0.3498
Epoch 8/100
27/27 - 1s - loss: 0.3429 - val_loss: 0.3439
Epoch 9/100
27/27 - 1s - loss: 0.3370 - val_loss: 0.3350
Epoch 10/100
27/27 - 1s - loss: 0.3328 - val_loss: 0.3339
Epoch 11/100
27/27 - 1s - loss: 0.3278 - val_loss: 0.3251
Epoch 12/100
27/27 - 1s - loss: 0.3229 - val_loss: 0.3215
Epoch 13/100
27/27 - 1s - loss: 0.3192 - val_loss: 0.3180
Epoch 14/100
27/27 - 1s - loss: 0.3149 - val_loss: 0.3139
Epoch 15/100
27/27 - 1s - loss: 0.3102 - val_loss: 0.3091
Epoch 16/100
27/27 - 1s - loss: 0.3070 - val_loss: 0.3032
Epoch 17/100
27/27 - 1s - loss: 0.3029 - val_loss: 0.2989
Epoch 18/100
27/27 - 1s

# 5. hyb3

In [21]:
lstm = build_model(gru=5)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb3.h5')

history_lstm = lstm.fit(
    train_inputs, train_labels, 
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
27/27 - 1s - loss: 0.4922 - val_loss: 0.4408
Epoch 2/100
27/27 - 1s - loss: 0.4085 - val_loss: 0.4044
Epoch 3/100
27/27 - 1s - loss: 0.3848 - val_loss: 0.3835
Epoch 4/100
27/27 - 1s - loss: 0.3701 - val_loss: 0.3696
Epoch 5/100
27/27 - 1s - loss: 0.3577 - val_loss: 0.3607
Epoch 6/100
27/27 - 1s - loss: 0.3495 - val_loss: 0.3519
Epoch 7/100
27/27 - 1s - loss: 0.3429 - val_loss: 0.3463
Epoch 8/100
27/27 - 1s - loss: 0.3390 - val_loss: 0.3425
Epoch 9/100
27/27 - 1s - loss: 0.3348 - val_loss: 0.3374
Epoch 10/100
27/27 - 1s - loss: 0.3323 - val_loss: 0.3375
Epoch 11/100
27/27 - 1s - loss: 0.3299 - val_loss: 0.3325
Epoch 12/100
27/27 - 1s - loss: 0.3253 - val_loss: 0.3299
Epoch 13/100
27/27 - 1s - loss: 0.3229 - val_loss: 0.3278
Epoch 14/100
27/27 - 1s - loss: 0.3191 - val_loss: 0.3238
Epoch 15/100
27/27 - 1s - loss: 0.3155 - val_loss: 0.3182
Epoch 16/100
27/27 - 1s - loss: 0.3120 - val_loss: 0.3140
Epoch 17/100
27/27 - 1s - loss: 0.3088 - val_loss: 0.3123
Epoch 18/100
27/27 - 1s

# Model Evaluation

# Inference and Submission

In [22]:
public_df = test.query("seq_length == 107").copy()
private_df = test.query("seq_length == 130").copy()

public_inputs = preprocess_inputs(public_df)
private_inputs = preprocess_inputs(private_df)

**Predict twice, one for the public leaderboard, the other for the private leaderboard:**

In [23]:
# build all models
gru_short = build_model(gru=1, seq_len=107, pred_len=107)
gru_long = build_model(gru=1, seq_len=130, pred_len=130)
lstm_short = build_model(gru=0, seq_len=107, pred_len=107)
lstm_long = build_model(gru=0, seq_len=130, pred_len=130)
hyb1_short = build_model(gru=3, seq_len=107, pred_len=107)
hyb1_long = build_model(gru=3, seq_len=130, pred_len=130)
hyb2_short = build_model(gru=4, seq_len=107, pred_len=107)
hyb2_long = build_model(gru=4, seq_len=130, pred_len=130)
hyb3_short = build_model(gru=5, seq_len=107, pred_len=107)
hyb3_long = build_model(gru=5, seq_len=130, pred_len=130)


# load pre-trained model weights
gru_short.load_weights('model_gru.h5')
gru_long.load_weights('model_gru.h5')
lstm_short.load_weights('model_lstm.h5')
lstm_long.load_weights('model_lstm.h5')
hyb1_short.load_weights('model_hyb1.h5')
hyb1_long.load_weights('model_hyb1.h5')
hyb2_short.load_weights('model_hyb2.h5')
hyb2_long.load_weights('model_hyb2.h5')
hyb3_short.load_weights('model_hyb3.h5')
hyb3_long.load_weights('model_hyb3.h5')

# and predict
gru_public_preds = gru_short.predict(public_inputs)
gru_private_preds = gru_long.predict(private_inputs)
lstm_public_preds = lstm_short.predict(public_inputs)
lstm_private_preds = lstm_long.predict(private_inputs)
hyb1_public_preds = hyb1_short.predict(public_inputs)
hyb1_private_preds = hyb1_long.predict(private_inputs)
hyb2_public_preds = hyb2_short.predict(public_inputs)
hyb2_private_preds = hyb2_long.predict(private_inputs)
hyb3_public_preds = hyb3_short.predict(public_inputs)
hyb3_private_preds = hyb3_long.predict(private_inputs)

**Now we just need to change the shape of each sample to the long format:**

In [24]:
preds_gru = []

for df, preds in [(public_df, gru_public_preds), (private_df, gru_private_preds)]:
    for i, uid in enumerate(df.id):
        single_pred = preds[i]

        single_df = pd.DataFrame(single_pred, columns=target_cols)
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

        preds_gru.append(single_df)

preds_gru_df = pd.concat(preds_gru)
preds_gru_df.head()

Unnamed: 0,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,id_seqpos
0,0.813204,0.738167,1.892738,0.613165,0.810794,id_00073f8be_0
1,2.503969,3.582739,4.504695,3.598233,2.934643,id_00073f8be_1
2,1.264525,0.555534,0.651592,0.724087,0.663601,id_00073f8be_2
3,1.115623,0.917757,1.156807,1.523429,1.525713,id_00073f8be_3
4,0.748457,0.511262,0.672645,0.849919,0.819849,id_00073f8be_4


**Now we do the same for the LSTM model so we can blend their predictions:**

In [25]:
preds_lstm = []

for df, preds in [(public_df, lstm_public_preds), (private_df, lstm_private_preds)]:
    for i, uid in enumerate(df.id):
        single_pred = preds[i]

        single_df = pd.DataFrame(single_pred, columns=target_cols)
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

        preds_lstm.append(single_df)

preds_lstm_df = pd.concat(preds_lstm)
preds_lstm_df.head()

Unnamed: 0,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,id_seqpos
0,0.729518,0.669326,1.906504,0.548261,0.768921,id_00073f8be_0
1,2.165667,3.136641,4.320129,3.364549,2.972424,id_00073f8be_1
2,1.433603,0.462998,0.559417,0.564855,0.619762,id_00073f8be_2
3,1.272654,1.085803,1.200453,1.639748,1.648433,id_00073f8be_3
4,0.924945,0.57339,0.539607,0.85025,0.874479,id_00073f8be_4


For hyb1:

In [26]:
preds_hyb1 = []

for df, preds in [(public_df, hyb1_public_preds), (private_df, hyb1_private_preds)]:
    for i, uid in enumerate(df.id):
        single_pred = preds[i]

        single_df = pd.DataFrame(single_pred, columns=target_cols)
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

        preds_hyb1.append(single_df)

preds_hyb1_df = pd.concat(preds_hyb1)
preds_hyb1_df.head()

Unnamed: 0,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,id_seqpos
0,0.777582,0.662388,1.87484,0.558371,0.776229,id_00073f8be_0
1,2.330886,3.043195,3.881663,3.145874,2.651846,id_00073f8be_1
2,1.377723,0.471362,0.5785,0.510297,0.548768,id_00073f8be_2
3,1.211197,1.08998,1.223212,1.603461,1.615322,id_00073f8be_3
4,0.83553,0.573457,0.632227,0.79281,0.815213,id_00073f8be_4


For Hyb2

In [27]:
preds_hyb2 = []

for df, preds in [(public_df, hyb2_public_preds), (private_df, hyb2_private_preds)]:
    for i, uid in enumerate(df.id):
        single_pred = preds[i]

        single_df = pd.DataFrame(single_pred, columns=target_cols)
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

        preds_hyb2.append(single_df)

preds_hyb2_df = pd.concat(preds_hyb2)
preds_hyb2_df.head()

Unnamed: 0,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,id_seqpos
0,0.829607,0.825044,2.147527,0.652266,0.810475,id_00073f8be_0
1,2.374135,3.123879,4.387527,3.341963,2.847752,id_00073f8be_1
2,1.690504,0.692595,0.790801,0.756798,0.754297,id_00073f8be_2
3,1.344921,1.217306,1.306061,1.819828,1.719311,id_00073f8be_3
4,0.894565,0.727556,0.659258,1.000475,0.86543,id_00073f8be_4


For hyb3

In [28]:
preds_hyb3 = []

for df, preds in [(public_df, hyb3_public_preds), (private_df, hyb3_private_preds)]:
    for i, uid in enumerate(df.id):
        single_pred = preds[i]

        single_df = pd.DataFrame(single_pred, columns=target_cols)
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]

        preds_hyb3.append(single_df)

preds_hyb3_df = pd.concat(preds_hyb3)
preds_hyb3_df.head()

Unnamed: 0,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C,id_seqpos
0,0.844596,0.67775,1.975148,0.526269,0.727702,id_00073f8be_0
1,2.273908,3.147098,4.132948,3.163097,2.713194,id_00073f8be_1
2,1.301402,0.473965,0.612442,0.659649,0.691871,id_00073f8be_2
3,1.314,1.095895,1.178771,1.573259,1.464372,id_00073f8be_3
4,0.878311,0.520592,0.565562,0.800511,0.863788,id_00073f8be_4


**And now we blend:**

In [29]:
blend_preds_df = pd.DataFrame()
blend_preds_df['id_seqpos'] = preds_gru_df['id_seqpos']
blend_preds_df['reactivity'] = 0.2*preds_gru_df['reactivity'] + 0.2*preds_lstm_df['reactivity'] + 0.2*preds_hyb1_df['reactivity'] + 0.2*preds_hyb2_df['reactivity'] + 0.2*preds_hyb3_df['reactivity']
blend_preds_df['deg_Mg_pH10'] = 0.2*preds_gru_df['deg_Mg_pH10'] + 0.2*preds_lstm_df['deg_Mg_pH10'] + 0.2*preds_hyb1_df['deg_Mg_pH10'] + 0.2*preds_hyb2_df['deg_Mg_pH10'] + 0.2*preds_hyb3_df['deg_Mg_pH10']
blend_preds_df['deg_pH10'] = 0.2*preds_gru_df['deg_pH10'] + 0.2*preds_lstm_df['deg_pH10'] + 0.2*preds_hyb1_df['deg_pH10'] + 0.2*preds_hyb2_df['deg_pH10'] + 0.2*preds_hyb3_df['deg_pH10']
blend_preds_df['deg_Mg_50C'] = 0.2*preds_gru_df['deg_Mg_50C'] + 0.2*preds_lstm_df['deg_Mg_50C'] + 0.2*preds_hyb1_df['deg_Mg_50C'] + 0.2*preds_hyb2_df['deg_Mg_50C'] + 0.2*preds_hyb3_df['deg_Mg_50C']
blend_preds_df['deg_50C'] = 0.2*preds_gru_df['deg_50C'] + 0.2*preds_lstm_df['deg_50C'] + 0.2*preds_hyb1_df['deg_50C'] + 0.2*preds_hyb2_df['deg_Mg_50C'] + 0.2*preds_hyb3_df['deg_Mg_50C']

In [30]:
submission = sample_sub[['id_seqpos']].merge(blend_preds_df, on=['id_seqpos'])

#sanity check
submission.head()

Unnamed: 0,id_seqpos,reactivity,deg_Mg_pH10,deg_pH10,deg_Mg_50C,deg_50C
0,id_00073f8be_0,0.798901,0.714535,1.959352,0.579667,0.706896
1,id_00073f8be_1,2.329713,3.20671,4.245393,3.322743,3.012795
2,id_00073f8be_2,1.413551,0.531291,0.63855,0.643137,0.649716
3,id_00073f8be_3,1.251679,1.081348,1.213061,1.631945,1.636511
4,id_00073f8be_4,0.856362,0.581251,0.61386,0.858793,0.862105


In [31]:
submission.to_csv('submission.csv', index=False)
print('Submission saved')

Submission saved
