Credits: 

https://www.kaggle.com/steubk/tensorflow-bidirectional-lstm-custom-mae-loss (Masked MAE loss function) @steubk

https://www.kaggle.com/c/ventilator-pressure-prediction/discussion/281299 (some features) @nityasevak

# Key points of solution:

1. Magic features based on aggregations over R, C, rank and rounded u_in value (f1 - f6)
2. Training the model in reversed order (from the 80th timestep to the 1st) while also feeding it features generated in the original, forward order (for instance, at the 80th timestep the model sees the u_in value of the 80th timestep as well as the value of the 1st timestep).
3. Smaller tricks: a quantile transformation of the u_in values where u_out == 0, and treating the pressure values where u_out == 1 as negative values.
4. Training time: ~17 hours on a TPU v3-8

In [None]:
import numpy as np
import pandas as pd
import gc
import os
import random

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize, QuantileTransformer
from sklearn.model_selection import train_test_split, GroupKFold, KFold

pd.set_option('display.max_columns',None)

print(tf.__version__)

In [None]:
def add_features(df):
    """Build the full engineered feature set for the ventilator data.

    The frame must contain the columns ``breath_id``, ``time_step``,
    ``u_in``, ``u_out``, ``R`` and ``C``. All lag/lead/cumulative features
    follow the current row order of ``df`` inside each breath, so the caller
    decides (by sorting beforehand) whether they run forward or backward in
    time.

    New columns are written into ``df`` in place. The returned frame is the
    output of ``pd.get_dummies``, in which ``R``, ``C`` and the combined
    ``R_C`` key are replaced by one-hot indicator columns.
    """
    df['sum_per_breath'] = df.groupby(['breath_id'])['u_in'].transform('sum')

    # magic features start
    # Rows sharing R, C, rounded u_in and position inside the breath get the
    # same 'uid'; f1-f6 are u_in statistics over each such group.
    df['rounded_u_in'] = df['u_in'].round(0)
    df['rank'] = df.groupby(['breath_id'])['time_step'].rank()
    df['uid'] = (df['R'].astype(str) + '_' + df['C'].astype(str) + '_'
                 + df['rounded_u_in'].astype(str) + '_' + df['rank'].astype(str))
    df['uid_count'] = df.groupby(['uid'])['uid'].transform('count')
    u_in_by_uid = df.groupby(['uid'])['u_in']
    df['f1'] = u_in_by_uid.transform('mean')
    df['f2'] = u_in_by_uid.transform('min')
    df['f3'] = u_in_by_uid.transform('max')
    # f4-f6 reuse f1-f3 instead of recomputing the same group statistics.
    df['f4'] = df['u_in'] - df['f1']
    df['f5'] = df['u_in'] - df['f2']
    df['f6'] = df['u_in'] - df['f3']

    del df['rounded_u_in'], df['rank'], df['uid']
    # magic features end

    # Per-breath differences of u_in. The leading NaNs are back-filled over
    # the whole column (not per breath) with the next available value.
    # `.bfill()` replaces the deprecated `fillna(method='bfill')`.
    for lag in (1, 2, 3):
        df[f'u_in_diff_{lag}'] = df.groupby(['breath_id'])['u_in'].diff(lag).bfill()

    df['time_step_diff_1'] = df.groupby(['breath_id'])['time_step'].diff(1).fillna(0)
    df['time_step_diff_1_r'] = df.groupby(['breath_id'])['time_step'].diff(-1).fillna(0)
    df['delta'] = df['time_step_diff_1'] * df['u_in']
    df['delta2'] = df['time_step_diff_1_r'] * df['u_in']
    df['time_step_diff_2'] = df.groupby(['breath_id'])['time_step'].diff(2).fillna(0)
    df['time_step_diff_3'] = df.groupby(['breath_id'])['time_step'].diff(3).fillna(0)

    # Running sums of u_in * dt inside each breath.
    df['area'] = df.groupby(['breath_id'])['delta'].cumsum()
    df['area2'] = df.groupby(['breath_id'])['delta2'].cumsum()
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']
    df['u_in_cumsum'] = df.groupby(['breath_id'])['u_in'].cumsum()
    df['area_cumsum'] = df.groupby(['breath_id'])['area'].cumsum()
    df['max_to_cumsum_u_in_per_breath_id'] = (
        df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in_cumsum']
    )

    # Lagged ("past") and lead ("future") values; positions that fall outside
    # the breath are filled with 0.
    for suffix, sign in (('past', 1), ('future', -1)):
        for col in ('u_in', 'time_step'):
            for lag in (1, 2, 3):
                df[f'{col}_shift_{lag}_{suffix}'] = (
                    df.groupby(['breath_id'])[col].shift(sign * lag).fillna(0)
                )

    # Number of rows per (breath_id, u_out) pair.
    df['breath_id_u_out'] = df['breath_id'].astype(str) + '_' + df['u_out'].astype(str)
    df['count_breath_id_u_out'] = df.groupby(['breath_id_u_out'])['breath_id_u_out'].transform('count')
    del df['breath_id_u_out']

    df['mean_u_in_per_R_C_u_out'] = df.groupby(['R', 'C', 'u_out'])['u_in'].transform('mean')
    df['diff_mean_u_in_per_R_C_u_out'] = df['u_in'] - df['mean_u_in_per_R_C_u_out']
    df['to_mean_u_in_per_R_C_u_out'] = (
        df.groupby(['breath_id'])['u_in'].transform('mean') - df['mean_u_in_per_R_C_u_out']
    )

    df['max_u_in_per_R_C_u_out'] = df.groupby(['R', 'C', 'u_out'])['u_in'].transform('max')
    df['diff_max_u_in_per_R_C_u_out'] = df['u_in'] - df['max_u_in_per_R_C_u_out']
    df['to_max_u_in_per_R_C_u_out'] = (
        df.groupby(['breath_id'])['u_in'].transform('max') - df['max_u_in_per_R_C_u_out']
    )

    df['mean_u_out_per_breath_id'] = df.groupby(['breath_id'])['u_out'].transform('mean')

    df['R_u_in'] = df['u_in'] * df['R']
    df['C_u_in'] = df['u_in'] * df['C']
    for suffix, sign in (('past', 1), ('future', -1)):
        for lag in (1, 2, 3):
            df[f'u_out_shift_{lag}_{suffix}'] = (
                df.groupby(['breath_id'])['u_out'].shift(sign * lag).fillna(0)
            )

    # exp(-t / (R * C)) decay term and the cumulative flow scaled by it.
    df['exponent'] = (-1 * df['time_step']) / (df['R'] * df['C'])
    df['factor'] = np.exp(df['exponent'])
    df['vf'] = (df['u_in_cumsum'] * df['R']) / df['factor']

    # R and C become strings so that get_dummies one-hot encodes them,
    # together with the combined R_C key.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R_C'] = df['R'].astype(str) + '_' + df['C'].astype(str)
    df = pd.get_dummies(df)

    return df

def add_features2(df):
    """Build the reduced feature set used for the second (forward-order) view.

    The frame must contain ``breath_id``, ``time_step``, ``u_in``, ``u_out``,
    ``R`` and ``C``. Lag/lead/cumulative features follow the current row
    order of ``df`` inside each breath.

    The frame is modified in place and the same object is returned. ``R`` and
    ``C`` are removed at the end, as are the intermediate helper columns.
    """
    # Rows sharing R, C, rounded u_in and position inside the breath get the
    # same 'uid'; f1-f6 are u_in statistics over each such group.
    df['rounded_u_in'] = df['u_in'].round(0)
    df['rank'] = df.groupby(['breath_id'])['time_step'].rank()
    df['uid'] = (df['R'].astype(str) + '_' + df['C'].astype(str) + '_'
                 + df['rounded_u_in'].astype(str) + '_' + df['rank'].astype(str))
    df['uid_count'] = df.groupby(['uid'])['uid'].transform('count')
    u_in_by_uid = df.groupby(['uid'])['u_in']
    df['f1'] = u_in_by_uid.transform('mean')
    df['f2'] = u_in_by_uid.transform('min')
    df['f3'] = u_in_by_uid.transform('max')
    # f4-f6 reuse f1-f3 instead of recomputing the same group statistics.
    df['f4'] = df['u_in'] - df['f1']
    df['f5'] = df['u_in'] - df['f2']
    df['f6'] = df['u_in'] - df['f3']

    del df['rounded_u_in'], df['rank'], df['uid']

    # Per-breath differences of u_in. The leading NaNs are back-filled over
    # the whole column (not per breath) with the next available value.
    # `.bfill()` replaces the deprecated `fillna(method='bfill')`.
    for lag in (1, 2):
        df[f'u_in_diff_{lag}'] = df.groupby(['breath_id'])['u_in'].diff(lag).bfill()

    # 'time_step_diff_1' and 'delta' are only stepping stones towards 'area'.
    df['time_step_diff_1'] = df.groupby(['breath_id'])['time_step'].diff(1).fillna(0)
    df['delta'] = df['time_step_diff_1'] * df['u_in']
    del df['time_step_diff_1']

    df['area'] = df.groupby(['breath_id'])['delta'].cumsum()
    del df['delta']

    df['u_in_cumsum'] = df.groupby(['breath_id'])['u_in'].cumsum()
    df['area_cumsum'] = df.groupby(['breath_id'])['area'].cumsum()

    # Lagged / lead values; positions outside the breath are filled with 0.
    for lag in (1, 2):
        df[f'u_in_shift_{lag}_past'] = df.groupby(['breath_id'])['u_in'].shift(lag).fillna(0)
    for lag in (1, 2):
        df[f'u_in_shift_{lag}_future'] = df.groupby(['breath_id'])['u_in'].shift(-lag).fillna(0)

    for lag in (1, 2):
        df[f'u_out_shift_{lag}_past'] = df.groupby(['breath_id'])['u_out'].shift(lag).fillna(0)
    df['u_out_shift_1_future'] = df.groupby(['breath_id'])['u_out'].shift(-1).fillna(0)

    del df['R'], df['C']

    return df


def GBVPP_loss(y_true, y_pred, cols = 80):
    """Masked mean absolute error that ignores steps flagged by u_out.

    ``y_true`` packs two things side by side along its second axis: the first
    ``cols`` columns are the target values, everything after that is the
    u_out flag for the same steps. Steps whose flag is 1 get weight 0, steps
    whose flag is 0 get weight 1.

    Returns one value per row: the weighted absolute error summed over the
    last axis and divided by the sum of the weights.
    """
    target = y_true[:, :cols]
    flag = y_true[:, cols:]

    keep = 1 - flag
    abs_err_sum = tf.reduce_sum(keep * tf.abs(target - y_pred), axis=-1)
    keep_sum = tf.reduce_sum(keep, axis=-1)
    return abs_err_sum / keep_sum

In [None]:
# Run-wide constants: one seed for every random source and the number of
# cross-validation folds.
SEED = 0
N_FOLDS = 15

# Seed TensorFlow, NumPy and the stdlib RNG with the same value.
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
# Exported for child processes; the hash seed of the already-running
# interpreter is not changed by setting this here.
os.environ['PYTHONHASHSEED'] = str(SEED)

In [None]:
# Load the raw competition files.
train_ori = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
# Flip the sign of the target on rows where u_out == 1.
train_ori.loc[train_ori['u_out'] == 1, 'pressure'] = train_ori['pressure'] * (-1)

test_ori = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

# Keep copies in the file's own row order before the frames are re-sorted
# below; they feed the second feature view built with add_features2.
train_ori2 = train_ori.copy()
test_ori2 = test_ori.copy()

# Rank time_step inside each breath and negate it, so that an ascending sort
# on 'neg_rank' puts every breath in descending time_step order.
train_ori['rank'] = train_ori.groupby(['breath_id'])['time_step'].rank()
train_ori['neg_rank'] = -1 * train_ori['rank']
test_ori['rank'] = test_ori.groupby(['breath_id'])['time_step'].rank()
test_ori['neg_rank'] = -1 * test_ori['rank']

train_ori = train_ori.sort_values(by=['breath_id','neg_rank']).reset_index(drop=True)
test_ori = test_ori.sort_values(by=['breath_id','neg_rank']).reset_index(drop=True)

# The helper columns were only needed for sorting.
del train_ori['rank'],train_ori['neg_rank'],test_ori['rank'],test_ori['neg_rank']
gc.collect()

# First feature view: train and test are stacked so that the group statistics
# in add_features are computed over both, then split back by row position.
df = pd.concat([train_ori,test_ori],axis=0,copy=False).reset_index(drop=True)
df = add_features(df)

train = df.iloc[:len(train_ori),:]
test = df.iloc[len(train_ori):,:].reset_index(drop=True)
del test['pressure']
gc.collect()

# Second feature view: same procedure on the un-sorted copies, using the
# reduced feature set.
df2 = pd.concat([train_ori2,test_ori2],axis=0,copy=False).reset_index(drop=True)
df2 = add_features2(df2)

train2 = df2.iloc[:len(train_ori2),:]
test2 = df2.iloc[len(train_ori2):,:].reset_index(drop=True)
del train2['pressure'], test2['pressure'], df2
gc.collect()

# Identifier columns already exist in the first view.
del train2['id'],train2['breath_id'],test2['id'],test2['breath_id']
gc.collect()

# Prefix the second view's columns so they do not collide with the first.
train2.columns = ['shifted_' + str(s) for s in train2.columns]
test2.columns = ['shifted_' + str(s) for s in test2.columns]

# Join the two views column-wise; both sides carry a 0..n-1 index, so rows
# are paired by position (row k of the re-sorted view with row k of the
# un-sorted view).
train = pd.concat([train,train2],axis=1,copy=False)
test = pd.concat([test,test2],axis=1,copy=False)

del train2, test2
gc.collect()

In [None]:
# Pull the target and the u_out mask out as (n_breaths, 80) arrays, then strip
# the target and the identifier columns from the feature frames.
targets = train['pressure'].to_numpy().reshape(-1, 80)
u_outs = train['u_out'].to_numpy().reshape(-1, 80)
train.drop(columns=['pressure', 'id', 'breath_id'], inplace=True)
test = test.drop(columns=['id', 'breath_id'])

In [None]:
# Quantile-transform u_in to a normal distribution. The transformer is fitted
# on the u_in values of rows with u_out == 0 taken from the combined
# train + test feature frame.
QT = QuantileTransformer(
    random_state=0,
    output_distribution='normal',
    subsample=1000000,
)
QT.fit(df.loc[df['u_out'] == 0, 'u_in'].to_numpy().reshape(-1, 1))

# Transform every row into a scratch column, then copy the transformed value
# back into u_in only where u_out == 0.
train['q_u_in'] = QT.transform(train['u_in'].to_numpy().reshape(-1, 1))
test['q_u_in'] = QT.transform(test['u_in'].to_numpy().reshape(-1, 1))

train_inspiratory = train['u_out'] == 0
test_inspiratory = test['u_out'] == 0
train.loc[train_inspiratory, 'u_in'] = train['q_u_in']
test.loc[test_inspiratory, 'u_in'] = test['q_u_in']

# Drop the scratch column, the masks and the combined frame to free memory.
del train['q_u_in'], test['q_u_in'], df
del train_inspiratory, test_inspiratory
gc.collect()

In [None]:
# Scale every feature with a robust scaler (20th-80th percentile range).
RS = RobustScaler(quantile_range=(20.0, 80.0))
# NOTE(review): an earlier `RS.fit(train[train['u_out']==0])` call was removed.
# Its result was discarded immediately because `fit_transform` below refits
# the scaler on the full training frame, so it only cost time and a large
# temporary copy. If fitting on u_out == 0 rows alone was the real intent,
# restore that fit and switch the next line to `RS.transform(train)`; that
# would change the model inputs, so confirm before doing it.
train = RS.fit_transform(train)
test = RS.transform(test)

# Back to sequences: (n_breaths, 80 steps, n_features).
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])
gc.collect()

print(train.shape, test.shape)

In [None]:
# K-fold training of a stacked bidirectional-LSTM model on TPU, followed by
# out-of-fold scoring and export of the OOF / test predictions.
EPOCH = 300
BATCH_SIZE = 512

tf.keras.backend.clear_session()


tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# The body of the `with` block uses one consistent 4-space indent. Previously
# the loop sat at 8 spaces and the post-loop statements at 4, which matches no
# enclosing indentation level and is rejected by Python.
with tpu_strategy.scope():
    kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=2021)
    # One prediction per (breath, step); filled fold by fold.
    oof_preds = np.zeros((train.shape[0],train.shape[1]))
    test_preds = []
    # `test_idx` holds the validation breaths of the current fold.
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)

        checkpoint_path = f'repeat:Fold:{fold+1}.hdf5'
        cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 0,
                             save_best_only = True,
                             save_weights_only = True,
                             mode = 'min')

        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        u_out_train, u_out_valid = u_outs[train_idx], u_outs[test_idx]

        # Four stacked BiLSTMs; each layer's input is the concatenation of the
        # two preceding tensors (skip connections).
        inp = keras.layers.Input(shape=train.shape[-2:])
        x1 = keras.layers.Bidirectional(keras.layers.LSTM(1024, return_sequences=True))(inp)
        concat1 = keras.layers.concatenate([inp,x1])

        x2 = keras.layers.Bidirectional(keras.layers.LSTM(512, return_sequences=True))(concat1)
        concat2 = keras.layers.concatenate([x1,x2])

        x3 = keras.layers.Bidirectional(keras.layers.LSTM(256, return_sequences=True))(concat2)
        concat3 = keras.layers.concatenate([x2,x3])

        x4 = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(concat3)
        concat4 = keras.layers.concatenate([x3,x4])

        x5 = keras.layers.Dense(128, activation='selu')(concat4)
        output = keras.layers.Dense(1)(x5)

        model = keras.models.Model(inputs=inp, outputs=output)
        model.compile(optimizer='Adam',loss=GBVPP_loss)

        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)

        es = EarlyStopping(monitor="val_loss", patience=25, verbose=1, mode="min", restore_best_weights=True)

        # The label array carries the targets followed by the u_out mask,
        # which is the layout GBVPP_loss slices apart.
        model.fit(X_train, np.append(y_train, u_out_train, axis =1),
                  validation_data=(X_valid, np.append(y_valid, u_out_valid, axis =1)),
                  epochs=EPOCH, batch_size=BATCH_SIZE, callbacks=[lr,es,cb_checkpt])
        # Predict with the best checkpoint of this fold.
        model.load_weights(checkpoint_path)
        valid_preds = model.predict(X_valid)
        oof_preds[test_idx] = valid_preds.reshape(valid_preds.shape[0],valid_preds.shape[1])

        test_preds.append(model.predict(test).squeeze().reshape(-1, 1).squeeze())
        del X_train, X_valid, y_train, y_valid
        gc.collect()

    # Flatten to one value per row and score over all steps.
    oof_preds = oof_preds.squeeze().reshape(-1,1).squeeze()
    reshaped_targets = targets.squeeze().reshape(-1,1).squeeze()
    print(mae(reshaped_targets,oof_preds))
    # Fold ensemble: element-wise median of the per-fold test predictions.
    # NOTE(review): the predictions follow the row order of the `test` array,
    # while `submission` keeps the row order of sample_submission.csv. If
    # `test` was re-sorted upstream, these values are not aligned with
    # submission['id'] - confirm that the consumer of test_preds.csv
    # re-orders them.
    submission["pressure"] = np.median(np.vstack(test_preds),axis=0)

# MAE restricted to rows with u_out == 0.
idx = train_ori[train_ori['u_out']==0].index
train_ori['prediction']=oof_preds
print(mae(train_ori.loc[idx,'pressure'],train_ori.loc[idx,'prediction']))
pd.DataFrame(oof_preds).to_csv('oof_preds.csv',index=0)
submission.to_csv('test_preds.csv',index=0)