## Import libraries

In [None]:
import gc,os
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error

import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.layers import Concatenate, LSTM, GRU
from tensorflow.keras.layers import Bidirectional, Multiply
from scipy.signal import hilbert, chirp
from scipy.fft import fft, fftfreq

np.random.seed(42)
tf.random.set_seed(42)

from IPython.display import display

from tqdm import tqdm
from tqdm.keras import TqdmCallback
import matplotlib.pyplot as plt

In [None]:
# Name of the column the models are trained to predict.
TARGET_VAR = "pressure"
# Debug switch read by later cells of this notebook.
DEBUG = False

## Load source datasets

In [None]:
# Read the competition training set and report its size.
train_df = pd.read_csv(
    '../input/ventilator-pressure-prediction/train.csv'
)
print(f"train_df: {train_df.shape}")

# In debug mode keep only the first 80 * 1000 rows for a quick run.
if DEBUG:
    train_df = train_df.iloc[:80 * 1000]

# Keep the row ids as a plain Python list, in file order.
train_idx = train_df['id'].values.tolist()

In [None]:
# Read the competition test set and report its size.
test_df = pd.read_csv(
    '../input/ventilator-pressure-prediction/test.csv'
)
print(f"test_df: {test_df.shape}")

In [None]:
# Derive the pressure grid from the sorted unique training pressures:
# smallest value, largest value, and the gap between the first two values.
all_pressure = np.sort(train_df.pressure.unique())
PRESSURE_MIN = all_pressure[0].item()
PRESSURE_MAX = all_pressure[-1].item()
PRESSURE_STEP = (all_pressure[1] - all_pressure[0]).item()

print('The first 25 unique pressures...')
print(all_pressure[:25])
print('The differences between first 25 pressures...')
# The differences used to be a bare expression in the middle of the cell, so
# they were computed but never shown. np.diff also copes with fewer than 26
# unique values, where the fixed-length slice subtraction would fail.
print(np.diff(all_pressure[:26]))

# The array is only needed for the three constants above.
del all_pressure
gc.collect()

## Feature Engineering

In [None]:
def add_features(df):
    """Add engineered per-breath features to a ventilator data frame.

    Expects the columns used below: ``breath_id``, ``time_step``, ``u_in``,
    ``u_out``, ``R`` and ``C``. Features are computed per ``breath_id`` group
    (cumulative sums, lags/leads, differences, rolling and exponentially
    weighted statistics, analytic-signal envelope and phase difference), and
    ``R``, ``C`` and their combination are one-hot encoded.

    Side effects: columns created before the first ``df.fillna(0)`` are added
    to the caller's frame in place; ``fillna`` then rebinds ``df`` to a new
    frame, so later columns only exist on the returned object. Progress
    messages are printed after each step.

    Returns:
        The new data frame with all features and remaining NaNs replaced by 0.
    """
    # Step 1: simple interactions and per-breath cumulative sums.
    df['cross']= df['u_in'] * df['u_out']
    df['cross2']= df['time_step'] * df['u_out']
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['time_step_cumsum'] = df.groupby(['breath_id'])['time_step'].cumsum()
    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()
    
    #df["u_in_first"] = df.groupby("breath_id")["u_in"].transform("first")
    #df["u_in_last"]  = df.groupby("breath_id")["u_in"].transform("last")
     
    
    print("Step-1...Completed")
    
    # Step 2: lagged (shift > 0) and lead (shift < 0) copies of u_in / u_out
    # within each breath, for offsets 1..4. Rows without a neighbour at that
    # offset get NaN here.
    df['u_in_lag1'] = df.groupby('breath_id')['u_in'].shift(1)
    df['u_out_lag1'] = df.groupby('breath_id')['u_out'].shift(1)
    df['u_in_lag_back1'] = df.groupby('breath_id')['u_in'].shift(-1)
    df['u_out_lag_back1'] = df.groupby('breath_id')['u_out'].shift(-1)
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2)
    df['u_out_lag2'] = df.groupby('breath_id')['u_out'].shift(2)
    df['u_in_lag_back2'] = df.groupby('breath_id')['u_in'].shift(-2)
    df['u_out_lag_back2'] = df.groupby('breath_id')['u_out'].shift(-2)
    df['u_in_lag3'] = df.groupby('breath_id')['u_in'].shift(3)
    df['u_out_lag3'] = df.groupby('breath_id')['u_out'].shift(3)
    df['u_in_lag_back3'] = df.groupby('breath_id')['u_in'].shift(-3)
    df['u_out_lag_back3'] = df.groupby('breath_id')['u_out'].shift(-3)
    df['u_in_lag4'] = df.groupby('breath_id')['u_in'].shift(4)
    df['u_out_lag4'] = df.groupby('breath_id')['u_out'].shift(4)
    df['u_in_lag_back4'] = df.groupby('breath_id')['u_in'].shift(-4)
    df['u_out_lag_back4'] = df.groupby('breath_id')['u_out'].shift(-4)
    #df['u_in_lag5'] = df.groupby('breath_id')['u_in'].shift(5)
    #df['u_out_lag5'] = df.groupby('breath_id')['u_out'].shift(5)
    #df['u_in_lag_back5'] = df.groupby('breath_id')['u_in'].shift(-5)
    #df['u_out_lag_back5'] = df.groupby('breath_id')['u_out'].shift(-5)
    #df['u_in_lag6'] = df.groupby('breath_id')['u_in'].shift(6)
    #df['u_out_lag6'] = df.groupby('breath_id')['u_out'].shift(6)
    #df['u_in_lag_back6'] = df.groupby('breath_id')['u_in'].shift(-6)
    #df['u_out_lag_back6'] = df.groupby('breath_id')['u_out'].shift(-6)
    
    
    
    # Replace every NaN in the frame (including the lag/lead edges) with 0.
    # This returns a new frame: from here on the caller's object is untouched.
    df = df.fillna(0)
    print("Step-2...Completed")
    
    # Step 3: per-breath max / mean of u_in and each row's distance to them.
    df['breath_id__u_in__max'] = df.groupby(['breath_id'])['u_in'].transform('max')
    df['breath_id__u_in__mean'] = df.groupby(['breath_id'])['u_in'].transform('mean')
    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']
    print("Step-3...Completed")
    
    # Step 4: differences between the current value and the zero-filled lags.
    df['u_in_diff1'] = df['u_in'] - df['u_in_lag1']
    df['u_out_diff1'] = df['u_out'] - df['u_out_lag1']
    df['u_in_diff2'] = df['u_in'] - df['u_in_lag2']
    df['u_out_diff2'] = df['u_out'] - df['u_out_lag2']
    df['u_in_diff3'] = df['u_in'] - df['u_in_lag3']
    df['u_out_diff3'] = df['u_out'] - df['u_out_lag3']
    df['u_in_diff4'] = df['u_in'] - df['u_in_lag4']
    df['u_out_diff4'] = df['u_out'] - df['u_out_lag4']
    print("Step-4...Completed")
    
    # Step 5: running mean of u_in within the breath (cumsum / row count).
    df['one'] = 1
    df['count'] = (df['one']).groupby(df['breath_id']).cumsum()
    df['u_in_cummean'] =df['u_in_cumsum'] /df['count']
    
    # Lag of u_in computed with a global shift, then zeroed wherever the row
    # 1 (or 2) positions earlier belongs to a different breath.
    # NOTE(review): when each breath's rows are contiguous this looks
    # equivalent to the zero-filled u_in_lag1 / u_in_lag2 above - confirm.
    df['breath_id_lag']=df['breath_id'].shift(1).fillna(0)
    df['breath_id_lag2']=df['breath_id'].shift(2).fillna(0)
    df['breath_id_lagsame']=np.select([df['breath_id_lag']==df['breath_id']],[1],0)
    df['breath_id_lag2same']=np.select([df['breath_id_lag2']==df['breath_id']],[1],0)
    df['breath_id__u_in_lag'] = df['u_in'].shift(1).fillna(0)
    df['breath_id__u_in_lag'] = df['breath_id__u_in_lag'] * df['breath_id_lagsame']
    df['breath_id__u_in_lag2'] = df['u_in'].shift(2).fillna(0)
    df['breath_id__u_in_lag2'] = df['breath_id__u_in_lag2'] * df['breath_id_lag2same']
    print("Step-5...Completed")
    
    # Step 6: time delta between consecutive rows of a breath, exponentially
    # weighted mean of u_in (halflife 9), and rolling sum/min/max/mean over a
    # window of up to 15 rows. reset_index(level=0, drop=True) removes the
    # breath_id level added by groupby so results align with df's index.
    df['time_step_diff'] = df.groupby('breath_id')['time_step'].diff().fillna(0)
    df['ewm_u_in_mean'] = (df\
                           .groupby('breath_id')['u_in']\
                           .ewm(halflife=9)\
                           .mean()\
                           .reset_index(level=0,drop=True))
    df[["15_in_sum","15_in_min","15_in_max","15_in_mean"]] = (df\
                                                              .groupby('breath_id')['u_in']\
                                                              .rolling(window=15,min_periods=1)\
                                                              .agg({"15_in_sum":"sum",
                                                                    "15_in_min":"min",
                                                                    "15_in_max":"max",
                                                                    "15_in_mean":"mean"})\
                                                               .reset_index(level=0,drop=True))
    print("Step-6...Completed")
    
    # Step 7: differences between the current value and the zero-filled leads.
    df['u_in_lagback_diff1'] = df['u_in'] - df['u_in_lag_back1']
    df['u_out_lagback_diff1'] = df['u_out'] - df['u_out_lag_back1']
    df['u_in_lagback_diff2'] = df['u_in'] - df['u_in_lag_back2']
    df['u_out_lagback_diff2'] = df['u_out'] - df['u_out_lag_back2']
    print("Step-7...Completed")
    
    # Step 8: turn R, C and their combination into strings so that
    # pd.get_dummies one-hot encodes them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df["R"].astype(str) + '__' + df["C"].astype(str)
    df = pd.get_dummies(df)
    print("Step-8...Completed")
    
    # NOTE(review): both lambdas below are unused (their only call sites are
    # commented out). `fftw` also references a name `w` that is not defined in
    # this function, so re-enabling it would fail unless `w` exists globally.
    ffta = lambda x: np.abs(fft(np.append(x.values,x.values[0]))[:80])
    ffta.__name__ = 'ffta'

    fftw = lambda x: np.abs(fft(np.append(x.values,x.values[0])*w)[:80])
    fftw.__name__ = 'fftw'

    #df['fft_u_in'] = df.groupby('breath_id')['u_in'].transform(ffta)
    #df['fft_u_in_w'] = df.groupby('breath_id')['u_in'].transform(fftw)
    # Step 9: per-breath Hilbert transform of u_in; keep its magnitude
    # ('envelope') and the first difference of the unwrapped phase ('IF').
    df['analytical'] = df.groupby('breath_id')['u_in'].transform(hilbert)
    df['envelope'] = np.abs(df['analytical'])
    df['phase'] = np.angle(df['analytical'])
    df['unwrapped_phase'] = df.groupby('breath_id')['phase'].transform(np.unwrap)
    df['phase_shift1'] = df.groupby('breath_id')['unwrapped_phase'].shift(1).astype(np.float32)
    df['IF'] = df['unwrapped_phase'] - df['phase_shift1'].astype(np.float32)
    
    # Intermediate columns are dropped; only 'envelope' and 'IF' are kept.
    df.drop(['analytical','phase', 'unwrapped_phase','phase_shift1'], axis=1, inplace=True)

    # Zero-fill NaNs introduced after the first fillna (e.g. the first 'IF'
    # value of each breath, which has no previous phase).
    df = df.fillna(0)
    print("Step 9 ... Add evelope and IF features")
    return df


# Build the engineered feature frames for both datasets.
print("Train data...\n")
train = add_features(train_df)

print("\nTest data...\n")
test = add_features(test_df)

# The raw frames are not used again below; drop the names and collect.
del train_df, test_df
gc.collect()

In [None]:
# Targets as a 2-D array with 80 pressure values per row.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# Remove the target, the identifiers and the bookkeeping columns created
# during feature engineering so that only model inputs remain.
train.drop(
    columns=[
        'pressure', 'id', 'breath_id', 'one', 'count',
        'breath_id_lag', 'breath_id_lag2',
        'breath_id_lagsame', 'breath_id_lag2same',
    ],
    inplace=True,
)

test = test.drop(
    columns=[
        'id', 'breath_id', 'one', 'count',
        'breath_id_lag', 'breath_id_lag2',
        'breath_id_lagsame', 'breath_id_lag2same',
    ],
)

print(f"train: {train.shape} \ntest: {test.shape}")

In [None]:
# Fit the scaler on the training features and apply it to both sets.
scaler = RobustScaler()
train = scaler.fit_transform(train)
# Append the (unscaled) row ids as one extra trailing column of train.
train = np.column_stack((train, train_idx))
test = scaler.transform(test)

# Reshape both arrays to (rows / 80, 80, columns).
train = train.reshape(-1, 80, train.shape[-1])
# train carries the extra id column, so test has one column fewer.
test = test.reshape(-1, 80, train.shape[-1] - 1)

print(f"train: {train.shape} \ntest: {test.shape} \ntargets: {targets.shape}")
# Column count of train, including the trailing id column.
COLS = train.shape[-1]
COLS

## Hardware config

In [None]:
# Choose BATCH_SIZE according to the available hardware.
# NOTE: on the TPU path this cell binds `tpu_strategy`, not `strategy`;
# `strategy` is only bound here in the fallback branch.
try:
    # Resolve, connect to and initialise the TPU, then scale the batch size
    # with the number of replicas (64 samples per replica).
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
    BATCH_SIZE = tpu_strategy.num_replicas_in_sync * 64
    print("Running on TPU:", tpu.master())
    print(f"Batch Size: {BATCH_SIZE}")
    
except ValueError:
    # Fallback when the TPU setup raises ValueError (presumably: no TPU
    # available): use TensorFlow's current strategy and a fixed batch size.
    strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 512
    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    print(f"Batch Size: {BATCH_SIZE}")

In [None]:
# Utils
# Function to get hardware strategy
def get_hardware_strategy():
    """Return ``(tpu, strategy)`` for the hardware this process runs on.

    ``tpu`` is the TPU cluster resolver, or ``None`` when creating it raised
    ``ValueError``. ``strategy`` is a TPU strategy in the first case (after
    connecting to and initialising the TPU and enabling JIT) and
    TensorFlow's current default strategy otherwise.
    """
    try:
        # No arguments: the resolver discovers the TPU on its own.
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', resolver.master())
    except ValueError:
        resolver = None

    if not resolver:
        # Default distribution strategy in TensorFlow.
        return resolver, tf.distribute.get_strategy()

    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    tpu_strategy = tf.distribute.experimental.TPUStrategy(resolver)
    tf.config.optimizer.set_jit(True)
    return resolver, tpu_strategy
tpu, strategy = get_hardware_strategy()

def plot_history(history):
    """Plot training and validation loss per epoch from a Keras ``History``.

    Reads ``history.history['loss']`` and ``history.history['val_loss']`` and
    shows them in a single matplotlib figure.
    """
    # https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/
    curves = history.history
    for key in ('loss', 'val_loss'):
        plt.plot(curves[key])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
    
def make_submission(name,final_predictions):
    """Write ensemble submission files from a list of prediction vectors.

    ``final_predictions`` is a sequence of equally long 1-D prediction arrays.
    They are stacked as columns and three CSV files are written:

    * ``submission_mean_{name}.csv``   - row-wise mean,
    * ``submission_median_{name}.csv`` - row-wise median,
    * ``submission.csv``               - the median snapped to the pressure
      grid (``PRESSURE_MIN`` + k * ``PRESSURE_STEP``) and clipped to
      ``[PRESSURE_MIN, PRESSURE_MAX]``.

    Before writing, the function tries to delete ``submission_{name}.csv``
    and prints a short message if that fails.
    """
    submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
    try:
        os.remove(f"submission_{name}.csv")
    except (OSError, IOError):
        # Best effort only: report and carry on.
        print(f"Gulp {name}")

    # https://www.kaggle.com/c/ventilator-pressure-prediction/discussion/276138
    stacked = np.column_stack(final_predictions)
    for label, reducer in (("mean", np.mean), ("median", np.median)):
        submission[TARGET_VAR] = reducer(stacked, axis=1)
        submission.to_csv(f"submission_{label}_{name}.csv", index=False)

    # The column now holds the median: round it to the nearest grid value
    # and clip it to the observed pressure range.
    grid_steps = np.round((submission[TARGET_VAR] - PRESSURE_MIN) / PRESSURE_STEP)
    submission[TARGET_VAR] = grid_steps * PRESSURE_STEP + PRESSURE_MIN
    submission[TARGET_VAR] = np.clip(submission[TARGET_VAR], PRESSURE_MIN, PRESSURE_MAX)
    submission.to_csv("submission.csv", index=False)

## Keras DNN Model

In [None]:
from tensorflow.keras.layers import Input, Dense, LSTM,  Conv1D,Dropout,Bidirectional,Multiply
from tensorflow.keras.models import Model
#from attention_utils import get_activations
from tensorflow.keras.layers import Multiply,Permute,Flatten,Add 
from tensorflow.keras.models import *
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K


# When True, one attention vector (the mean over time) is shared by all steps.
SINGLE_ATTENTION_VECTOR = False
def attention_3d_block(inputs):
    """Apply feature-wise soft attention to a 3-D Keras tensor.

    A Dense layer with softmax activation produces, for every time step, a
    weight per feature (softmax over the last axis); the input is multiplied
    element-wise by these weights.

    Args:
        inputs: Keras tensor of shape (batch_size, time_steps, input_dim).

    Returns:
        Keras tensor with the same shape as ``inputs``.
    """
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = Dense(input_dim, activation='softmax')(inputs)
    if SINGLE_ATTENTION_VECTOR:
        # Lambda / RepeatVector are reached through `layers` because the bare
        # names are not imported in this file. The averaged weights have
        # shape (batch, input_dim) and must be repeated once per time step
        # (not input_dim times) to line up with `inputs` again.
        time_steps = int(inputs.shape[1])
        a = layers.Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = layers.RepeatVector(time_steps)(a)
    # Identity permutation; kept so the weights stay reachable by layer name.
    a_probs = Permute((1, 2), name='attention_vec')(a)

    output_attention_mul = Multiply()([inputs, a_probs])
    return output_attention_mul

class Attention(Layer):
    """Additive attention over axis 1 of a 3-D input.

    Scores are ``tanh(x . W + b)`` with ``W`` of shape (features, 1) and ``b``
    of shape (steps, 1); they are normalised with a softmax over axis 1 and
    multiplied into the input. Because the bias has one entry per step, the
    length of axis 1 must be known when the layer is built.

    Args:
        return_sequences: if True the re-weighted sequence is returned,
            otherwise the sum over axis 1.
        **kwargs: standard Keras layer arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them lets the layer be rebuilt
            from ``get_config()``, which includes those keys.
    """

    def __init__(self, return_sequences=True, **kwargs):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        # One scoring weight per feature and one bias per step.
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")

        super().build(input_shape)

    def get_config(self):
        """Return the layer config including ``return_sequences``."""
        config = super().get_config().copy()
        config.update({
            'return_sequences': self.return_sequences
        })
        return config

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a

        if self.return_sequences:
            return output

        return K.sum(output, axis=1)
    
    
def dnn_model():
    """Build the stacked bidirectional LSTM / GRU model (uncompiled).

    Five bidirectional LSTMs (768, 512, 384, 256, 128 units) are stacked. A
    second chain of bidirectional GRUs starts from the second LSTM output;
    each later GRU consumes the batch-normalised element-wise product of the
    matching LSTM output and the previous GRU output. The last LSTM output
    and all GRU outputs are concatenated on the feature axis and fed through
    Dense(128, selu) and Dense(1). The input shape is ``test.shape[-2:]``.
    """
    x_input = Input(shape=(test.shape[-2:]))

    # LSTM trunk: keep every intermediate output for the GRU branch.
    lstm_outs = []
    hidden = x_input
    for n_units in (768, 512, 384, 256, 128):
        hidden = Bidirectional(LSTM(units=n_units, return_sequences=True))(hidden)
        lstm_outs.append(hidden)

    # GRU branch: starts at the second LSTM output, then is gated by the
    # third, fourth and fifth LSTM outputs in turn.
    gru_outs = [Bidirectional(GRU(units=384, return_sequences=True))(lstm_outs[1])]
    for lstm_out, n_units in zip(lstm_outs[2:], (256, 128, 64)):
        gated = Multiply()([lstm_out, gru_outs[-1]])
        gated = BatchNormalization()(gated)
        gru_outs.append(
            Bidirectional(GRU(units=n_units, return_sequences=True))(gated)
        )

    merged = Concatenate(axis=2)([lstm_outs[-1]] + gru_outs)
    merged = Dense(units=128, activation='selu')(merged)
    x_output = Dense(units=1)(merged)

    return Model(inputs=x_input, outputs=x_output, name='DNN_Model')


def dnn_model_2():
    """Build a smaller bidirectional LSTM / GRU model (uncompiled).

    Three stacked bidirectional LSTMs (768, 512, 256 units), a bidirectional
    GRU on the second LSTM output, a second GRU on the sum of the third LSTM
    output and the first GRU output, then concatenation, one more
    bidirectional LSTM (192 units), Dense(128, selu) and Dense(1).

    The input shape is taken from ``test``: ``train`` carries one extra
    trailing id column that is sliced off before fitting, so sizing the input
    from ``train`` would make the model one feature too wide for the data it
    is actually fed.
    """
    x_input = Input(shape=(test.shape[-2:]))
    
    x1 = Bidirectional(LSTM(units=768, return_sequences=True))(x_input)
    x2 = Bidirectional(LSTM(units=512, return_sequences=True))(x1)
    x3 = Bidirectional(LSTM(units=256, return_sequences=True))(x2)
    
    z2 = Bidirectional(GRU(units=256, return_sequences=True))(x2)
    z3 = Bidirectional(GRU(units=128, return_sequences=True))(Add()([x3, z2]))
    
    x = Concatenate(axis=2)([x3, z2, z3])
    x = Bidirectional(LSTM(units=192, return_sequences=True))(x)
    
    x = Dense(units=128, activation='selu')(x)
    
    x_output = Dense(units=1)(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one pre-norm transformer encoder block to ``inputs``.

    Args:
        inputs: 3-D Keras tensor; its last dimension is preserved.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: number of attention heads.
        ff_dim: number of filters of the first point-wise convolution.
        dropout: dropout rate used inside attention and after both sub-blocks.

    Returns:
        Keras tensor with the same last dimension as ``inputs``.
    """
    # Self-attention sub-block with a residual connection.
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attended = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(normed, normed)
    attended = layers.Dropout(dropout)(attended)
    residual = attended + inputs

    # Point-wise feed-forward sub-block (kernel size 1), projected back to
    # the input width so the second residual addition is shape-compatible.
    hidden = layers.LayerNormalization(epsilon=1e-6)(residual)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="selu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + residual

def transformer_model(
    input_shape=test.shape[-2:],
    head_size=512,
    num_heads=8,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=(128,),
    dropout=0,
    mlp_dropout=0,
):
    """Build a transformer-encoder regression model (uncompiled).

    Args:
        input_shape: (steps, features) of one sample. Defaults to the shape
            of ``test``; ``train`` has one extra trailing id column that is
            removed before fitting, so it is not a valid source for the
            default.
        head_size: ``key_dim`` of each attention layer.
        num_heads: number of attention heads per block.
        ff_dim: filters of the first point-wise convolution in each block.
        num_transformer_blocks: number of stacked encoder blocks.
        mlp_units: sizes of the Dense(selu) layers after the encoder.
        dropout: dropout rate inside the encoder blocks.
        mlp_dropout: dropout rate after each Dense layer of the head.

    Returns:
        A Keras ``Model`` producing one value per time step.
    """
    inputs = Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    #x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="selu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    # Linear output: a softmax over a single unit always evaluates to 1.0,
    # which would make the model predict a constant.
    outputs = layers.Dense(units=1)(x)
    return Model(inputs, outputs)



def transformer_model_bi(
    input_shape=train.shape[-2:],
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=8,
    mlp_units=[128],
    dropout=0,
    mlp_dropout=0,
):
    """Build the LSTM / GRU model with ``Attention`` gating (uncompiled).

    Five stacked bidirectional LSTMs (768, 512, 384, 256, 128 units) and a
    chain of bidirectional GRUs (384, 256, 128, 64 units); from the second
    GRU on, each GRU consumes the ``Attention``-weighted element-wise product
    of the matching LSTM output and the previous GRU output. The last LSTM
    output and all GRU outputs are concatenated and passed through
    Dense(128, selu) and Dense(1).

    None of the parameters is read by the body; the input shape is always
    ``test.shape[-2:]``.
    """
    x_input = Input(shape=(test.shape[-2:]))

    # LSTM trunk: keep every intermediate output for the GRU branch.
    lstm_outs = []
    hidden = x_input
    for n_units in (768, 512, 384, 256, 128):
        hidden = Bidirectional(LSTM(units=n_units, return_sequences=True))(hidden)
        lstm_outs.append(hidden)

    # GRU branch, gated by the third, fourth and fifth LSTM outputs.
    gru_outs = [Bidirectional(GRU(units=384, return_sequences=True))(lstm_outs[1])]
    for lstm_out, n_units in zip(lstm_outs[2:], (256, 128, 64)):
        gated = Multiply()([lstm_out, gru_outs[-1]])
        gated = Attention(return_sequences=True)(gated)
        gru_outs.append(
            Bidirectional(GRU(units=n_units, return_sequences=True))(gated)
        )

    merged = Concatenate(axis=2)([lstm_outs[-1]] + gru_outs)
    merged = Dense(units=128, activation='selu')(merged)
    x_output = Dense(units=1)(merged)

    return Model(inputs=x_input, outputs=x_output, name='DNN_Model')

def transformer_model11(
    input_shape=train.shape[-2:],
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=1,
    mlp_units=[128],
    dropout=0,
    mlp_dropout=0,
):
    """Build a compact bidirectional LSTM / GRU model (uncompiled).

    Three stacked bidirectional LSTMs (768, 512, 256 units), a bidirectional
    GRU on the second LSTM output, a second GRU on the sum of the third LSTM
    output and the first GRU output, then concatenation, one more
    bidirectional LSTM (192 units), Dense(128, selu) and Dense(1).

    None of the parameters is read by the body. The input shape is taken from
    ``test``: ``train`` carries one extra trailing id column that is sliced
    off before fitting, so sizing the input from ``train`` would make the
    model one feature too wide for the data it is actually fed.
    """
    x_input = Input(shape=(test.shape[-2:]))
    
    x1 = Bidirectional(LSTM(units=768, return_sequences=True))(x_input)
    x2 = Bidirectional(LSTM(units=512, return_sequences=True))(x1)
    x3 = Bidirectional(LSTM(units=256, return_sequences=True))(x2)
    #x2 = Attention(return_sequences=True)(x2)
    z2 = Bidirectional(GRU(units=256, return_sequences=True))(x2)
    z3 = Bidirectional(GRU(units=128, return_sequences=True))(Add()([x3, z2]))
    
    x = Concatenate(axis=2)([x3, z2, z3])
    x = Bidirectional(LSTM(units=192, return_sequences=True))(x)
    #x = attention_3d_block(x)
    #x = Flatten()(x)
    x = Dense(units=128, activation='selu')(x)
    
    x_output = Dense(units=1)(x)

    model = Model(inputs=x_input, outputs=x_output, 
                  name='DNN_Model')
    return model

def _3d_to_2d(arr):
    """Collapse the first two axes of ``arr`` into one, keeping the last."""
    n_rows = arr.shape[0] * arr.shape[1]
    n_cols = arr.shape[-1]
    return arr.reshape(n_rows, n_cols)

In [None]:
#model = dnn_model()
#model.summary()

In [None]:
# K-fold cross-validation: for each fold either load a previously saved model
# or train a fresh one, record out-of-fold and test predictions, then write
# both prediction sets to CSV. Everything runs inside the distribution
# strategy's scope.
with strategy.scope():
    
    VERBOSE = 0
    SEED = 2021
    FOLDS = 10
    test_preds = []                # one flat test prediction vector per fold
    final_valid_predictions = {}   # row id -> out-of-fold prediction
    scores = []                    # out-of-fold MAE per fold
    name = "dlstm_2021_ftrs"
    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
    
    # The split is over the first axis of `train`.
    # NOTE: the loop variable `train_idx` rebinds the module-level list of
    # row ids that was created when the training CSV was loaded.
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        # The last column of `train` holds the row ids; it is excluded from
        # the model inputs by slicing 0:COLS-1.
        X_train, X_valid = train[train_idx][:,:,0:COLS-1], train[test_idx][:,:,0:COLS-1]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        #checkpoint_filepath=f"../input/gb-vpp-pulp-fiction/Bidirect_LSTM_model_2021_1{fold+1}C.h5"
        # NOTE(review): `checkpoint_filepath` is assigned but not read below;
        # the ModelCheckpoint callback builds the same path itself.
        checkpoint_filepath=f"./Bidirect_LSTM_model_10_{fold+1}C.h5"
        checkpoint_filepath_rmt = f"../input/vpp-blend-4-new-features/Bidirect_LSTM_model_10_{fold+1}C.h5"
        # Reuse a saved model for this fold if one exists in the input
        # dataset; otherwise train from scratch.
        if os.path.exists(checkpoint_filepath_rmt):
            model = tf.keras.models.load_model(checkpoint_filepath_rmt) 
        else:
            model = dnn_model()
            model.compile(optimizer="adam", loss="mae", metrics=["mae"])

            # Multiply the learning rate by 0.85 after 7 epochs without
            # improvement of val_loss.
            lr = ReduceLROnPlateau(monitor="val_loss", factor=0.85, 
                                   patience=7, verbose=VERBOSE)

            # Save only the best model (lowest val_loss) for this fold.
            save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
            chk_point = ModelCheckpoint(f'./Bidirect_LSTM_model_10_{fold+1}C.h5', options=save_locally, 
                                        monitor='val_loss', verbose=VERBOSE, 
                                        save_best_only=True, mode='min')

            # Stop after 30 epochs without improvement and restore the best
            # weights seen so far.
            es = EarlyStopping(monitor="val_loss", patience=30, 
                               verbose=VERBOSE, mode="min", 
                               restore_best_weights=True)               
            history = model.fit(X_train, y_train, 
                      validation_data=(X_valid, y_valid), 
                      epochs=300,
                      verbose=VERBOSE,
                      batch_size=BATCH_SIZE, 
                      callbacks=[lr, chk_point, es,TqdmCallback(verbose=1)])
            if DEBUG:
                plot_history(history)
            del history    
                
        # Flatten targets and predictions to one value per row.
        y_true = y_valid.squeeze().reshape(-1, 1)
        y_pred = model.predict(X_valid, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1)
        # Recover the row ids of the validation rows from the last column.
        valid_idx = _3d_to_2d(train[test_idx])[:,COLS-1].astype(int) 
        print(len(valid_idx),len(y_pred))
        final_valid_predictions.update(dict(zip(valid_idx, y_pred)))
        del valid_idx
        gc.collect()
        score = mean_absolute_error(y_true, y_pred)
        scores.append(score)
        print(f"Fold-{fold+1} | OOF Score: {score}")
        # Release the fold's arrays before predicting on the test set.
        del X_train
        del y_train
        del X_valid
        del y_valid
        del y_true
        del y_pred
        gc.collect()
        test_preds.append(model.predict(test, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1).squeeze())
    print(f"Final Score : Median {np.median(scores)}, Mean {np.mean(scores)}")
    # Out-of-fold predictions as a two-column table: id, pred_<name>.
    final_valid_predictions = pd.DataFrame.from_dict(final_valid_predictions, orient="index").reset_index()
    final_valid_predictions.columns = ["id", f"pred_{name}"]
    final_valid_predictions.to_csv(f"train_{name}_pred.csv", index=False)
    # Test predictions: median across folds, rounded to the nearest value of
    # the pressure grid and clipped to the observed pressure range.
    sample_submission_temp = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
    sample_submission_temp[TARGET_VAR] = np.median(np.column_stack(test_preds), axis=1)
    sample_submission_temp[TARGET_VAR] = np.round( (sample_submission_temp[TARGET_VAR] - PRESSURE_MIN)/PRESSURE_STEP ) * PRESSURE_STEP + PRESSURE_MIN
    sample_submission_temp[TARGET_VAR] = np.clip(sample_submission_temp[TARGET_VAR], PRESSURE_MIN, PRESSURE_MAX)
    sample_submission_temp.columns = ["id", f"pred_{name}"]
    sample_submission_temp.to_csv(f"test_{name}_pred.csv", index=False)

## Create submission file

In [None]:
# Write the mean, median and grid-rounded submission files from the per-fold
# test predictions collected during cross-validation.
make_submission(name,test_preds) 
# Baseline Final Score : Median 0.6182050890824652, Mean 0.6254582082423709
# 10 Folds . Score : Median 0.6107518408661121, Mean 0.6115367082590414 
# 7 folds Median 0.6255510868051626, Mean 0.6178064096651388
# 7 fold with attention   Median 0.6253117080984856, Mean 0.6308399315111733
# Final Score : Median 0.6842507432512323, Mean 0.6816680787380409
# Final Score : Median 0.6226577897596568, Mean 0.6165830909037122 
# With new Features : Final Score : Median 0.6238020513760956, Mean 0.6362880387969907 with new features
# With Attention Layer : Final Score : Median 0.606840550218398, Mean 0.6142074864099231

