This notebook is inspired by recent notebooks from [Zhangxin](https://www.kaggle.com/tenffe/finetune-of-tensorflow-bidirectional-lstm) and [Chris Deotte](https://www.kaggle.com/cdeotte/ensemble-folds-with-median-0-153). Since it is important to discretize the output, I propose a custom TensorFlow layer that does this for you automatically. The optimization therefore happens under the constraint that the output must be bounded and discrete, just like the target values in the training data.

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

In [None]:
# Flip to True to iterate quickly on a small prefix of the training data.
DEBUG = False

# Competition files: training rows, test rows and the submission template.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

if DEBUG:
    # Keep only the first 80 * 1000 rows (positional slice).
    train = train.iloc[:80 * 1000]

In [None]:
# Display the raw training frame to inspect its columns and size.
train

In [None]:
# Distinct pressure values observed in the training data, in ascending order.
all_pressure = sorted(train['pressure'].unique())

# Bounds of the observed pressure values.
PRESSURE_MIN, PRESSURE_MAX = np.min(all_pressure), np.max(all_pressure)

# Gap between the two smallest values.
# NOTE(review): assumes the pressure grid is uniformly spaced - confirm.
PRESSURE_STEP = all_pressure[1] - all_pressure[0]

In [None]:
def add_features(df):
    """Return a copy of ``df`` extended with per-breath engineered features.

    The caller's frame is left untouched; all new columns are added to an
    internal copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus:

        * ``area`` and ``u_in_cumsum`` (cumulative sums within a breath),
        * lag / lead columns of ``u_in`` and ``u_out`` for offsets 1 to 4,
        * per-breath maxima and the distance of ``u_in`` to the per-breath
          max / mean,
        * differences between the current value and each lag,
        * the interaction terms ``cross`` and ``cross2``.

        ``R``, ``C`` and their combination ``R__C`` are converted to strings
        and replaced by their one-hot encoding through ``pd.get_dummies``.
    """
    # Work on a copy so the input frame is never (partially) mutated.
    df = df.copy()

    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()

    u_in_by_breath = df.groupby('breath_id')['u_in']
    u_out_by_breath = df.groupby('breath_id')['u_out']

    df['u_in_cumsum'] = u_in_by_breath.cumsum()

    # Past (lag) and future (lag_back) values within each breath.
    for lag in range(1, 5):
        df[f'u_in_lag{lag}'] = u_in_by_breath.shift(lag)
        df[f'u_out_lag{lag}'] = u_out_by_breath.shift(lag)
        df[f'u_in_lag_back{lag}'] = u_in_by_breath.shift(-lag)
        df[f'u_out_lag_back{lag}'] = u_out_by_breath.shift(-lag)

    # Shifting leaves NaN at the breath boundaries; replace every NaN in the
    # frame with 0. ``fillna`` returns a new frame, so the group-by helpers
    # are rebuilt from it below.
    df = df.fillna(0)

    u_in_by_breath = df.groupby('breath_id')['u_in']
    u_in_max = u_in_by_breath.transform('max')

    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_out__max'] = df.groupby('breath_id')['u_out'].transform('max')

    for lag in (1, 2):
        df[f'u_in_diff{lag}'] = df['u_in'] - df[f'u_in_lag{lag}']
        df[f'u_out_diff{lag}'] = df['u_out'] - df[f'u_out_lag{lag}']

    # Computed once (the per-breath max is reused from above).
    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = u_in_by_breath.transform('mean') - df['u_in']

    for lag in (3, 4):
        df[f'u_in_diff{lag}'] = df['u_in'] - df[f'u_in_lag{lag}']
        df[f'u_out_diff{lag}'] = df['u_out'] - df[f'u_out_lag{lag}']

    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    # Turn R, C and their combination into strings so that get_dummies
    # one-hot encodes them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    return pd.get_dummies(df)

In [None]:
# Build the engineered feature columns for both splits with the same function.
# Each frame is rebound to the result returned by add_features.
train = add_features(train)
test = add_features(test)

In [None]:
# Pressure targets arranged as rows of 80 values.
# NOTE(review): assumes every breath spans exactly 80 consecutive rows - confirm.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# The target and the identifier columns are not used as model inputs.
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

In [None]:
# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
RS = RobustScaler()
RS.fit(train)
train = RS.transform(train)
test = RS.transform(test)

In [None]:
# Regroup the flat rows into sequences of 80 steps: (n_sequences, 80, n_features),
# stored as float32.
train = np.reshape(train, (-1, 80, train.shape[-1])).astype(np.float32)
# train is already 3-D here, so its last axis still holds the feature count.
test = np.reshape(test, (-1, 80, train.shape[-1])).astype(np.float32)
targets = targets.astype(np.float32)

The following custom layer rescales the network output so that it matches the discrete steps found in the target values. This forces the network to learn to produce outputs that need no further post-processing.

Please note the custom rounding function **round_with_gradients**: `tf.round` provides no useful gradient, so using it directly would make the layer non-differentiable.

In [None]:
@tf.custom_gradient
def round_with_gradients(x):
    """Round ``x`` with ``tf.round`` while letting gradients pass through.

    The forward value is ``tf.round(x)``; the backward function returns the
    upstream gradient unchanged, so the rounding behaves like the identity
    during back-propagation.
    """
    rounded = tf.round(x)

    def pass_through(upstream):
        # Hand the incoming gradient back as-is.
        return upstream

    return rounded, pass_through

class ScaleLayer(tf.keras.layers.Layer):
    """Snap inputs onto the discrete pressure grid and clip them to its range.

    The grid is defined by the module-level constants ``PRESSURE_MIN``,
    ``PRESSURE_MAX`` and ``PRESSURE_STEP``, which are read when the layer is
    constructed. The layer has no trainable weights.

    Parameters
    ----------
    **kwargs
        Standard Keras layer arguments (``name``, ``trainable``, ``dtype``...)
        forwarded to the base class. Accepting them is required for the layer
        to be re-created from a saved model config; reloading such a model
        also needs ``custom_objects={'ScaleLayer': ScaleLayer}``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.min = tf.constant(PRESSURE_MIN, dtype=np.float32)
        self.max = tf.constant(PRESSURE_MAX, dtype=np.float32)
        self.step = tf.constant(PRESSURE_STEP, dtype=np.float32)

    def call(self, inputs):
        """Return ``inputs`` rounded to the nearest grid value, clipped to [min, max]."""
        # Express the input as a (fractional) number of steps above the minimum.
        steps = (inputs - self.min) / self.step
        # Round to a whole number of steps; the custom op keeps gradients flowing.
        int_steps = round_with_gradients(steps)
        # Map the step count back to pressure units.
        rescaled_steps = int_steps * self.step + self.min
        return tf.clip_by_value(rescaled_steps, self.min, self.max)

In [None]:
# Training hyper-parameters.
EPOCH = 300        # maximum number of training epochs per fold
BATCH_SIZE = 1024  # batch size used for both fitting and prediction
NUM_FOLDS = 10     # number of cross-validation splits

In [None]:
# Detect the TPU and connect to it.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# Instantiate the distribution strategy. ``tf.distribute.TPUStrategy`` is the
# stable replacement for the deprecated ``tf.distribute.experimental`` endpoint.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

with tpu_strategy.scope():

    def create_model():
        """Build and compile the stacked bidirectional-LSTM regressor.

        The input shape is taken from the last two axes of ``train``. Four
        bidirectional LSTM layers are followed by a dense SELU layer, a
        single-unit dense layer and a ``ScaleLayer``. The model is compiled
        with the Adam optimizer and the MAE loss.
        """
        inputs = keras.layers.Input(shape=train.shape[-2:])
        x = inputs
        for units in [1024, 512, 256, 128]:
            x = keras.layers.Bidirectional(keras.layers.LSTM(units, return_sequences=True))(x)
        x = keras.layers.Dense(128, activation='selu')(x)
        outputs = keras.layers.Dense(1)(x)
        outputs = ScaleLayer()(outputs)

        model = keras.Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer="adam", loss='mae')
        return model

    kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=1970)
    test_preds = []  # one flat prediction vector per fold
    for fold, (train_idx, valid_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train[train_idx], train[valid_idx]
        y_train, y_valid = targets[train_idx], targets[valid_idx]

        # A fresh model is trained for every fold.
        model = create_model()

        # Halve the learning rate after 10 epochs without val_loss improvement.
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)
        # Stop after 60 epochs without improvement and roll back to the best weights.
        es = EarlyStopping(monitor="val_loss", patience=60, verbose=1,
                           mode="min", restore_best_weights=True)

        # Keep the best full model (not only the weights) of each fold on disk.
        checkpoint_filepath = f"folds{fold}.hdf5"
        sv = ModelCheckpoint(
            checkpoint_filepath, monitor='val_loss', verbose=1, save_best_only=True,
            save_weights_only=False, mode='auto', save_freq='epoch'
        )

        model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                  epochs=EPOCH, batch_size=BATCH_SIZE, callbacks=[lr, es, sv])

        # Flatten the predictions into a single 1-D vector, one value per test row.
        fold_pred = model.predict(test, batch_size=BATCH_SIZE, verbose=2)
        test_preds.append(fold_pred.reshape(-1))

In [None]:
# Stack the per-fold prediction vectors row-wise and take the element-wise median.
# NOTE(review): with an even number of folds np.median averages the two middle
# values, so the result can fall between two discrete pressure levels.
fold_predictions = np.vstack(test_preds)
submission["pressure"] = np.median(fold_predictions, axis=0)
submission.to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the submission frame.
submission.head()