In [None]:
import numpy as np
import pandas as pd

import optuna

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display

In [None]:
# Debug toggle; it is not read by any of the cells visible in this notebook.
DEBUG = False

# Load the competition's training rows, test rows and the submission template,
# in that order, from the Kaggle input directory.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)



Additional features are adapted from @Dan Ofer's notebook:

https://www.kaggle.com/danofer/lgbm-lover-s

In [None]:


def add_features(df):
    """Add per-breath features of ``u_in`` and one-hot encode ``R`` and ``C``.

    Expects the columns ``breath_id``, ``time_step``, ``u_in``, ``R`` and ``C``.
    Every grouped statistic is computed within a single ``breath_id`` so that
    no value is carried over from one breath into the next.

    Note: the columns ``area``, ``u_in_cumsum`` and ``u_in_lag`` are written
    onto the frame that is passed in, and its ``R``/``C`` columns are converted
    to strings, before ``pd.get_dummies`` builds the new frame that is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw rows, one per time step.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original numeric columns, ``R_*`` / ``C_*``
        indicator columns and the engineered features. Leading NaNs of the
        exponentially weighted / rolling statistics are left in place.
    """
    # Cumulative time-weighted flow and cumulative flow inside each breath.
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()

    # Lag by two time steps *within* the breath. An ungrouped shift would fill
    # the first two rows of each breath with the tail of the previous breath.
    df['u_in_lag'] = df.groupby('breath_id')['u_in'].shift(2).fillna(0)

    # Treat R and C as categories: cast to str so get_dummies one-hot encodes them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df = pd.get_dummies(df)

    u_in_by_breath = df.groupby('breath_id')['u_in']

    def _drop_group_level(result):
        # Grouped window results are indexed by (breath_id, original index);
        # dropping the first level lets the assignment align on the row index.
        return result.reset_index(level=0, drop=True)

    # Exponentially weighted statistics of u_in.
    df['ewm_u_in_mean'] = _drop_group_level(u_in_by_breath.ewm(halflife=8).mean())
    df['ewm_u_in_std'] = _drop_group_level(u_in_by_breath.ewm(halflife=9).std())
    # corr() without a second series correlates u_in with itself.
    df['ewm_u_in_corr'] = _drop_group_level(u_in_by_breath.ewm(halflife=14).corr())

    # Rolling window of 15 periods over u_in. The second column keeps its
    # historical name '15_out_std' although it is the standard deviation of u_in.
    rolling_15 = u_in_by_breath.rolling(window=15, min_periods=1)
    df['15_in_max'] = _drop_group_level(rolling_15.max())
    df['15_out_std'] = _drop_group_level(rolling_15.std())

    return df

train = add_features(train)
test = add_features(test)

# R and C are one-hot encoded separately for each frame, so a category that is
# missing from one of them would change its columns and misalign the features
# fed to the scaler and the model. Fail fast here instead.
assert [col for col in train.columns if col != 'pressure'] == list(test.columns), \
    "train and test feature columns differ after add_features"

In [None]:
# Replace the NaNs that remain in either frame (e.g. the undefined leading
# values of the windowed statistics) with zeros, then show both shapes.
train = train.fillna(value=0)
test = test.fillna(value=0)
(train.shape, test.shape)

In [None]:
# Target matrix with 80 pressure values per row.
# Assumes every breath contributes exactly 80 consecutive rows - TODO confirm.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# The target and the identifier columns are not model inputs.
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

In [None]:
# Display the shapes of both feature frames (test first, then train).
test.shape,train.shape

In [None]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both sets. Both transforms return NumPy arrays.
RS = RobustScaler()
RS.fit(train)
train = RS.transform(train)
test = RS.transform(test)

In [None]:
# Fold the flat row matrices into (sequence, 80 time steps, features) tensors.
# The feature count is read from `train` for both arrays, as before.
train = np.reshape(train, (-1, 80, train.shape[-1]))
test = np.reshape(test, (-1, 80, train.shape[-1]))

In [None]:
EPOCH = 225
BATCH_SIZE = 1024
N_SPLITS = 5           # cross-validation folds; one model is trained per fold
LR_DECAY_EPOCHS = 400  # the learning rate shrinks by a factor of 1e-5 over this many epochs' worth of steps

# detect and init the TPU
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# One flattened test-set prediction vector per fold; averaged in a later cell.
test_preds = []

with tpu_strategy.scope():
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=2021)

    for fold, (train_idx, valid_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train[train_idx], train[valid_idx]
        y_train, y_valid = targets[train_idx], targets[valid_idx]

        # Four stacked bidirectional LSTMs that keep the time axis, followed by
        # a per-time-step dense head producing one value per step.
        model = keras.models.Sequential([
            keras.layers.Input(shape=train.shape[-2:]),
            keras.layers.Bidirectional(keras.layers.LSTM(500, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(375, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(200, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True)),
            keras.layers.Dense(100, activation='selu'),
            keras.layers.Dense(1),
        ])

        # ExponentialDecay computes lr = 1e-3 * 1e-5 ** (step / decay_steps) and
        # decay_steps is expressed in optimizer steps (batches). It is therefore
        # handed to the optimizer, which evaluates it once per step. Driving it
        # through a LearningRateScheduler callback would feed it the epoch index
        # instead, leaving the learning rate almost constant for the whole run.
        steps_per_epoch = len(X_train) / BATCH_SIZE
        scheduler = ExponentialDecay(1e-3, LR_DECAY_EPOCHS * steps_per_epoch, 1e-5)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=scheduler), loss="mae")

        # No early stopping is applied: every fold trains for the full EPOCH
        # epochs; the validation split is only used for the reported val_loss.
        model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCH, batch_size=BATCH_SIZE)

        # Flatten (sequences, 80, 1) predictions to one value per test row.
        test_preds.append(model.predict(test, batch_size=BATCH_SIZE).reshape(-1))

In [None]:
# Progress marker: prints 'done' when this cell is executed.
print('done')

In [None]:
# Average the per-fold test predictions. The divisor is the number of folds that
# were actually collected, so it stays correct if the fold count changes.
if not test_preds:
    # Without this guard an empty list would silently write an all-zero submission.
    raise RuntimeError("test_preds is empty - run the training cell before building the submission")
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission.csv', index=False)