In [None]:
import pandas as pd
import numpy as np
import lightgbm
from IPython.display import display
from sklearn.model_selection import train_test_split, GroupKFold, KFold
from sklearn.metrics import mean_absolute_error
import optuna
from tensorflow import keras
import tensorflow as tf
from sklearn.preprocessing import normalize

In [None]:
# Read the three competition CSVs (training rows, test rows, and the sample
# submission template) in one pass, then render all of them for a quick look.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)
display(train, test, submission)

In [None]:
# Running total of `u_in` accumulated separately inside each `breath_id`,
# added as a new feature column on both frames.
train['u_in_cumsum'] = train.groupby('breath_id')['u_in'].cumsum()
test['u_in_cumsum'] = test.groupby('breath_id')['u_in'].cumsum()

In [None]:
# `u_in` delayed by two rows. The shift is done per `breath_id` so the first
# two rows of a breath do not inherit values from the tail of the previous
# breath (a plain `Series.shift` runs across the whole frame and leaks them).
# The rows with no in-breath predecessor come out as NaN and are set to 0 by
# the frame-wide `fillna` below.
train['u_in_lag'] = train.groupby('breath_id')['u_in'].shift(2)
train = train.fillna(0)

test['u_in_lag'] = test.groupby('breath_id')['u_in'].shift(2)
test = test.fillna(0)

In [None]:
# Targets as one row of 80 pressure values per sequence.
# Assumes the frame holds exactly 80 consecutive rows per breath - the reshape
# relies on it and will raise if the row count is not a multiple of 80.
targets = train[['pressure']].to_numpy().reshape(-1, 80)

# Remove the target and the columns that are not fed to the model.
train = train.drop(columns=['pressure', 'id', 'breath_id', 'u_out'])
test = test.drop(columns=['id', 'breath_id', 'u_out'])

# (n_sequences, 80, n_features). The sequence count is inferred instead of
# being hard-coded, and the feature count is taken from the frame so that a
# changed column set surfaces as a shape error at the model rather than as a
# silently scrambled reshape.
train = train.to_numpy().reshape(-1, 80, train.shape[1])

In [None]:
# K-fold cross-validation: one bidirectional-LSTM model is trained per fold and
# its predictions on the test set are collected in `test_preds`.
kf = KFold(n_splits=5, shuffle=True, random_state=2021)
test_preds = []

epochs = 200
batch_size = 1024

# `train` is already a 3-D array (sequences, steps, features); reuse its
# trailing dimensions for the model input and for the test reshape.
n_steps, n_features = train.shape[1:]

# The test features are identical for every fold, so reshape them once
# (sequence count inferred rather than hard-coded).
test_sequences = test.to_numpy().reshape(-1, n_steps, n_features)

for fold, (train_idx, valid_idx) in enumerate(kf.split(train, targets)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = targets[train_idx], targets[valid_idx]

    # ExponentialDecay is a per-optimizer-step schedule: the learning rate is
    # 1e-3 * 1e-5 ** (step / decay_steps), with decay_steps set to the
    # (approximate) number of batches processed over the whole fit.
    # It must be given to the optimizer. Wrapping it in the
    # LearningRateScheduler callback would evaluate it with the epoch index
    # instead of the step count, leaving the rate practically constant.
    decay_steps = epochs * (len(train_idx) / batch_size)
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(1e-3, decay_steps, 1e-5)

    model = keras.models.Sequential([
        keras.layers.Input(shape=(n_steps, n_features)),
        keras.layers.Bidirectional(keras.layers.LSTM(200, return_sequences=True)),
        keras.layers.Bidirectional(keras.layers.LSTM(150, return_sequences=True)),
        keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True)),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(1),
    ])
    # A fresh optimizer per fold so the schedule's step counter restarts at 0.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=scheduler), loss="mae")
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=epochs, batch_size=batch_size)
    model.save(f'Fold{fold+1} RNN Weights')

    # Flatten the (sequences, steps, 1) predictions to one value per test row.
    test_preds.append(model.predict(test_sequences).reshape(-1))

In [None]:
# Ensemble by averaging the per-fold test predictions element-wise. Dividing
# by the number of collected predictions (rather than a literal 5) keeps the
# average correct if the fold count changes, and fails loudly if no fold ran.
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission.csv', index=False)