In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import pandas as pd

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error as mae

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [None]:
# Read the three competition files in one pass: the training rows, the test
# rows, and the submission template that the final cell fills in.
train, test, ss = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/ventilator-pressure-prediction/train.csv',
        '/kaggle/input/ventilator-pressure-prediction/test.csv',
        '/kaggle/input/ventilator-pressure-prediction/sample_submission.csv',
    )
)

In [None]:
def fe(data):
    """Build the per-row feature table for one ventilator data frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns plus:

        * ``u_in_lag1`` .. ``u_in_lag4`` - previous ``u_in`` values of the
          same breath,
        * ``ts_lag1``, ``area``, ``sum_area`` - previous time stamp,
          ``u_in`` times elapsed time, and its running total per breath,
        * ``u_inout``, ``u_in_time``, ``u_out_time`` - pairwise products,
        * rolling (window 10), expanding and exponentially weighted
          (halflife 10) mean and standard deviation of ``u_in`` per breath,
        * one-hot columns for ``R``, ``C`` and their concatenation ``RC``
          (the three source columns themselves are replaced).

        Every missing value produced along the way is replaced by 0.
    """
    # Work on a copy: the caller's frame stays untouched, so calling fe()
    # again on the same input gives the same result.
    data = data.copy()

    # Lags are taken inside each breath. An ungrouped Series.shift would give
    # the first rows of a breath the trailing values of the breath stored
    # just before it.
    breaths = data.groupby('breath_id')
    for lag in (1, 2, 3, 4):
        data[f'u_in_lag{lag}'] = breaths['u_in'].shift(lag)

    # Time stamp of the previous sample of the same breath. The first sample
    # of a breath has no predecessor, so it falls back to its own time stamp,
    # which makes the elapsed time (and therefore the area) zero.
    data['ts_lag1'] = breaths['time_step'].shift(1).fillna(data['time_step'])
    data['area'] = data['u_in'] * (data['time_step'] - data['ts_lag1'])
    data = data.fillna(0)
    data['sum_area'] = data.groupby('breath_id')['area'].cumsum()

    data['u_inout'] = data['u_in'] * data['u_out']
    data['u_in_time'] = data['u_in'] * data['time_step']
    data['u_out_time'] = data['time_step'] * data['u_out']

    # Windowed statistics of u_in, each computed per breath. The grouped
    # result is indexed by (breath_id, original index); dropping level 0
    # restores the original index so the assignment aligns row by row.
    u_in_by_breath = data.groupby('breath_id')['u_in']
    windows = {
        'rolling_10': u_in_by_breath.rolling(window=10, min_periods=1),
        'expand': u_in_by_breath.expanding(2),
        'ewm_u_in': u_in_by_breath.ewm(halflife=10),
    }
    for prefix, window in windows.items():
        for stat in ('mean', 'std'):
            values = getattr(window, stat)()
            data[f'{prefix}_{stat}'] = values.reset_index(level=0, drop=True)
    # Windows with too few samples yield NaN (e.g. std of one value).
    data = data.fillna(0)

    # R and C are turned into strings so get_dummies one-hot encodes them,
    # together with their concatenation RC.
    data['R'] = data['R'].astype(str)
    data['C'] = data['C'].astype(str)
    data['RC'] = data['R'] + data['C']
    return pd.get_dummies(data)

In [None]:
# Run the same feature-engineering routine over both splits so their
# engineered columns are produced by identical code.
test = fe(data=test)
train = fe(data=train)

In [None]:
# Remove the 'id' and 'breath_id' columns from both frames; every column
# still present afterwards (other than 'pressure') is used as a feature.
train = train.drop(columns=['id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

In [None]:
# Feature columns = every training column except the regression target.
features = [col for col in train.columns if col != 'pressure']

In [None]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both splits.
RS = RobustScaler()
RS.fit(train[features])
train[features] = RS.transform(train[features])
test[features] = RS.transform(test[features])

In [None]:
# Turn the flat row tables into fixed-length sequences: every consecutive
# run of 80 rows becomes one sample.
# NOTE(review): assumes each breath contributes exactly 80 consecutive rows
# in the original order - confirm against the data.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# With the target removed, the remaining columns are the model inputs.
train = train.drop(columns='pressure')
train = train.to_numpy().reshape(-1, 80, train.shape[1])
test = test.to_numpy().reshape(-1, 80, test.shape[1])

# MODEL

In [None]:
EPOCH = 300
BATCH_SIZE = 1024
N_SPLITS = 5

# Learning-rate schedule: start at INITIAL_LR and multiply by DECAY_RATE for
# every DECAY_EPOCHS epochs' worth of optimizer steps (continuous decay).
# DECAY_EPOCHS is larger than EPOCH, so training stops before the full
# DECAY_RATE factor has been applied.
INITIAL_LR = 1e-3
DECAY_RATE = 1e-5
DECAY_EPOCHS = 400

tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

with tpu_strategy.scope():
    kf = KFold(n_splits = N_SPLITS, shuffle = True, random_state = 42)
    test_preds = []
    # `test_idx` holds the rows of the held-out validation fold; it does not
    # index the competition test set.
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        model = keras.models.Sequential([
            keras.layers.Input(shape = train.shape[-2:]),
            keras.layers.Bidirectional(keras.layers.LSTM(320, return_sequences = True)),
            keras.layers.Bidirectional(keras.layers.LSTM(240, return_sequences = True)),
            keras.layers.Bidirectional(keras.layers.LSTM(160, return_sequences = True)),
            keras.layers.Dense(80, activation = 'selu'),
            keras.layers.Dense(1),
        ])

        # The schedule is expressed in optimizer steps, so it is handed to
        # the optimizer, which evaluates it once per batch. Routing it
        # through a LearningRateScheduler callback would evaluate it with the
        # epoch index instead, leaving the rate almost unchanged for the
        # whole run.
        decay_steps = DECAY_EPOCHS * (len(X_train) / BATCH_SIZE)
        scheduler = ExponentialDecay(INITIAL_LR, decay_steps, DECAY_RATE)
        model.compile(optimizer = keras.optimizers.Adam(learning_rate = scheduler), loss = "mae")

        model.fit(X_train, y_train, validation_data = (X_valid, y_valid), epochs = EPOCH, batch_size = BATCH_SIZE)

        # Predictions come back with one value per time step of each
        # sequence; flatten them to one value per row, the layout the
        # submission cell assigns to its 'pressure' column.
        test_preds.append(model.predict(test, batch_size = BATCH_SIZE).reshape(-1))

In [None]:
# Blend the folds by averaging their test predictions. The divisor is the
# number of predictions actually collected, so the blend stays a true mean
# if the fold count changes or a run stops early.
if not test_preds:
    raise RuntimeError('test_preds is empty - run the training cell first')
ss['pressure'] = sum(test_preds) / len(test_preds)
ss.to_csv('lstm7.csv', index = False)