# Imports

In [None]:
import pandas as pd
import numpy as np
import gc
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
from keras.utils.vis_utils import plot_model
from tensorflow import distribute
from tensorflow.keras import layers
from tensorflow.keras import backend
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Helper Function

In [None]:
def features(df, n_lags=4):
    """Build per-breath engineered features and one-hot encode R, C and R__C.

    The input frame is left untouched: all work is done on a copy, and a new
    DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``. Any other columns are carried through.
    n_lags : int, default 4
        Number of backward/forward shifts (and backward differences) to
        generate for ``u_in`` and ``u_out``. The default reproduces the
        original fixed set of lags 1..4.

    Returns
    -------
    pandas.DataFrame
        The original columns plus the engineered ones, with NaN replaced by 0
        and every string-typed column (``R``, ``C``, ``R__C``) replaced by
        indicator columns from ``pd.get_dummies``.
    """
    # Work on a copy so the caller's DataFrame does not silently grow columns.
    df = df.copy()

    # Build the per-breath groupers once instead of once per feature.
    breath = df['breath_id']
    u_in_by_breath = df['u_in'].groupby(breath)
    u_out_by_breath = df['u_out'].groupby(breath)

    # Running sums within each breath.
    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()
    df['u_in_cumsum'] = u_in_by_breath.cumsum()

    # Shifted copies of the inputs; positions that fall outside a breath are
    # NaN here and become 0 in the fillna below.
    # Column insertion order is kept identical to the hand-written original,
    # because downstream code consumes the columns positionally.
    for n in range(1, n_lags + 1):
        df[f'u_in_lag{n}'] = u_in_by_breath.shift(n)
        df[f'u_out_lag{n}'] = u_out_by_breath.shift(n)
        df[f'u_in_lag_back{n}'] = u_in_by_breath.shift(-n)
        df[f'u_out_lag_back{n}'] = u_out_by_breath.shift(-n)

    # Per-breath aggregates broadcast back to every row.
    u_in_max = u_in_by_breath.transform('max')
    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = u_in_by_breath.transform('mean') - df['u_in']

    # Differences against the (still NaN-bearing) lag columns, so a missing
    # lag yields NaN -> 0 rather than the raw value.
    for n in range(1, n_lags + 1):
        df[f'u_in_diff{n}'] = df['u_in'] - df[f'u_in_lag{n}']
        df[f'u_out_diff{n}'] = df['u_out'] - df[f'u_out_lag{n}']

    df['cross'] = df['u_in'] * df['u_out']
    df = df.fillna(0)

    # Treat R and C as categories; R__C is their combination.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    return pd.get_dummies(df)

# Read and Pre-Process Data

In [None]:
# Load the competition tables.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

# Rows are regrouped into fixed-length sequences of this many rows.
# NOTE(review): presumably the number of time steps per breath - confirm
# against the data.
steps_per_breath = 80

# Sorted distinct training pressures, kept for the submission step.
pressure_values = np.sort(train.pressure.unique())

# features() already replaces NaN with 0 and one-hot encoding introduces
# none, so no further fillna pass is needed on these large frames.
train = features(train)
test = features(test)

# One target row per sequence.
targets = train[['pressure']].to_numpy().reshape(-1, steps_per_breath)

# Identifier and raw columns that are not fed to the model.
non_feature_cols = ['id', 'breath_id', 'u_out', 'time_step']
train = train.drop(['pressure'] + non_feature_cols, axis=1)
test = test.drop(non_feature_cols, axis=1)
cols = list(train.columns)

# One-hot encoding is done per frame, so a category missing from one frame
# would give mismatched columns. Align test to the training layout (absent
# indicators are 0) only when they differ, to avoid a needless copy.
if list(test.columns) != cols:
    test = test.reindex(columns=cols, fill_value=0)

# Fit the scaler on train only and reuse it for test.
scaler = RobustScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

# (rows, features) -> (sequences, steps_per_breath, features)
train = train.reshape(-1, steps_per_breath, train.shape[-1])
test = test.reshape(-1, steps_per_breath, train.shape[-1])

# Model

In [None]:
# Load the saved network only to render a diagram of its architecture.
pretrained_model_path = '../input/ventilatorpretrained/fold1.hdf5'
model = load_model(pretrained_model_path)
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Release the model that was loaded for the architecture diagram.
del model
gc.collect()

# Run configuration.
epochs = 200
batch_size = 16
n_splits = 5
import_pre_trained = True  # start each fold from the saved network
one_fold = True            # stop after the first fold
create_sub = False         # read by the submission step


def _fresh_network(input_shape):
    """Return an untrained two-layer bidirectional LSTM regressor."""
    return Sequential([
        layers.Input(shape=input_shape),
        layers.Bidirectional(layers.LSTM(512, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(512, return_sequences=True)),
        layers.Dense(64, activation='selu'),
        layers.Dense(1),
    ])


gpu_strategy = distribute.get_strategy()

with gpu_strategy.scope():
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    test_preds = []  # one flattened test prediction vector per trained fold

    for fold, (train_idx, valid_idx) in enumerate(kf.split(train, targets)):
        fold_no = fold + 1
        backend.clear_session()

        X_train, y_train = train[train_idx], targets[train_idx]
        X_valid, y_valid = train[valid_idx], targets[valid_idx]
        checkpoint_filepath = f"fold{fold_no}.hdf5"

        # NOTE(review): the same fold1 file is loaded whatever the fold
        # number - confirm this is intended when one_fold is False.
        if import_pre_trained:
            model = load_model('../input/ventilatorpretrained/fold1.hdf5')
        else:
            model = _fresh_network(train.shape[-2:])

        # The model is (re)compiled in both cases with a fresh optimizer.
        optimizer = Adam(learning_rate=0.0002)
        model.compile(optimizer=optimizer, loss="mae")

        # Halve the learning rate after 3 stagnant epochs, stop after 6, and
        # keep the best model seen so far on disk.
        lr = ReduceLROnPlateau(factor=0.5, patience=3, verbose=1)
        es = EarlyStopping(patience=6, restore_best_weights=True)
        sv = ModelCheckpoint(checkpoint_filepath, save_best_only=True, verbose=1)

        print(f'-------------> Fold {fold_no} <-------------')
        model.fit(
            X_train,
            y_train,
            validation_data=(X_valid, y_valid),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[lr, es, sv],
        )

        # Flatten the predictions to one value per test row.
        test_preds.append(model.predict(test).reshape(-1))

        del model, X_train, X_valid
        gc.collect()

        if one_fold:
            break

if create_sub:
    # Bounds and spacing of the pressure grid, taken from the sorted distinct
    # training pressures.
    pressure_min, pressure_max = pressure_values[0], pressure_values[-1]
    pressure_step = pressure_values[1] - pressure_min

    # Combine the folds with an element-wise median.
    submission["pressure"] = np.median(np.vstack(test_preds), axis=0)

    # Snap each prediction to the nearest grid point, then clip to the
    # observed range.
    grid_index = np.round((submission['pressure'] - pressure_min) / pressure_step)
    snapped = grid_index * pressure_step + pressure_min
    submission['pressure'] = np.clip(snapped, pressure_min, pressure_max)

    submission.to_csv('submission.csv', index=False)