# LSTM Model using TensorFlow

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import gc

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import *

In [None]:
# Read the training and test tables from the
# `../input/ventilator-pressure-prediction` directory, in that order.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
    )
)
# Bare expression: a notebook renders the frame as the cell output.
train

In [None]:
# Remove the `id` column from both frames; each name is rebound to the
# new frame returned by `drop`.
train = train.drop('id', axis='columns')
test = test.drop('id', axis='columns')

In [None]:
def _add_features(df):
    """Add the engineered columns to *df* in place and return it one-hot encoded.

    New columns:
      * ``RC_sum`` / ``RC_div`` -- sum and ratio of the ``R`` and ``C`` columns.
      * ``u_in_cumsum``         -- running total of ``u_in`` within each breath.
      * ``time_lag`` / ``u_in_lag`` / ``u_out_lag`` -- previous row's
        ``time_step`` / ``u_in`` / ``u_out`` within the same breath, 0 for the
        first row of a breath.

    ``R`` and ``C`` are then cast to ``str`` so that ``pd.get_dummies`` expands
    them into indicator columns; the encoded frame is returned.
    """
    breath = df['breath_id']

    df['RC_sum'] = df['R'] + df['C']
    df['RC_div'] = df['R'] / df['C']
    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()

    # Shift inside each breath.  A plain `shift(1)` over the whole frame would
    # hand the first row of every breath the last row of the previous breath,
    # which is inconsistent with the per-breath cumulative sum above.
    for lagged, source in (
        ('time_lag', 'time_step'),
        ('u_in_lag', 'u_in'),
        ('u_out_lag', 'u_out'),
    ):
        df[lagged] = df[source].groupby(breath).shift(1).fillna(0)

    # String dtype makes get_dummies treat R and C as categorical.
    for categorical in ('R', 'C'):
        df[categorical] = df[categorical].astype(str)

    return pd.get_dummies(df)


train = _add_features(train)
test = _add_features(test)

# One target row per breath.  Assumes rows are grouped by breath with exactly
# 80 time steps each -- the reshape raises if the row count is not a multiple
# of 80.
y = train['pressure'].to_numpy().reshape(-1, 80)

train.drop(columns=['pressure', 'breath_id'], inplace=True)
test.drop(columns='breath_id', inplace=True)

# The two frames were encoded independently; force test to carry exactly the
# training columns, in the same order, so that anything fitted on `train` can
# be applied to `test`.  Indicator columns absent from test are filled with 0.
test = test.reindex(columns=train.columns, fill_value=0)

In [None]:
from sklearn.preprocessing import RobustScaler

# Fit the scaler on the training frame only, then apply that same fitted
# transform to both the training and the test frames.
rb = RobustScaler()
rb.fit(train)
train2, test2 = rb.transform(train), rb.transform(test)

In [None]:
# Fold the flat (rows, features) matrices into (breaths, 80, features).
# The breath count and feature count are derived from the data instead of
# being hard-coded, so the cell keeps working if the feature set changes.
# The reshape raises if the row count is not a multiple of 80.
train3 = train2.reshape(-1, 80, train2.shape[-1])
test3 = test2.reshape(-1, 80, test2.shape[-1])

# Drop the intermediate frames/arrays and actually run a collection
# (`gc.collect` without parentheses only references the function).
del train, test, train2, test2, rb
gc.collect()

In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Exponential decay: 1e-3 * 1e-5 ** (x / decay_steps).  It does not depend on
# the fold, so it is built once instead of on every iteration.
scheduler = tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 200*((len(train3)*0.8)/1024), 1e-5)


def _lr_for_epoch(epoch):
    """Return the scheduled learning rate for *epoch* as a plain Python float.

    `LearningRateScheduler` expects a callable that returns a float; the
    schedule object itself returns a tensor, which newer Keras versions
    reject.  Wrapping keeps the learning-rate values the same.
    """
    # NOTE(review): the schedule is evaluated at the epoch index, not at the
    # optimizer step, while `decay_steps` above is sized in batches.  Over 200
    # epochs the rate therefore only decays slightly.  If per-step decay was
    # intended, pass `scheduler` to the optimizer instead -- confirm before
    # changing, since that alters training.
    return float(scheduler(epoch))


test_preds = []

# `test_idx` holds the indices of the held-out (validation) fold.
for fold, (train_idx, test_idx) in enumerate(kf.split(train3, y)):
    X_train, X_valid = train3[train_idx], train3[test_idx]
    y_train, y_valid = y[train_idx], y[test_idx]

    model = Sequential([
        # (time steps, features) taken from the data rather than hard-coded.
        Input(shape=train3.shape[1:]),
        Bidirectional(LSTM(200, return_sequences=True)),
        Bidirectional(LSTM(150, return_sequences=True)),
        Bidirectional(LSTM(100, return_sequences=True)),
        Dense(100, activation='relu'),
        Dropout(0.2),
        Dense(1),
    ])

    model.compile(optimizer="adam", loss="mae")
    model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=200,
        batch_size=512,
        callbacks=[tf.keras.callbacks.LearningRateScheduler(_lr_for_epoch)],
    )

    # Flatten the per-step predictions to one value per test row.
    test_preds.append(model.predict(test3).reshape(-1))

    # Deleting the Python references alone does not reset Keras' global
    # state; clear it so layers/graphs from earlier folds do not pile up.
    del X_train, X_valid, y_train, y_valid, model
    tf.keras.backend.clear_session()
    gc.collect()

In [None]:
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
# Average the per-fold predictions.  Dividing by the number of collected
# predictions (not a literal 5) keeps the mean correct if the fold count
# changes, and fails loudly instead of writing zeros when the list is empty.
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission.csv', index=False)
# Bare expression: a notebook renders the frame as the cell output.
submission