In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import time, logging, gc
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import *

In [None]:
# Read the competition's training and test tables from the Kaggle input folder.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')

# Distinct pressure values seen in the training labels, in ascending order.
# The final post-processing cell snaps predictions onto this set of values.
pressure_unique = train['pressure'].unique()
pressure_unique = np.array(sorted(pressure_unique))

In [None]:
# Display the training table exactly as loaded from train.csv.
train

In [None]:
# Display the test table exactly as loaded from test.csv.
test

In [None]:
def add_features(df):
    """Add per-breath engineered columns to ``df`` in place and return it.

    Reads the columns ``breath_id``, ``time_step``, ``u_in``, ``u_out``,
    ``R`` and ``C``. The frame passed in is mutated: new columns are appended
    in a fixed order and ``R`` / ``C`` are converted to strings.

    Columns appended, in order:
        area, cross, cross2, u_in_cumsum, one, count, u_in_cummean,
        breath_id_lag, breath_id_lag2, breath_id_lagsame,
        breath_id_lag2same, u_in_lag, u_in_lag2, u_out_lag2, RC

    The lag features are built by shifting the whole column and then zeroing
    every row whose shifted neighbour belongs to a different ``breath_id``,
    so they compare physically adjacent rows rather than rows within a group.

    Args:
        df: DataFrame holding the columns listed above.

    Returns:
        The same DataFrame object that was passed in.
    """
    breath = df['breath_id']

    # Running per-breath sum of time_step * u_in.
    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()

    # Interactions with the u_out column.
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    # Running per-breath sum and mean of u_in; 'one' and 'count' are helper
    # columns used to obtain the 1-based position of each row in its breath.
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()
    df['one'] = 1
    df['count'] = df.groupby('breath_id')['one'].cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # breath_id of the row one / two positions earlier (0 where none exists).
    for offset, suffix in ((1, 'lag'), (2, 'lag2')):
        df[f'breath_id_{suffix}'] = breath.shift(offset).fillna(0)

    # 1 when that earlier row belongs to the same breath as this row, else 0.
    for suffix in ('lag', 'lag2'):
        df[f'breath_id_{suffix}same'] = np.where(
            df[f'breath_id_{suffix}'] == breath, 1, 0
        )

    # Shifted signals, zeroed wherever the shift crossed a breath boundary.
    df['u_in_lag'] = df['u_in'].shift(1).fillna(0) * df['breath_id_lagsame']
    df['u_in_lag2'] = df['u_in'].shift(2).fillna(0) * df['breath_id_lag2same']
    df['u_out_lag2'] = df['u_out'].shift(2).fillna(0) * df['breath_id_lag2same']

    # R and C become strings so that RC is their textual concatenation.
    for column in ('R', 'C'):
        df[column] = df[column].astype(str)
    df['RC'] = df['R'] + df['C']

    return df

# Apply the same feature engineering to both splits so their columns match.
train, test = add_features(train), add_features(test)

In [None]:
# Target matrix: the flat pressure column regrouped into rows of 80 values.
y = train['pressure'].to_numpy().reshape(-1, 80)

# Identifier and intermediate helper columns that are removed from both splits
# before scaling. Kept in one list so the two splits cannot drift apart.
dropped_cols = [
    'id', 'breath_id', 'one', 'count',
    'breath_id_lag', 'breath_id_lag2',
    'breath_id_lagsame', 'breath_id_lag2same',
    'u_out_lag2',
]
train.drop(columns=['pressure'] + dropped_cols, inplace=True)
test = test.drop(columns=dropped_cols)

In [None]:
# Display the training features that remain after the target and helper columns were dropped.
train

In [None]:
# The scaler is fitted on the training features only and then applied to both
# splits; from here on `train` and `test` hold the scaler's transformed output
# rather than the original DataFrames.
rb = RobustScaler()
train = rb.fit(train).transform(train)
test = rb.transform(test)

In [None]:
# Regroup the flat row-wise matrices into (n_sequences, 80, n_features).
# `train` is reshaped first, so train.shape[-1] is still the feature count
# when it is reused for `test`.
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])

# Actually run a collection pass; a bare `gc.collect` only references the
# function without calling it.
gc.collect()

In [None]:
# Detect hardware and pick the matching distribution strategy.
# Result: `strategy` is a TPUStrategy when a TPU cluster resolves, otherwise a
# MirroredStrategy with XLA and the mixed_float16 policy switched on.
# `tpu` is the resolver, or None when no TPU was found.
print(tf.version.VERSION)
tf.get_logger().setLevel(logging.ERROR)
try: # detect TPU
    tpu = None
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Prefer the stable class; fall back to the experimental alias on older
    # TensorFlow releases that do not expose tf.distribute.TPUStrategy yet.
    if hasattr(tf.distribute, 'TPUStrategy'):
        strategy = tf.distribute.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # detect GPU(s) and enable mixed precision
    strategy = tf.distribute.MirroredStrategy() # works on GPU and multi-GPU
    tf.config.optimizer.set_jit(True) # XLA compilation
    # The `mixed_precision.experimental` namespace is deprecated and missing
    # from newer TensorFlow releases, so use the stable API when it exists
    # and only fall back to the experimental one on old installations.
    if hasattr(tf.keras.mixed_precision, 'set_global_policy'):
        policy = tf.keras.mixed_precision.Policy('mixed_float16')
        tf.keras.mixed_precision.set_global_policy(policy)
    else:
        policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
        tf.keras.mixed_precision.experimental.set_policy(policy)
    print('Mixed precision enabled')
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
def plot_hist(hist):
    """Draw the training and validation loss curves of one training run.

    Args:
        hist: Object whose ``history`` mapping contains the per-epoch lists
            ``"loss"`` and ``"val_loss"`` (as returned by ``model.fit``).

    The figure is shown immediately; nothing is returned.
    """
    # Training curve first, validation second, matching the legend order.
    for curve_key in ("loss", "val_loss"):
        plt.plot(hist.history[curve_key])

    plt.title("model performance")
    plt.ylabel("mean_absolute_error")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
def create_model(input_shape=(80, 13)):
    """Build and compile the stacked bidirectional-LSTM regressor.

    The model is created and compiled inside ``strategy.scope()`` so that its
    variables are placed according to the globally selected strategy.

    Args:
        input_shape: ``(time_steps, n_features)`` of a single input sequence.
            Defaults to ``(80, 13)``, the shape that used to be hard-coded, so
            existing ``create_model()`` calls behave as before. Pass the real
            shape if the feature set changes.

    Returns:
        A compiled Keras ``Sequential`` model (Adam optimizer, MAE loss) that
        emits one value per time step.
    """
    with strategy.scope():

        model = Sequential([
            Input(shape=input_shape),
            Bidirectional(LSTM(700, return_sequences=True)),
            Bidirectional(LSTM(512, return_sequences=True)),
            Bidirectional(LSTM(256, return_sequences=True)),
            Bidirectional(LSTM(128, return_sequences=True)),
            Dense(128, activation='selu'),
            # Keep the regression head in float32: when the mixed_float16
            # policy is active the output would otherwise be float16, which
            # is too coarse for the predicted values. This is a no-op when
            # the global policy is already float32.
            Dense(1, dtype='float32'),
        ])

        model.compile(optimizer="adam", loss="mae")
    return model

In [None]:
# 5-fold cross-validation over whole sequences. Every fold trains a fresh
# model and contributes one flattened test-set prediction vector.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

test_preds = []

for fold, (train_idx, valid_idx) in enumerate(kf.split(train, y)):
    print(f"****** fold: {fold+1} *******")
    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = y[train_idx], y[valid_idx]

    # NOTE(review): LearningRateScheduler asks its schedule for a value once
    # per epoch, so this ExponentialDecay is evaluated at step == epoch index
    # rather than at the optimizer step. With decay_steps in the order of
    # 200 * (0.8 * len(train) / 512) the learning rate therefore hardly moves
    # away from 1e-3 during 300 epochs. decay_steps also assumes a batch size
    # of 512 while fit() below uses 256. If a per-step decay was intended,
    # the schedule should be given to the optimizer inside create_model();
    # left unchanged here because that would alter training results.
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 200*((len(train)*0.8)/512), 1e-5)
    lr_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
    es = EarlyStopping(monitor='val_loss', mode='min', patience=35, verbose=1, restore_best_weights=True)

    model = create_model()

    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=300,
        batch_size=256,
        callbacks=[es, lr_callback],
    )

    # Flatten the predictions to 1-D so they line up with the submission rows.
    test_preds.append(model.predict(test).reshape(-1))
    plot_hist(history)

    # Release this fold's arrays and model, and reset Keras' global state so
    # memory does not keep growing as one model after another is created.
    del X_train, X_valid, y_train, y_valid, model
    tf.keras.backend.clear_session()
    gc.collect()

In [None]:
# Mean ensemble of the per-fold test predictions.
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
# Divide by the number of folds that actually produced predictions instead of
# a hard-coded 5, so the average stays correct if the fold count changes.
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission_mean.csv', index=False)
submission

In [None]:
# ENSEMBLE FOLDS WITH MEDIAN
# The per-fold vectors are stacked row-wise (one row per fold) and the median
# is taken down each column, replacing the mean-based pressure column.
submission["pressure"] = np.median(test_preds, axis=0)
submission

In [None]:
# ROUND PREDICTIONS (post-processing)
def _snap_to_nearest(values, grid):
    """Replace every entry of ``values`` by the closest entry of ``grid``.

    Vectorised equivalent of ``grid[np.abs(grid - x).argmin()]`` applied to
    each ``x``: a binary search finds the two neighbouring grid values and
    the nearer one is kept, with ties going to the lower value (the same
    choice ``argmin`` makes by returning the first minimum).

    Args:
        values: 1-D array-like of numbers to snap.
        grid: 1-D array-like of candidate values, sorted ascending and
            non-empty.

    Returns:
        float64 ndarray of the same length as ``values``.
    """
    values = np.asarray(values, dtype=np.float64)
    grid = np.asarray(grid, dtype=np.float64)
    if grid.size == 1:
        return np.full(values.shape, grid[0])

    # Index of the first grid value >= x, clipped so that both neighbours
    # (upper_idx - 1 and upper_idx) always exist, even for out-of-range x.
    upper_idx = np.clip(np.searchsorted(grid, values), 1, grid.size - 1)
    lower, upper = grid[upper_idx - 1], grid[upper_idx]
    snapped = np.where(values - lower <= upper - values, lower, upper)

    # argmin over an all-NaN distance vector returns index 0, so NaN inputs
    # map to the first grid value, as in the per-row implementation.
    return np.where(np.isnan(values), grid[0], snapped)


submission['pressure'] = _snap_to_nearest(submission['pressure'].to_numpy(), pressure_unique)
submission.to_csv('submission_post_preprocessing.csv', index=False)
submission

<h4>References:</h4>

1. https://www.kaggle.com/tolgadincer/tensorflow-bidirectional-lstm-0-234 <br>
2. https://www.kaggle.com/junhyeok99/tensorflow <br>
3. https://www.kaggle.com/kensit/improvement-base-on-tensor-bidirect-lstm-0-173
4. Ensembling the folds with the median instead of the mean, from this [kernel](https://www.kaggle.com/cdeotte/ensemble-folds-with-median-0-153/notebook)
5. Post-processing from this [kernel](https://www.kaggle.com/snnclsr/a-dummy-approach-to-improve-your-score-postprocess/notebook), using the method of [columbia2131](https://www.kaggle.com/columbia2131) (his comment on that notebook contains a better version of the code)