#### Based on this notebook:<br>
https://www.kaggle.com/dmitryuarov/ventilator-pressure-eda-lstm-0-189/

In [None]:
#https://www.kaggle.com/dmitryuarov/ventilator-pressure-eda-lstm-0-189/notebook

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import RobustScaler
from plotly.subplots import make_subplots
import plotly.graph_objects as go

pd.set_option('display.max_columns', None)

In [None]:
# Load the competition tables: labelled training rows, unlabelled test rows,
# and the submission template whose 'pressure' column is filled in at the end.
train, test, ss = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)

In [None]:
# Print a summary of the training frame (columns, dtypes, non-null counts, memory use).
train.info()

In [None]:
# Print the same summary for the test frame.
test.info()

In [None]:
def _realign(grouped_result):
    """Drop the group-key level added by a grouped window op so rows align with the frame."""
    return grouped_result.reset_index(level=0, drop=True)


def _rolling_by_breath(df, column, window):
    """Return a trailing rolling window over ``df[column]`` that restarts at every breath."""
    return df.groupby('breath_id')[column].rolling(window=window, min_periods=1)


def prepare_set(df):
    """Return a feature-engineered copy of a ventilator frame.

    Every sequential feature is computed per ``breath_id`` so that no value
    leaks from one breath into the next.

    Args:
        df: DataFrame with at least the columns ``breath_id``, ``time_step``,
            ``u_in``, ``R`` and ``C``. Rows must be in time order within each
            breath and the index must be unique. The frame is not modified.

    Returns:
        A new DataFrame holding the input columns (``R`` and ``C`` replaced by
        their one-hot dummies) plus the engineered features, with every
        remaining NaN replaced by 0.
    """
    # Work on a copy: the original added columns to (and re-typed R/C in) the
    # caller's frame as a side effect.
    df = df.copy()

    # Cumulative time-weighted and plain flow.
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()

    # Finite-difference slope of u_in. Both differences are taken per breath so
    # the first row of a breath is not compared with the last row of the
    # previous one; a zero time delta would give +/-inf, which is mapped to 0
    # like the undefined first row.
    u_in_step = df.groupby('breath_id')['u_in'].diff()
    time_delta = df.groupby('breath_id')['time_step'].diff()
    df['u_in_1st_derivative'] = (
        (u_in_step / time_delta).replace([np.inf, -np.inf], np.nan).fillna(0)
    )
    df['expand_mean_1sr_der'] = _realign(
        df.groupby('breath_id')['u_in_1st_derivative'].expanding(min_periods=2).mean()
    )
    df['u_in_1st_der_mean10'] = _realign(
        _rolling_by_breath(df, 'u_in_1st_derivative', 10).mean()
    )

    # Past (positive) and future (negative) u_in values within the breath.
    # shift() already returns the frame's own index, so no re-indexing is done:
    # resetting it would misalign the values on any non-default index.
    for lag in (2, 4, -2, -4):
        df[f'u_in_lag{lag}'] = df.groupby('breath_id')['u_in'].shift(lag).fillna(0)

    # One-hot encode the lung attributes by casting them to strings first.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df = pd.get_dummies(df)

    # Exponentially weighted statistics of u_in.
    df['ewm_u_in_mean'] = _realign(df.groupby('breath_id')['u_in'].ewm(halflife=10).mean())
    df['ewm_u_in_std'] = _realign(df.groupby('breath_id')['u_in'].ewm(halflife=10).std())
    # NOTE(review): corr() without `other` correlates u_in with itself, so this
    # column carries little information - kept to preserve the feature set.
    df['ewm_u_in_corr'] = _realign(df.groupby('breath_id')['u_in'].ewm(halflife=10).corr())

    # Trailing 10-row window statistics of u_in.
    df['rolling_10_mean'] = _realign(_rolling_by_breath(df, 'u_in', 10).mean())
    df['rolling_10_max'] = _realign(_rolling_by_breath(df, 'u_in', 10).max())
    df['rolling_10_std'] = _realign(_rolling_by_breath(df, 'u_in', 10).std())

    # Expanding (since breath start) statistics; undefined until two rows exist.
    df['expand_mean'] = _realign(
        df.groupby('breath_id')['u_in'].expanding(min_periods=2).mean()
    )
    df['expand_max'] = _realign(
        df.groupby('breath_id')['u_in'].expanding(min_periods=2).max()
    )
    df['expand_std'] = _realign(
        df.groupby('breath_id')['u_in'].expanding(min_periods=2).std()
    )

    # Absolute row-to-row change of u_in and its trailing statistics.
    df['delta_u_in'] = df.groupby('breath_id')['u_in'].diff().fillna(0).abs()
    # NOTE(review): despite its name this is the same 10-row rolling mean as
    # 'delta_rolling_10_mean' - kept to preserve the feature set.
    df['delta_u_in_exp'] = _realign(_rolling_by_breath(df, 'delta_u_in', 10).mean())
    df['delta_rolling_10_mean'] = _realign(_rolling_by_breath(df, 'delta_u_in', 10).mean())
    df['delta_rolling_10_max'] = _realign(_rolling_by_breath(df, 'delta_u_in', 10).max())

    # Trapezoid of u_in over each time step, computed per breath and floored
    # at 0, then summed over trailing windows.
    previous_u_in = df.groupby('breath_id')['u_in'].shift(1).fillna(0)
    step_duration = df.groupby('breath_id')['time_step'].diff().fillna(0)
    df['work'] = ((df['u_in'] + previous_u_in) / 2 * step_duration).clip(lower=0)
    df['work_roll_10'] = _realign(_rolling_by_breath(df, 'work', 10).sum())
    df['work_roll_15'] = _realign(_rolling_by_breath(df, 'work', 15).sum())

    # Trailing 10-row quantiles of u_in.
    for quantile in (0.1, 0.25, 0.5, 0.75, 0.9):
        df[f'u_in_rol_q{quantile}'] = _realign(
            _rolling_by_breath(df, 'u_in', 10).quantile(quantile)
        )

    # Window statistics are NaN where too few rows are available.
    df = df.fillna(0)

    return df

In [None]:
# Replace the raw training frame with the output of prepare_set.
train=prepare_set(train)
# Bare expression: the notebook displays the resulting (rows, columns) tuple.
train.shape

In [None]:
# Run the test frame through the same prepare_set transformation.
test=prepare_set(test)
# Bare expression: the notebook displays the resulting (rows, columns) tuple.
test.shape

In [None]:
#plt.figure(figsize=(20,20))
#sns.heatmap(train.drop([ 'id', 'breath_id'], axis=1).corr(), annot=True)

In [None]:
# Correlation of every remaining column with the target, largest first.
# The identifier columns are excluded before the correlation matrix is built.
(
    train
    .drop(columns=['id', 'breath_id'])
    .corr()['pressure']
    .sort_values(ascending=False)
)

In [None]:
# Draw four random rows and plot the breath each one belongs to: the raw
# signals next to a few engineered features, all on a shared time axis.
# Rows (not breaths) are sampled, so the same breath can appear more than once.
sample = train.sample(4)
breath_id = list(sample['breath_id'])

for ID in breath_id:
    case = train.loc[train['breath_id'] == ID]

    fig = make_subplots(
        rows=1,
        cols=1,
        x_title="Time",
        subplot_titles=[f'Breath id: {ID}'],
    )
    # One line per column, named after the column it shows.
    for column in ('u_in', 'pressure', 'u_in_rol_q0.75', 'rolling_10_mean', 'work_roll_15'):
        fig.add_trace(
            go.Scatter(x=case['time_step'], y=case[column], name=column),
            row=1,
            col=1,
        )

    fig.show()

In [None]:
# Pull the target out as rows of 80 consecutive pressure values
# (presumably one breath per row - confirm every breath has exactly 80 steps),
# then strip the target and the identifier columns from the feature frames.
targets = train['pressure'].to_numpy().reshape(-1, 80)
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

In [None]:
# Learn the scaling statistics from the training rows only, then apply the
# very same fitted scaler to both the training and the test features.
RS = RobustScaler().fit(train)
train = RS.transform(train)
test = RS.transform(test)

In [None]:
#from sklearn.decomposition import PCA
#pca=PCA(n_components=0.995)
#train = pca.fit_transform(train)
#test = pca.transform(test)

In [None]:
# Fold the flat row matrices into 3-D arrays of 80 consecutive rows each
# (presumably one breath per block - confirm against the data ordering).
# The test array reuses the trailing (80, n_features) shape of the train array.
train = train.reshape((-1, 80, train.shape[-1]))
test = test.reshape((-1,) + train.shape[1:])

In [None]:
# In case of a TPU run: resolve and initialise the TPU and build a TPU
# strategy; on any failure fall back to TensorFlow's current default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # `Exception` rather than a bare `except` so that Ctrl-C / SystemExit still
    # stop the notebook; the reason for the fallback is reported, not hidden.
    print('TPU not available, using the default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

In [None]:
#LSTM/GRU ensemble
def get_model():
    """Build and compile the two-branch GRU/LSTM regressor.

    The input shape is taken from the last two axes of the module-level
    ``train`` array. The same input feeds two parallel stacks of four
    bidirectional recurrent layers (one GRU stack, one LSTM stack); their
    per-step outputs are concatenated and passed through a small dense head
    that emits one value per step.

    Returns:
        A ``keras.Model`` compiled with the Adam optimizer and MAE loss.
    """
    inputs = keras.Input(shape=train.shape[-2:])
    stack_units = (256, 192, 128, 96)

    # GRU branch (built first so layer creation order matches the LSTM branch
    # coming second).
    gru_branch = inputs
    for units in stack_units:
        gru_branch = layers.Bidirectional(
            layers.GRU(units, return_sequences=True)
        )(gru_branch)

    # LSTM branch with the same widths.
    lstm_branch = inputs
    for units in stack_units:
        lstm_branch = layers.Bidirectional(
            layers.LSTM(units, return_sequences=True)
        )(lstm_branch)

    merged = layers.Concatenate()([gru_branch, lstm_branch])

    head = layers.Dense(128, activation='swish')(merged)
    head = layers.Dense(1)(head)

    model = keras.Model(inputs, head)
    model.compile(optimizer="adam", loss="mae")

    return model

In [None]:
def get_model():
    """Build and compile a plain sequential stack of bidirectional GRUs.

    The input shape is taken from the last two axes of the module-level
    ``train`` array. Four bidirectional GRU layers of decreasing width are
    followed by a SELU dense layer and a single-unit output layer.

    Returns:
        A Keras ``Sequential`` model compiled with the Adam optimizer and MAE loss.
    """
    stack = [layers.Input(shape=train.shape[-2:])]
    stack += [
        layers.Bidirectional(layers.GRU(units, return_sequences=True))
        for units in (512, 256, 192, 128)
    ]
    stack += [
        layers.Dense(64, activation='selu'),
        layers.Dense(1),
    ]

    model = tf.keras.models.Sequential(stack)
    model.compile(optimizer="adam", loss="mae")

    return model

In [None]:
def get_model():
    """Build and compile a GRU stack whose deeper layers are all fed to the head.

    The input shape is taken from the last two axes of the module-level
    ``train`` array. A 512-unit bidirectional GRU is followed by 384-, 256- and
    192-unit ones chained in sequence; the outputs of those last three layers
    are concatenated before the dense head, which emits one value per step.

    Returns:
        A ``keras.Model`` compiled with the Adam optimizer and MAE loss.
    """
    inputs = keras.Input(shape=train.shape[-2:])

    hidden = layers.Bidirectional(layers.GRU(512, return_sequences=True))(inputs)

    # Each of the following layers consumes the previous one and is also kept
    # as a tap for the concatenation below.
    taps = []
    for units in (384, 256, 192):
        hidden = layers.Bidirectional(layers.GRU(units, return_sequences=True))(hidden)
        taps.append(hidden)

    merged = layers.Concatenate()(taps)

    head = layers.Dense(128, activation='swish')(merged)
    head = layers.Dense(1)(head)

    model = keras.Model(inputs, head)
    model.compile(optimizer="adam", loss="mae")

    return model

In [None]:
# Build a throwaway model with the currently defined get_model and print its layer table.
get_model().summary()

In [None]:
# Render the layer graph, including tensor shapes, for another freshly built model.
tf.keras.utils.plot_model(get_model(), show_shapes=True)

In [None]:
# Cross-validated training: one fresh model per fold, each contributing a
# flattened prediction for the whole test set.
EPOCH = 300
BATCH_SIZE = 2048
n_folds=5

with strategy.scope():
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    test_preds = []
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold+1}/{n_folds}', '<', '-'*15)

        # `test_idx` holds the held-out (validation) rows of this fold.
        X_train, y_train = train[train_idx], targets[train_idx]
        X_valid, y_valid = train[test_idx], targets[test_idx]
        model = get_model()

        # NOTE(review): LearningRateScheduler invokes its schedule with the
        # epoch index, while decay_steps below is computed like a batch-step
        # count - confirm the resulting decay speed is the intended one.
        scheduler = keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=3e-3,
            decay_steps=40 * (len(train) / BATCH_SIZE),
            decay_rate=1e-4,
        )
        lr = keras.callbacks.LearningRateScheduler(scheduler, verbose=1)
        # Stop after 50 epochs without a better val_loss and roll back to the best weights.
        estop = keras.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='min',
            patience=50,
            verbose=0,
            restore_best_weights=True,
        )

        model.fit(
            X_train,
            y_train,
            validation_data=(X_valid, y_valid),
            epochs=EPOCH,
            batch_size=BATCH_SIZE,
            callbacks=[lr, estop],
        )

        # Flatten the per-step predictions into one 1-D vector for this fold.
        test_preds.append(model.predict(test).reshape(-1))

In [None]:
# Blend the folds with a plain average. Dividing by the number of collected
# predictions (rather than a literal 5) keeps the mean correct when the fold
# count is changed.
ss['pressure'] = sum(test_preds) / len(test_preds)
ss.to_csv('submission.csv', index=False)