In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

from tensorflow import keras
import tensorflow as tf

In [1]:
from sklearn.preprocessing import RobustScaler
# Single scaler instance: fitted on the training features and then reused to
# transform both the training and the test features in the scaling cell.
rb = RobustScaler()

In [1]:
# Directory holding the competition CSV files; change it here when running
# the notebook outside the original environment.
INPUT_DIR = '../input/ventilator-pressure-prediction'

train = pd.read_csv(f'{INPUT_DIR}/train.csv')
test = pd.read_csv(f'{INPUT_DIR}/test.csv')

**Data Analysis & Feature Engineering**

In [1]:
def plot_double_bid(bid):
    """Plot the time steps and the control/pressure signals of one breath.

    Selects the rows of the notebook-level ``train`` frame whose
    ``breath_id`` equals ``bid`` and draws a figure with two panels:

    * left: ``time_step`` against the row position inside the breath;
    * right: ``pressure`` and ``u_in`` against ``time_step`` on the left
      y-axis, with ``u_out`` on a twin right y-axis.

    Both panel titles show the breath id together with the ``R`` and ``C``
    values taken from the first row of the breath.

    Parameters
    ----------
    bid : int
        ``breath_id`` value to look up in ``train``.

    Raises
    ------
    KeyError
        If ``train`` has no row with this ``breath_id``.
    """
    breath = train.loc[train['breath_id'] == bid]
    if breath.empty:
        # Fail before creating a figure so no empty plot is left behind.
        raise KeyError(f'breath_id {bid} not found in train')

    R = breath['R'].iloc[0]
    C = breath['C'].iloc[0]
    title = f'breath_id:{bid}, R:{R}, C:{C}'

    fig = plt.figure(figsize=(12, 4))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    # Left panel: raw time steps in row order.
    ax1.set_title(title)
    ax1.set_ylabel('Timestep')
    ax1.set_xlabel('Row No.')
    ax1.plot(breath['time_step'].tolist())

    # Right panel: u_out gets its own y-axis (twin of ax2) so it is drawn on a
    # separate scale from pressure and u_in.
    ax3 = ax2.twinx()
    ax2.plot(breath['time_step'], breath['pressure'], 'r-', label='pressure')
    ax2.plot(breath['time_step'], breath['u_in'], 'g-', label='u_in')
    ax3.plot(breath['time_step'], breath['u_out'], 'b-', label='u_out')

    ax2.set_xlabel('Timestep')
    ax2.set_title(title)

    # Legends are placed outside the axes, to the right of the panel.
    ax2.legend(loc=(1.1, 0.8))
    ax3.legend(loc=(1.1, 0.7))

    fig.tight_layout()
    plt.show()

# Render the diagnostic figure for three sample breaths, in this order.
for sample_bid in (28942, 2, 32900):
    plot_double_bid(sample_bid)

**Feature Engineering**

In [1]:
def add_features(df):
    """Return a copy of ``df`` with the engineered features added.

    The same transformation is applied to the training and the test frame so
    the two cannot drift apart. Added columns, in order:

    * ``u_in_lag`` / ``u_out_lag``: previous row's value within the breath;
    * ``u_in_lag_2`` / ``u_out_lag_2``: value two rows back within the breath;
    * ``u_in_diff`` / ``u_in_diff_2``: ``u_in`` minus the respective lag;
    * ``u_in_cumsum``: running sum of ``u_in`` within the breath;
    * ``R*C``, ``RC_sum``, ``RC_div``: product, sum and ratio of ``R`` and ``C``.

    ``R`` and ``C`` are then converted to strings and one-hot encoded, and all
    remaining NaNs (the first rows of each breath have no lag) are set to 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least ``breath_id``, ``u_in``, ``u_out``, ``R`` and
        ``C`` columns, with numeric ``R`` and ``C``.

    Returns
    -------
    pandas.DataFrame
        New frame; ``df`` itself is left untouched.
    """
    out = df.copy()

    # Lags are computed per breath so values never leak across breaths.
    out['u_in_lag'] = out.groupby('breath_id')['u_in'].shift(1)
    out['u_out_lag'] = out.groupby('breath_id')['u_out'].shift(1)
    out['u_in_lag_2'] = out.groupby('breath_id')['u_in'].shift(2)
    out['u_out_lag_2'] = out.groupby('breath_id')['u_out'].shift(2)
    out['u_in_diff'] = out['u_in'] - out['u_in_lag']
    out['u_in_diff_2'] = out['u_in'] - out['u_in_lag_2']
    out['u_in_cumsum'] = out['u_in'].groupby(out['breath_id']).cumsum()

    # Numeric interactions must be computed before R and C become strings.
    out['R*C'] = out['R'] * out['C']
    out['RC_sum'] = out['R'] + out['C']
    out['RC_div'] = out['R'] / out['C']

    # Treat R and C as categorical values and one-hot encode exactly these two.
    out['R'] = out['R'].astype(str)
    out['C'] = out['C'].astype(str)
    out = pd.get_dummies(out, columns=['R', 'C'])

    return out.fillna(0)


train = add_features(train)
test = add_features(test)

# One-hot encoding each frame on its own only gives identical columns when
# both frames contain the same R/C values. Align test to train's feature
# columns (everything except the target) so the scaler and the model always
# see the same layout; this is a no-op when the columns already match.
feature_columns = [col for col in train.columns if col != 'pressure']
if list(test.columns) != feature_columns:
    test = test.reindex(columns=feature_columns, fill_value=0)

In [1]:
# Inspect the training frame after feature engineering.
train

In [1]:
# List the resulting column names, including the one-hot encoded R/C columns.
train.columns

**Dropping Unwanted Columns**

In [1]:
# Keep the test ids for the submission file before the column is dropped.
test_id = test['id']

# Pressure is the prediction target; fold it into rows of 80 values each.
# NOTE(review): assumes every breath contributes exactly 80 consecutive rows - confirm.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# Identifier columns are removed from both frames; the target exists only in train.
id_columns = ['id', 'breath_id']
train.drop(columns=id_columns + ['pressure'], inplace=True)
test.drop(columns=id_columns, inplace=True)

**Scaling Data using RobustScaler**

In [1]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both sets.
rb.fit(train)

train_new = rb.transform(train)
test_new = rb.transform(test)

# Derive the feature count from the scaled array instead of hard-coding it:
# with a fixed literal, a changed feature set could still reshape without an
# error (whenever the element count happens to divide) and silently scramble
# the sequences.
n_features = train_new.shape[1]

# 80 rows per sequence, matching the reshape(-1, 80) applied to the targets.
train_re = train_new.reshape(-1, 80, n_features)
test_re = test_new.reshape(-1, 80, n_features)

In [1]:
# Sanity check: the first two dimensions of train_re should equal targets' shape.
print(train_re.shape,targets.shape)

**Model Building**

In [1]:
# Adam optimizer configuration (learning rate 0.002) used by build_model()
# when compiling the model.
opt = tf.keras.optimizers.Adam(learning_rate = 0.002)

In [1]:
def build_model(optimizer=None, input_shape=(80, 19)):
    """Build and compile the stacked bidirectional-LSTM regression model.

    Architecture: BiLSTM(128) -> BiLSTM(64) -> Dropout(0.2) -> Dense(32, selu)
    -> Dense(1). Both recurrent layers return full sequences, so the model
    emits one value per time step. It is compiled with MAE as both loss and
    metric, and its summary is printed before returning.

    Parameters
    ----------
    optimizer : tf.keras.optimizers.Optimizer, optional
        Optimizer to compile with. When omitted, a fresh copy of the
        notebook-level ``opt`` is created, so each built model gets its own
        optimizer state and this function can be called (or its cell re-run)
        more than once.
    input_shape : tuple of int, optional
        ``(time_steps, n_features)`` of one input sequence. Defaults to the
        shape used by this notebook, ``(80, 19)``.

    Returns
    -------
    tf.keras.Model
        The compiled model.
    """
    if optimizer is None:
        # Clone rather than reuse the shared instance: an optimizer that has
        # already been used with one model carries that model's state.
        optimizer = type(opt).from_config(opt.get_config())

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True), input_shape=input_shape))

    model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
    model.add(tf.keras.layers.Dropout(0.2))

    # Dense layers act on the last axis, i.e. independently at every time step.
    model.add(tf.keras.layers.Dense(32, activation='selu', kernel_initializer='uniform'))
    model.add(tf.keras.layers.Dense(1))

    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    model.summary()

    return model

# Alternative: restore a previously saved model with
# tf.keras.models.load_model(<path>) instead of building a fresh one.
model = build_model()

**Model Training**

In [1]:
# Checkpoint callback: the file is only overwritten when val_loss improves.
call_back = tf.keras.callbacks.ModelCheckpoint(
    "ModelLSTM.h5",
    monitor='val_loss',
    save_best_only=True,
)

# NOTE(review): after fit() the in-memory model appears to keep its last-epoch
# weights; the best-val_loss weights live only in the checkpoint file - confirm
# which of the two is meant to be used for evaluation and prediction.
fit_options = dict(
    callbacks=[call_back],
    batch_size=64,
    epochs=300,
    validation_split=0.2,
    verbose=1,
)
history = model.fit(train_re, targets, **fit_options)

In [1]:
def show_graph(history):
    """Plot the per-epoch MAE curves recorded during training.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute is a dict of per-epoch metric
        lists, as returned by ``model.fit``. Must contain ``'mae'``; the
        ``'val_mae'`` curve is drawn only when present (it is absent when
        training ran without validation data).
    """
    metrics = history.history

    fig, ax = plt.subplots()
    ax.plot(metrics['mae'], label='train')
    if 'val_mae' in metrics:
        # This curve comes from the validation split, not from the test set.
        ax.plot(metrics['val_mae'], label='validation')

    ax.set_title('model mae')
    ax.set_ylabel('loss')
    ax.set_xlabel('epoch')
    ax.legend(loc='upper right')
    plt.show()
    
# Plot the training curves recorded by model.fit().
show_graph(history)

**Model Evaluation**

In [1]:
# Evaluates on the full training array, which includes the 20% that fit() held
# out via validation_split, so this is not a score on unseen data.
model.evaluate(train_re,targets)

**Model Prediction**

In [1]:
# Predict the test sequences and flatten the per-step outputs into one column.
pred = model.predict(test_re).reshape(-1, 1)

In [1]:
def write_prediction(prediction, test_id, path='./submission.csv'):
    """Write predictions to a CSV file with ``id`` and ``pressure`` columns.

    Parameters
    ----------
    prediction : array-like
        Predicted values. Any shape is accepted (e.g. ``(n, 1)`` or
        ``(batch, steps, 1)``); it is flattened in row-major order and must
        contain exactly one value per id.
    test_id : array-like
        Row identifiers, cast to ``int`` and written as the ``id`` column.
    path : str, optional
        Destination file. Defaults to ``'./submission.csv'``.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of ids.
    """
    ids = np.asarray(test_id).astype(int)
    values = np.asarray(prediction).reshape(-1)

    if len(ids) != len(values):
        raise ValueError(
            f'got {len(values)} predictions for {len(ids)} ids'
        )

    # The ids become the index so that to_csv emits them as the first column.
    solution = pd.DataFrame({'pressure': values}, index=ids)
    solution.to_csv(path, index_label='id')
    
# Write the flattened test predictions to the submission file, keyed by the saved test ids.
write_prediction(pred,test_id)