In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# Prepare data

In [4]:
# Load both competition CSVs, using the first column of each file as the index.
TRAIN_CSV = '/kaggle/input/ventilator-pressure-prediction/train.csv'
TEST_CSV = '/kaggle/input/ventilator-pressure-prediction/test.csv'

train_data = pd.read_csv(TRAIN_CSV, index_col=0)
test_data = pd.read_csv(TEST_CSV, index_col=0)

# Feature engineering

In [5]:
def add_attributes(df, c_coeff_div=1000):
    """Return a copy of ``df`` with engineered per-breath features appended.

    The input frame is left untouched; all new columns are added to a copy,
    so re-running the calling cell never changes a frame shown earlier.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``.
    c_coeff_div : number, default 1000
        Divisor applied to ``C`` when building the ``coeff`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these columns appended, in this order:

        * ``time_step_prev``  - previous row's ``time_step`` within the same
          ``breath_id`` (0 for the first row of a breath).
        * ``time_delta``      - ``time_step - time_step_prev``.
        * ``u_in_prev`` / ``u_out_prev`` - previous row's ``u_in`` / ``u_out``
          within the same ``breath_id`` (0 for the first row of a breath).
        * ``u_in_delta``      - trapezoid area ``(u_in_prev + u_in) * time_delta / 2``.
        * ``coeff``           - ``1 / R + C / c_coeff_div``.
        * ``u_in_delta_coeff`` - ``u_in_delta * coeff``.
        * ``u_out_consec``    - ``u_out`` multiplied by the 1-based position of
          the row inside its current run of identical ``u_out`` values.
    """

    def count_consecutive_out(y):
        # A new run starts wherever the value differs from the previous row;
        # the cumulative sum of those change flags labels each run.
        run_id = (y != y.shift()).cumsum()
        position_in_run = y.groupby(run_id).cumcount() + 1
        # Multiplying by y zeroes the counter wherever y == 0
        # (assumes u_out is a 0/1 flag -- TODO confirm against the data).
        return y * position_in_run

    out = df.copy()
    by_breath = out.groupby('breath_id')

    out['time_step_prev'] = by_breath['time_step'].shift(1).fillna(0)
    out['time_delta'] = out['time_step'] - out['time_step_prev']

    out['u_in_prev'] = by_breath['u_in'].shift(1).fillna(0)
    out['u_out_prev'] = by_breath['u_out'].shift(1).fillna(0)

    out['u_in_delta'] = ((out['u_in_prev'] + out['u_in']) * out['time_delta']) / 2
    # higher R -> harder to blow, higher C -> easier to blow
    out['coeff'] = 1 / out['R'] + out['C'] / c_coeff_div
    out['u_in_delta_coeff'] = out['u_in_delta'] * out['coeff']

    out['u_out_consec'] = by_breath['u_out'].transform(count_consecutive_out)

    return out

In [6]:
# Append the engineered feature columns to the training frame, then preview it.
train_data = train_data.pipe(add_attributes)
train_data.head(n=10)

In [7]:
# Target is the `pressure` column; the features are every remaining column
# except `breath_id`.
y_train = train_data.loc[:, 'pressure']
X_train = train_data.drop(columns=['pressure', 'breath_id'])

In [8]:
# Same feature engineering for the test frame; `breath_id` is not a model input.
test_data = test_data.pipe(add_attributes)
X_test = test_data.drop(columns=['breath_id'])

In [9]:
from sklearn.preprocessing import RobustScaler
# Fit the scaler on the training features only, then apply the same fitted
# scaling to both the training and the test features.
RS = RobustScaler()
RS.fit(X_train)
X_train, X_test = RS.transform(X_train), RS.transform(X_test)

In [10]:
# STEPS is the sequence length used when the flat rows are reshaped into
# sequences; N_FEATURES is the column count of the scaled feature matrix.
STEPS, N_FEATURES = 80, X_train.shape[1]

In [11]:
# Fold the flat row-wise arrays into fixed-length sequences:
# targets become (n_sequences, STEPS), features (n_sequences, STEPS, N_FEATURES).
sequence_shape = (-1, STEPS, N_FEATURES)

y_train = y_train.to_numpy().reshape(-1, STEPS)
X_train = np.reshape(X_train, sequence_shape)
X_test = np.reshape(X_test, sequence_shape)

# Prepare model

In [12]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import InputLayer,Dense,LSTM,Dropout,Flatten,Bidirectional

In [13]:
def get_model(input_shape=None, learning_rate=1e-3):
    """Build and compile the stacked bidirectional-LSTM regressor.

    Parameters
    ----------
    input_shape : tuple, optional
        Per-sample input shape ``(steps, features)``. When omitted, it is
        taken from the last two dimensions of the notebook-global ``X_train``,
        which is what the original zero-argument call did.
    learning_rate : float, default 1e-3
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    keras.Sequential
        Model compiled with mean-squared-error loss and ``mse`` / ``mae``
        metrics. Every LSTM uses ``return_sequences=True`` and there is no
        flattening layer, so the dense head emits one value per time step.
    """
    if input_shape is None:
        # Backward-compatible default: read the shape from the global tensor.
        input_shape = X_train.shape[-2:]

    model = keras.Sequential([
        InputLayer(input_shape=input_shape),
        Bidirectional(LSTM(200, return_sequences=True)),
        Bidirectional(LSTM(150, return_sequences=True)),
        Bidirectional(LSTM(100, return_sequences=True)),
        Dense(200, activation="swish"),
        Dropout(0.2),
        Dense(150, activation="swish"),
        # NOTE(review): a ReLU output can never produce a negative value and
        # yields zero gradient wherever the unit is inactive. If the target
        # can be negative, a linear output is the usual choice -- confirm
        # against the target's range before changing.
        Dense(1, activation="relu")
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=['mse', 'mae'],
    )
    return model

In [14]:
# Pick a distribution strategy: use the TPU when one can be resolved,
# otherwise fall back to TensorFlow's default strategy so the notebook
# still runs end to end without a TPU attached.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    # `tf.distribute.TPUStrategy` is the non-experimental replacement for the
    # deprecated `tf.distribute.experimental.TPUStrategy`.
    tpu_strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # NOTE(review): ValueError is what the resolver is expected to raise when
    # no TPU is found -- confirm for the installed TensorFlow version.
    tpu = None
    tpu_strategy = tf.distribute.get_strategy()
    print('No TPU detected; falling back to the default distribution strategy.')

# Build (and compile) the model inside the strategy scope so its variables
# are created under that strategy.
with tpu_strategy.scope():
    model = get_model()

In [15]:
# Stop when `val_loss` has not improved for 10 epochs and restore the best weights.
callback_1 = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

# Training options: 20% of the training data is held out for validation.
fit_options = dict(
    validation_split=0.2,
    epochs=30,
    batch_size=512,
    callbacks=[callback_1],
)
history = model.fit(X_train, y_train, **fit_options)

In [16]:
# Per-epoch training and validation loss recorded by `model.fit`.
loss = history.history['loss']
val_loss = history.history['val_loss']
epoch = range(1, len(loss) + 1)

# Draw both curves on a single wide figure using the explicit Axes interface.
fig, ax = plt.subplots(figsize=(18, 8))
ax.plot(epoch, loss, 'r', label="Training")
ax.plot(epoch, val_loss, 'b', label="Validation")
ax.legend()
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title('Training and validation loss')
plt.show()

# Predict test pressures

In [17]:
# Run the trained model over the reshaped test sequences.
y_pred_test = model.predict(x=X_test)

In [22]:
# Flatten the predictions to one value per test row. `reshape(-1)` always
# yields a 1-D array, whereas the previous squeeze/reshape/squeeze chain
# collapsed to a 0-d scalar when there was exactly one prediction.
flat_pressure = y_pred_test.reshape(-1)

# `test_data` was loaded with its first column as the index, so the index
# supplies the `id` values for the submission file.
output = pd.DataFrame({'id': test_data.index, 'pressure': flat_pressure})
output.to_csv('submission.csv', index=False)