In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow.keras import layers, initializers

from sklearn.model_selection import KFold

In [None]:
from sklearn.preprocessing import RobustScaler
# Shared RobustScaler instance; it is fitted on the training feature table
# later in the notebook and used there to transform those features.
rb = RobustScaler()

In [None]:
# Directory holding the competition CSV files (relative to the notebook).
_DATA_DIR = "../input/ventilator-pressure-prediction"

# Locations of the training set, the test set and the submission template.
train_path = f"{_DATA_DIR}/train.csv"
test_path = f"{_DATA_DIR}/test.csv"
sample_sub = f"{_DATA_DIR}/sample_submission.csv"

In [None]:
def dropCols(df, cols):
    """Return a new DataFrame equal to ``df`` minus the columns in ``cols``.

    Parameters:
        df: source DataFrame; it is left unchanged.
        cols: a column label or list of column labels to remove.

    Returns:
        A separate DataFrame without the requested columns.

    Raises:
        KeyError: if any requested label is not a column of ``df``.
    """
    # The non-inplace form of drop already hands back a fresh frame.
    return df.drop(cols, axis=1)

In [None]:
def preProcess(df):
    """Return a copy of ``df`` augmented with per-``breath_id`` engineered features.

    The input must provide the columns ``breath_id``, ``time_step`` and ``u_in``.
    The caller's frame is not modified; all new columns are added to a copy.

    Added columns:
        area: running sum, within each breath_id, of ``time_step * u_in``.
        u_in_cumsum: running sum of ``u_in`` within each breath_id.
        u_in_lag1, u_in_lag2: ``u_in`` shifted by 1 and 2 rows within each
            breath_id; the rows with no predecessor are filled with 0.
        u_in_diff1, u_in_diff2: ``u_in`` minus the corresponding lag column.
        breath_id__u_in__diffmax: per-breath maximum of ``u_in`` minus ``u_in``.
        breath_id__u_in__diffmean: per-breath mean of ``u_in`` minus ``u_in``.

    Parameters:
        df: DataFrame with one row per time step.

    Returns:
        A new DataFrame holding the original columns plus the features above.
    """
    # Copy first: without it the assignments below would add columns to the
    # caller's frame as a side effect before fillna() rebinds the local name.
    df = df.copy()

    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()

    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()

    df['u_in_lag1'] = df.groupby('breath_id')['u_in'].shift(1)
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2)
    # The shifts leave NaN at the start of every breath; note that this fills
    # every NaN in the frame (not only the lag columns) with 0.
    df = df.fillna(0)

    df['u_in_diff1'] = df['u_in'] - df['u_in_lag1']
    df['u_in_diff2'] = df['u_in'] - df['u_in_lag2']

    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']

    return df

In [None]:
# Read the training CSV and add the engineered features in one step.
train_data = preProcess(pd.read_csv(train_path))

In [None]:
def draw(x=0, xx=80):
    """Plot selected ``train_data`` columns against ``id`` for rows ``x`` to ``xx``.

    Reads the module-level ``train_data`` frame. The slice is positional and
    excludes row ``xx``.

    Parameters:
        x: position of the first row to plot.
        xx: position one past the last row to plot.
    """
    window = train_data.iloc[x:xx]
    plt.figure(figsize=(20,8))
    for column, colour in (('pressure', 'r'), ('u_in', 'g'), ('r_c', 'b')):
        # Nothing in this notebook creates an 'r_c' column, so a bare lookup
        # would raise KeyError; skip absent columns and say so instead.
        if column not in window.columns:
            print(f"draw: column '{column}' not found in train_data, skipping")
            continue
        plt.plot(window['id'], window[column], label=column, c=colour)
    plt.legend()
    # Render the figure (the original ended with an argument-less plt.plot(),
    # which draws nothing).
    plt.show()

In [None]:
# Columns removed from the frame (via dropCols) before it is scaled.
cols_2_drop = [
    'id',
    'breath_id',
    'time_step',
]

In [None]:
# Split the processed table into the regression target (Y) and the model
# inputs (train_df): the inputs lose the bookkeeping columns and the target.
Y = train_data['pressure'].copy()
train_df = dropCols(train_data, [*cols_2_drop, 'pressure'])

In [None]:
# Fit the scaler on the training features and replace them with their scaled
# version; the later reshape treats the result as a NumPy array.
train_df = rb.fit(train_df).transform(train_df)

In [None]:
# Fold the flat tables into 3-D arrays: (sequences, 80, features) for the
# inputs and (sequences, 80, 1) for the target.
# Assumes each sequence occupies exactly 80 consecutive rows - TODO confirm
# against the data; reshape raises if the row count is not a multiple of 80.
train_df = train_df.reshape(-1, 80, train_df.shape[-1])
# np.asarray accepts both the pandas Series produced by the split cell and an
# already reshaped ndarray, so re-running this cell no longer fails on `.values`.
Y = np.asarray(Y).reshape(-1, 80, 1)

In [None]:
# Sanity check: display the shapes of the reshaped feature and target arrays.
train_df.shape, Y.shape

In [None]:
def build_model():
    """Create and compile the stacked bidirectional-LSTM regression network.

    The input shape is ``[80, n_features]`` where ``n_features`` is read from
    the last axis of the module-level ``train_df``. Five bidirectional LSTM
    layers (440, 360, 260, 180 and 100 units, all returning full sequences)
    feed two time-distributed dense layers (64 ReLU units, then 1 output).

    Returns:
        A compiled ``tf.keras.Sequential`` model using Adam (learning rate
        0.003) with MAE as both loss and metric.
    """
    def bi_lstm(units, dropout=None, **wrapper_kwargs):
        # Every layer gets its own GlorotNormal instance, as in a hand-written stack.
        lstm_kwargs = {} if dropout is None else {'dropout': dropout}
        recurrent = layers.LSTM(
            units,
            return_sequences=True,
            kernel_initializer=initializers.GlorotNormal(),
            **lstm_kwargs,
        )
        return layers.Bidirectional(recurrent, **wrapper_kwargs)

    n_features = train_df.shape[-1]

    model = tf.keras.Sequential([
        bi_lstm(440, input_shape=[80, n_features]),
        bi_lstm(360, dropout=0.2),
        bi_lstm(260, dropout=0.2),
        bi_lstm(180, dropout=0.2),
        bi_lstm(100),
        # Per-time-step regression head.
        layers.TimeDistributed(
            layers.Dense(64, activation='relu', kernel_initializer=initializers.HeNormal())
        ),
        layers.TimeDistributed(
            layers.Dense(1, kernel_initializer=initializers.HeNormal())
        ),
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
        loss='mae',
        metrics=["mae"],
    )
    return model

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: hold the rate through epoch 50, then decay it.

    From epoch 51 onwards the incoming rate is multiplied by 0.98 each epoch.
    Decaying relative to ``lr`` (rather than from a hard-coded 0.005 base)
    keeps the schedule continuous with whatever rate the optimizer started
    at, so the rate never jumps upwards when the decay phase begins.

    Parameters:
        epoch: zero-based epoch index supplied by the callback.
        lr: learning rate currently in use.

    Returns:
        The learning rate to use for this epoch.
    """
    print(f"EPOCH : {epoch} LEARNING RATE : {lr}")
    if epoch > 50:
        return lr * 0.98
    return lr
    
# Keras callback that asks scheduler() for the learning rate at the start of every epoch.
callback1 = tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)

In [None]:
# Training hyper-parameters.
EPOCH = 300
BATCH_SIZE = 512
# Fixed seed so the shuffled fold assignment is the same on every run.
FOLD_SEED = 42

# detect and init the TPU
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# instantiate a distribution strategy
# (tf.distribute.TPUStrategy replaces the deprecated experimental alias)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

with tpu_strategy.scope():
    # 5-fold cross-validation over the first axis of the 3-D arrays, i.e. whole
    # sequences are assigned to folds, never individual time steps.
    kf = KFold(n_splits=5, shuffle=True, random_state=FOLD_SEED)
    for fold, (train_idx, test_idx) in enumerate(kf.split(train_df, Y)):
        print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
        X_train, X_valid = train_df[train_idx], train_df[test_idx]

        y_train, y_valid = Y[train_idx], Y[test_idx]

        # A fresh, untrained model for every fold.
        model = build_model()

        # A callback to save the model with the lowest validation MAE of this fold
        callback0 = tf.keras.callbacks.ModelCheckpoint(f"PressurePreModel{fold+1}.h5", 
                                               monitor='val_mae',save_best_only=True, verbose=1)

        his = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCH, batch_size=BATCH_SIZE, callbacks=[callback0, callback1])

        # Plot the per-epoch training history of this fold.
        stats = pd.DataFrame(his.history)
        stats.plot(title=f"Fold {fold+1} training history")
        plt.show()
        print("\n\n")

In [None]:
# Disabled test-set preparation, kept as a bare string so it is not executed.
# The test features go through the same fitted scaler (rb) as the training
# features; without that step the model would be fed unscaled inputs.
'''
test_data = pd.read_csv(test_path)
test_data = preProcess(test_data)

test_data = dropCols(test_data, cols_2_drop)
test_data = rb.transform(test_data)

test_data = test_data.reshape(-1, 80, test_data.shape[-1])
'''

In [None]:
# Disabled submission cell, kept as a bare string so it is not executed.
# NOTE(review): `model` would be whatever the training loop left behind, i.e.
# the last fold's network in its final-epoch state, not the best checkpoints
# written by ModelCheckpoint - confirm that is intended before re-enabling.
# NOTE(review): the CSV path below repeats the value already held in `sample_sub`.
'''
p = model.predict(test_data)

submission_file = pd.read_csv("../input/ventilator-pressure-prediction/sample_submission.csv")
submission_file['pressure'] = p.reshape(-1, )
submission_file.to_csv('submission.csv', index=False)
'''