In [None]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
import random
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
import tensorflow as tf, gc
from tensorflow import keras
import tensorflow.keras.backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold
%matplotlib inline

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed handed to Python's ``random`` module, NumPy's
            legacy global generator and TensorFlow's global generator. Its
            string form is also stored in the ``PYTHONHASHSEED`` environment
            variable.

    Note:
        ``PYTHONHASHSEED`` is written into ``os.environ`` at runtime, so it is
        visible to child processes; the hash seed of the already-running
        interpreter is not changed by it.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Same seed for each library-level generator.
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
# Seed Python's `random`, NumPy and TensorFlow with 56 before any data or model code runs.
seed_everything(56)

class config:
    """Notebook-wide settings, grouped into three plain dictionaries.

    Attributes are read (and, in debug mode, mutated) directly on the class;
    it is never instantiated in this notebook.
    """

    # Locations of the competition CSV files: training set, test set and the
    # sample submission ("ss").
    paths = dict(
        train='../input/ventilator-pressure-prediction/train.csv',
        test='../input/ventilator-pressure-prediction/test.csv',
        ss='../input/ventilator-pressure-prediction/sample_submission.csv',
    )

    # Training knobs. EPOCH / BATCH_SIZE / NUM_FOLDS are overwritten when
    # 'debug' is True; 'is_train' is not read anywhere in the visible notebook.
    model_params = dict(
        is_train=True,
        debug=False,
        EPOCH=300,
        BATCH_SIZE=1024,
        NUM_FOLDS=8,
    )

    # Pressure bounds and step size used by the (currently commented-out)
    # rounding/clipping post-processing at the end of the notebook.
    post_processing = dict(
        max_pressure=64.82099173863948,
        min_pressure=-1.8957442945646408,
        diff_pressure=0.07030215,
    )

# Read the three competition CSVs from the locations listed in config.paths
# (same order as before: train, test, sample submission).
train, test, submission = (
    pd.read_csv(config.paths[name]) for name in ("train", "test", "ss")
)

# Debug mode: keep only the first 80*1000 training rows and shrink the
# training schedule so a full pass through the notebook is quick.
# NOTE(review): 80*1000 is presumably 1000 breaths of 80 rows each, matching
# the reshape(-1, 80) used further down - confirm.
if config.model_params["debug"]:
    print("[INFO] Debug Mode...")
    train = train[:80 * 1000]
    config.model_params.update({"EPOCH": 20, "BATCH_SIZE": 128, "NUM_FOLDS": 5})

def add_features(df):
    """Add per-breath engineered features and one-hot encode ``R`` / ``C``.

    The frame must contain the columns ``breath_id``, ``time_step``, ``u_in``,
    ``u_out``, ``R`` and ``C``. Every grouped operation is done per
    ``breath_id``.

    Features created, in this column order:
      * ``area``: running sum of ``time_step * u_in``; ``u_in_cumsum``:
        running sum of ``u_in``.
      * ``u_{in,out}_lag{k}`` / ``u_{in,out}_lag_back{k}`` for k = 1..4:
        values shifted k rows backward / forward inside the breath.
      * NaNs (introduced by the shifts at breath boundaries) are replaced by 0
        across the whole frame.
      * ``breath_id__u_in__max`` / ``breath_id__u_out__max``: per-breath maxima.
      * ``u_{in,out}_diff{k}`` for k = 1..4: current value minus the k-lag
        (so rows without a lag subtract the 0 fill value).
      * ``breath_id__u_in__diffmax`` / ``breath_id__u_in__diffmean``:
        per-breath max / mean of ``u_in`` minus the current ``u_in``.
      * ``R``, ``C`` and the combined ``R__C`` are converted to strings and
        one-hot encoded by ``pd.get_dummies``.

    Compared with the previous version, the diffmax/diffmean columns are
    computed once instead of twice, and the per-breath group-bys are reused;
    the resulting columns, their order and their values are unchanged.

    Args:
        df: Input DataFrame.

    Returns:
        A new DataFrame with the features above. Side effect kept from the
        original implementation: the cumulative and lag columns are also
        written into the caller's ``df`` (everything after the ``fillna``
        lives only in the returned frame).
    """
    breath = df['breath_id']

    # Cumulative features.
    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()
    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()

    # Lag (past) and lead (future) values, never crossing a breath boundary.
    u_in_by_breath = df['u_in'].groupby(breath)
    u_out_by_breath = df['u_out'].groupby(breath)
    for k in range(1, 5):
        df[f'u_in_lag{k}'] = u_in_by_breath.shift(k)
        df[f'u_out_lag{k}'] = u_out_by_breath.shift(k)
        df[f'u_in_lag_back{k}'] = u_in_by_breath.shift(-k)
        df[f'u_out_lag_back{k}'] = u_out_by_breath.shift(-k)

    # The shifts leave NaN at the start/end of every breath; treat those as 0.
    # fillna returns a new frame, so from here on the caller's frame is untouched.
    df = df.fillna(0)

    # Re-group on the filled frame so the statistics see exactly the values
    # the original code saw.
    breath = df['breath_id']
    u_in_by_breath = df['u_in'].groupby(breath)
    u_in_max = u_in_by_breath.transform('max')

    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_out__max'] = df['u_out'].groupby(breath).transform('max')

    for k in (1, 2):
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']

    # Computed once (the original recomputed both columns a second time).
    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = u_in_by_breath.transform('mean') - df['u_in']

    for k in (3, 4):
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']

    # String-typed R / C (and their combination) are what get_dummies encodes;
    # numeric columns pass through unchanged.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    return pd.get_dummies(df)

# Run the same feature engineering on both frames.
train = add_features(train)
test = add_features(test)

# One row of 80 pressure values per group of 80 consecutive rows
# (the rest of this cell treats every breath as 80 time steps).
targets = train['pressure'].to_numpy().reshape(-1, 80)

# Drop the target and the identifier columns; reassign instead of mutating
# in place so re-running the cell cannot act on a half-modified frame.
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

# add_features one-hot encodes each frame on its own, so a category present
# in only one of them would leave the two frames with different columns and
# the scaler/model would receive misaligned features. Align test to train:
# missing indicator columns become 0, columns unknown to train are dropped.
# Nothing is copied when the columns already agree.
if list(test.columns) != list(train.columns):
    test = test.reindex(columns=train.columns, fill_value=0)

# Fit the scaler on the training features only, then apply it to both sets.
RS = RobustScaler()
train = RS.fit_transform(train)
test = RS.transform(test)

# Back to sequences: (number of breaths, 80 time steps, number of features).
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])

def create_model(strategy):
    """Build and compile the stacked bidirectional-LSTM regressor.

    The network is created inside ``strategy.scope()`` so that its variables
    are placed according to the given distribution strategy. The input shape
    is taken from the last two dimensions of the module-level ``train`` array.

    Architecture: four bidirectional LSTM layers (1024, 1024, 512, 256 units,
    all returning full sequences), a 128-unit SELU dense layer and a single
    linear output unit per time step. Compiled with the Adam optimizer and
    mean-absolute-error loss.

    The output layer is pinned to float32. When the global Keras policy is
    ``mixed_float16`` (enabled on the non-TPU path of this notebook) the layer
    would otherwise emit float16 values, whose spacing in the range of the
    configured pressure bounds is a few hundredths - enough to distort both
    the MAE loss and the final predictions. Under a float32 policy the
    argument changes nothing.

    Args:
        strategy: A ``tf.distribute`` strategy object providing ``scope()``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    with strategy.scope():
        model = Sequential([keras.layers.Input(shape=train.shape[-2:]),
                            keras.layers.Bidirectional(keras.layers.LSTM(1024, return_sequences=True)),
                            keras.layers.Bidirectional(keras.layers.LSTM(1024, return_sequences=True)),
                            keras.layers.Bidirectional(keras.layers.LSTM(512, return_sequences=True)),
                            keras.layers.Bidirectional(keras.layers.LSTM(256, return_sequences=True)),
                            keras.layers.Dense(128, activation='selu'),
                            #keras.layers.Dropout(0.1),
                            # Keep the regression head in full precision (see docstring).
                            keras.layers.Dense(1, dtype='float32'), ])
        model.compile(optimizer='adam', loss='mae')
    return model

def plot_hist(hist):
    """Plot the training and validation loss curves of one ``fit`` run.

    Args:
        hist: Object whose ``history`` mapping holds the per-epoch ``"loss"``
            and ``"val_loss"`` sequences (as returned by ``model.fit`` in this
            notebook).
    """
    # Training curve first, validation second - the legend relies on that order.
    for curve in ("loss", "val_loss"):
        plt.plot(hist.history[curve])

    plt.title("model performance")
    plt.ylabel("mean_absolute_error")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
print(tf.version.VERSION)

# Pick a distribution strategy: use a TPU when one can be resolved, otherwise
# fall back to MirroredStrategy with XLA and mixed precision enabled.
try:
    tpu = None
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Prefer the stable TPUStrategy; only releases that lack it still need the
    # deprecated experimental alias.
    tpu_strategy_cls = (getattr(tf.distribute, 'TPUStrategy', None)
                        or tf.distribute.experimental.TPUStrategy)
    strategy = tpu_strategy_cls(tpu)
except ValueError:
    # The resolver raised ValueError, i.e. no TPU could be resolved.
    strategy = tf.distribute.MirroredStrategy()
    tf.config.optimizer.set_jit(True) # XLA compilation
    # The experimental mixed-precision API is deprecated and missing from
    # newer TensorFlow releases; use the stable API when present and keep the
    # experimental one as a fallback for old installations.
    mixed_precision = tf.keras.mixed_precision
    if hasattr(mixed_precision, 'set_global_policy'):
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_global_policy(policy)
    else:
        policy = mixed_precision.experimental.Policy('mixed_float16')
        mixed_precision.experimental.set_policy(policy)
    print('Mixed precision enabled')
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# K-fold cross-validation over the first axis of `train` / `targets`
# (one entry per breath after the earlier reshape). Each fold trains a fresh
# model and contributes one flattened prediction vector for the test set.
kf = KFold(n_splits=config.model_params["NUM_FOLDS"], shuffle=True, random_state=56)
test_preds = []

for fold, (train_idx, valid_idx) in enumerate(kf.split(train, targets)):
    # Drop the previous fold's graph/state before building a new model.
    K.clear_session()
    print(f"\nFOLD: {fold}")
    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = targets[train_idx], targets[valid_idx]

    checkpoint_filepath = f"./folds_{fold}.hdf5"
    # Halve the learning rate after 10 epochs without val_loss improvement.
    lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=0)
    # Stop after 60 epochs without val_loss improvement.
    es = EarlyStopping(monitor="val_loss", patience=60, verbose=0, mode="min", restore_best_weights=True)
    # Keep the best (lowest val_loss) full model of this fold on disk.
    sv = keras.callbacks.ModelCheckpoint(checkpoint_filepath, monitor='val_loss', verbose=0,
                                         save_best_only=True, save_weights_only=False, mode='auto',
                                         save_freq='epoch')

    model = create_model(strategy)

    history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                        epochs=config.model_params["EPOCH"], batch_size=config.model_params["BATCH_SIZE"],
                        callbacks = [lr, es, sv])

    # Predict with the best epoch, not merely the last one. EarlyStopping's
    # restore_best_weights may only take effect when early stopping actually
    # fires (version dependent - see the Keras EarlyStopping docs), so if
    # training runs through all epochs the model can be left with its
    # final-epoch weights. The checkpoint always holds the best epoch.
    if os.path.exists(checkpoint_filepath):
        model.load_weights(checkpoint_filepath)

    # Flatten the (breaths, steps, 1) predictions to one value per test row.
    test_preds.append(model.predict(test, batch_size=config.model_params["BATCH_SIZE"],
                                    verbose=1).reshape(-1))
    plot_hist(history)
    # Free the fold's arrays and model before the next iteration.
    del X_train, X_valid, y_train, y_valid, model
    gc.collect()

In [None]:
# Ensemble the folds: stack the per-fold prediction vectors row-wise and take
# the element-wise median, then write the raw (not post-processed) submission.
stacked_fold_preds = np.vstack(test_preds)
submission["pressure"] = np.median(stacked_fold_preds, axis=0)
submission.to_csv('submission_raw.csv', index=False)

# Disabled post-processing, kept for reference: it would round predictions to
# the grid min_pressure + k * diff_pressure, clip them to
# [min_pressure, max_pressure] and write 'submission.csv'.
#submission["pressure"] = np.round((submission.pressure - config.post_processing["min_pressure"])/config.post_processing["diff_pressure"]) * config.post_processing["diff_pressure"] + config.post_processing["min_pressure"]
#submission.pressure = np.clip(submission.pressure, config.post_processing["min_pressure"], config.post_processing["max_pressure"])
#display(submission.head())
#submission.to_csv('submission.csv', index=False)