In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import os 
import time 
import json 
import requests 
from tqdm import tqdm 
import wandb 
from wandb.keras import WandbCallback 
from kaggle_secrets import UserSecretsClient 
import random 
from typing import Tuple 
import gc 

from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import LSTM, Input, Bidirectional, Dense 
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K


def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    The same value is applied to TensorFlow's global generator, Python's
    ``random`` module and NumPy's legacy global generator, and is exported
    as the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    tf.random.set_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# Seed all random number generators with the function's default seed.
seed_everything()

In [None]:
# Run configuration shared by the whole notebook.
config = {
    # Run metadata.
    "competition": "ventilator",
    "infra": "kaggle",
    "train": True,
    "type": "train",
    "debug": False,
    "inference": True,

    # Model and training settings.
    "model_name": "lstm",
    "frame_word": "tensorflow",
    "device": "tpu",
    "n_fold": 5,
    "early_stopping_rounds": 30,
    "batch_size": 1024,
    "epoch": 299,
    "verbose": 100,
    "seed": 42,
}


In [None]:
# A single Kaggle secrets client serves both lookups.
user_secrets = UserSecretsClient()
url = user_secrets.get_secret("WEB_HOOK_URL")  # webhook URL that slack() posts to
api = user_secrets.get_secret("wandb_api")  # API key handed to wandb.login()

wandb.login(key=api)

# Start the Weights & Biases run; the whole config dict is attached to it.
run = wandb.init(
    project=config["competition"],
    name=config["model_name"],
    config=config,
    group=config["model_name"],
    job_type=config["type"],
)

def slack(txt):
    """Post a best-effort notification message to the configured webhook.

    The message is sent as a JSON body to the module-level ``url``. Because
    this is only a notification, network problems and HTTP error responses
    are reported on stdout instead of being raised, so a failing webhook
    cannot abort the caller (for example at the very end of a long training
    run).

    Args:
        txt: Text of the message to send.
    """
    payload = json.dumps({
        "username": "kaggle",
        "text": txt
    })
    try:
        # The timeout keeps an unreachable endpoint from blocking the notebook forever.
        response = requests.post(url, data=payload, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"slack notification failed: {exc}")

In [None]:
# In debug mode only the first 80*100 rows of each file are read;
# nrows=None lets pandas read the complete file.
_nrows = 80*100 if config["debug"] else None
train = pd.read_csv("../input/ventilator-pressure-prediction/train.csv", nrows=_nrows)
test = pd.read_csv("../input/ventilator-pressure-prediction/test.csv", nrows=_nrows)

# Sorted distinct target values: the first and last give the pressure range,
# and the gap between the two smallest values is taken as the grid step.
sort = np.sort(train.pressure.unique())
PRESSURE_MIN, PRESSURE_MAX = sort[0], sort[-1]
PRESSURE_STEP = sort[1] - sort[0]

In [None]:
def reduce_mem_usage(train_data):
    """Shrink a DataFrame's memory footprint by downcasting column dtypes.

    The frame is modified in place and also returned.

    * ``object`` columns are converted to ``category``.
    * Signed NumPy integer columns are cast to the narrowest of int8 / int16 /
      int32 whose range strictly contains the column's min and max. Bounds are
      compared strictly, so a column touching a type's extreme value keeps the
      next wider type. Columns are never widened.
    * 64-bit float columns are cast to float32 when their range fits. Floats
      are deliberately never reduced to float16: a range check says nothing
      about precision, and half precision keeps only about three significant
      decimal digits, which corrupts continuous features and targets.
    * Every other dtype (bool, unsigned ints, category, other extension
      dtypes, ...) is left untouched, which also makes the function safe to
      call again on an already reduced frame.

    Args:
        train_data: DataFrame to optimise.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    start_mem = train_data.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    float32_limits = np.finfo(np.float32)
    for col in train_data.columns:
        col_type = train_data[col].dtype

        if col_type == object:
            train_data[col] = train_data[col].astype('category')
            continue

        # Only plain NumPy signed-int and float columns are downcast.
        kind = col_type.kind if isinstance(col_type, np.dtype) else None
        if kind not in ('i', 'f'):
            continue

        c_min = train_data[col].min()
        c_max = train_data[col].max()

        if kind == 'i':
            for candidate in (np.int8, np.int16, np.int32):
                if np.dtype(candidate).itemsize >= col_type.itemsize:
                    break  # already at least this narrow
                limits = np.iinfo(candidate)
                if limits.min < c_min and c_max < limits.max:
                    train_data[col] = train_data[col].astype(candidate)
                    break
        elif (col_type.itemsize > 4
              and float32_limits.min < c_min and c_max < float32_limits.max):
            # NaN min/max (empty or all-NaN column) fail both comparisons,
            # so such columns keep their dtype.
            train_data[col] = train_data[col].astype(np.float32)

    end_mem = train_data.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the percentage against a frame that occupies no memory at all.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return train_data

In [None]:
# Downcast the column dtypes of both frames to reduce their memory footprint.
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

In [None]:
def lag_feature(df) -> pd.DataFrame:
    """Add per-breath sequence features to ``df`` in place and return it.

    All grouped features are computed within each ``breath_id``:

    * ``area`` (cumulative sum of ``time_step * u_in``) and ``u_in_cumsum``;
    * lags (``*_lag1..4``) and leads (``*_back1..4``) of ``u_in`` / ``u_out``,
      with positions outside the breath filled with 0;
    * 10-step rolling means of ``u_in`` / ``u_out`` (0 until the window is full);
    * per-breath max / min / mean of ``u_in`` and ``u_out``;
    * differences of the current value to its lags, leads and the per-breath
      ``u_in`` statistics;
    * ``cross`` (``u_in * u_out``), ``cross2`` (``time_step * u_out``) and
      ``time_class`` (row position inside the breath).

    ``R`` and ``C`` are converted to strings and combined into ``R_C``.

    Args:
        df: Frame with ``breath_id``, ``time_step``, ``u_in``, ``u_out``,
            ``R`` and ``C`` columns.

    Returns:
        The same DataFrame object with the feature columns added.
    """
    breath_ids = df["breath_id"]
    # Build the grouped views once instead of regrouping for every feature.
    u_in_by_breath = df["u_in"].groupby(breath_ids)
    u_out_by_breath = df["u_out"].groupby(breath_ids)

    df["area"] = (df.time_step * df.u_in).groupby(breath_ids).cumsum()
    df["u_in_cumsum"] = u_in_by_breath.cumsum()

    for step in range(1, 5):
        df[f"u_in_lag{step}"] = u_in_by_breath.shift(step).fillna(0)
        df[f"u_out_lag{step}"] = u_out_by_breath.shift(step).fillna(0)

        df[f"u_in_back{step}"] = u_in_by_breath.shift(-step).fillna(0)
        df[f"u_out_back{step}"] = u_out_by_breath.shift(-step).fillna(0)

    # groupby().rolling() returns a (breath_id, original index) MultiIndex in
    # group order. Dropping only the group level keeps the original row labels,
    # so the assignment aligns by label instead of by position and stays
    # correct even when rows are not ordered by ascending breath_id.
    df["u_out_rolling_10"] = (
        u_out_by_breath.rolling(window=10).mean()
        .reset_index(level=0, drop=True)
        .fillna(0)
    )
    df["u_in_rolling_10"] = (
        u_in_by_breath.rolling(window=10).mean()
        .reset_index(level=0, drop=True)
        .fillna(0)
    )

    df["u_in_max"] = u_in_by_breath.transform("max")
    df["u_in_min"] = u_in_by_breath.transform("min")
    df["u_in_mean"] = u_in_by_breath.transform("mean")
    df["u_out_max"] = u_out_by_breath.transform("max")
    df["u_out_min"] = u_out_by_breath.transform("min")
    df["u_out_mean"] = u_out_by_breath.transform("mean")

    for step in range(1, 5):
        df[f"u_in_diff{step}"] = df["u_in"] - df[f"u_in_lag{step}"]
        df[f"u_in_diff_back{step}"] = df["u_in"] - df[f"u_in_back{step}"]

        df[f"u_out_diff{step}"] = df["u_out"] - df[f"u_out_lag{step}"]
        df[f"u_out_diff_back{step}"] = df["u_out"] - df[f"u_out_back{step}"]

    df["u_in_diff_max"] = df["u_in_max"] - df["u_in"]
    df["u_in_diff_min"] = df["u_in_min"] - df["u_in"]
    df["u_in_diff_mean"] = df["u_in_mean"] - df["u_in"]

    df["cross"] = df["u_in"] * df["u_out"]
    df["cross2"] = df["time_step"] * df["u_out"]

    df["time_class"] = df.groupby("breath_id").cumcount()
    # R and C become strings so they can be concatenated into the R_C key.
    df["R"] = df.R.astype(str)
    df["C"] = df.C.astype(str)
    df["R_C"] = df.R + "_" + df.C
    gc.collect()
    return df

def _add_group_mean(train, test, keys, name):
    """Attach the train-set mean of ``u_in`` per ``keys`` group to both frames as column ``name``."""
    # Selecting the column before aggregating avoids averaging every column of
    # the frame, which is wasteful and raises on string columns in pandas >= 2.0.
    group_mean = train.groupby(keys)["u_in"].mean().rename(name)
    train = train.join(group_mean, on=keys)
    test = test.join(group_mean, on=keys)
    del group_mean
    gc.collect()
    return train, test


def group_feature(train, test) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Add group-mean features of ``u_in`` and one-hot encode ``R``, ``C`` and ``R_C``.

    Every group mean is computed on ``train`` only and then left-joined onto
    both frames, so test rows receive the statistics learned from train.
    The categorical columns are encoded on the concatenation of both frames so
    that train and test end up with identical dummy columns.

    Args:
        train: Training frame containing ``time_class``, ``R``, ``C``,
            ``R_C``, ``u_in`` and ``pressure``.
        test: Test frame with the same columns except ``pressure``.

    Returns:
        ``(train, test)`` as new frames; ``pressure`` is the last column of the
        returned train frame.
    """
    # time_class x u_in
    train, test = _add_group_mean(train, test, "time_class", "u_in_time_class")

    print(1)

    # R x u_in
    train, test = _add_group_mean(train, test, "R", "u_in_r_mean")

    # C x u_in
    train, test = _add_group_mean(train, test, "C", "u_in_c_mean")

    print(2)

    # R_C x u_in
    train, test = _add_group_mean(train, test, "R_C", "u_in_rc_mean")

    print(3)

    # (R_C, time_class) x u_in
    train, test = _add_group_mean(train, test, ["R_C", "time_class"], "u_in_rc_time_mean")

    print(4)

    # One-hot encode the categorical columns on the stacked frames.
    last_train_shape = train.shape[0]
    y = train.pressure.values.ravel()
    df = pd.concat([train.drop("pressure", axis=1), test])
    df = pd.get_dummies(data=df, columns=["R", "C", "R_C"])
    # Copy the train slice so the target can be re-attached without writing
    # into a slice of `df` (which triggers SettingWithCopyWarning).
    train = df.iloc[:last_train_shape, :].copy()
    test = df.iloc[last_train_shape:, :]
    del df
    train["pressure"] = y
    del y
    gc.collect()
    return train, test

In [None]:
%%time 

# Add the per-breath lag/lead, rolling and summary features to each frame.
train = lag_feature(train)
test = lag_feature(test)

In [None]:
%%time 

# Add train-derived group means of u_in to both frames and one-hot encode R, C and R_C.
train, test = group_feature(train, test)

# Model 

In [None]:
@tf.custom_gradient
def round_with_gradients(x):
    """Round ``x`` element-wise while passing gradients through unchanged.

    The forward pass returns ``tf.round(x)``; the custom backward pass returns
    the upstream gradient as is, so rounding is treated as the identity
    during backpropagation.
    """
    rounded = tf.round(x)

    def passthrough(upstream):
        # Identity gradient: hand the incoming gradient straight back.
        return upstream

    return rounded, passthrough

class ScaleLayer(tf.keras.layers.Layer):
    """Snap inputs onto the discrete pressure grid and clip them to its range.

    The grid is defined by the module-level ``PRESSURE_MIN``, ``PRESSURE_MAX``
    and ``PRESSURE_STEP`` values, captured as float32 constants when the layer
    is constructed.

    Args:
        **kwargs: Standard Keras layer arguments (``name``, ``trainable``,
            ``dtype``, ...). Accepting them lets Keras rebuild the layer from
            its saved config, which passes these arguments back to
            ``__init__``; without them a full-model checkpoint containing this
            layer cannot be loaded.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.min = tf.constant(PRESSURE_MIN, dtype=np.float32)
        self.max = tf.constant(PRESSURE_MAX, dtype=np.float32)
        self.step = tf.constant(PRESSURE_STEP, dtype=np.float32)

    def call(self, inputs):
        """Return ``inputs`` rounded to the nearest grid value and clipped to [min, max]."""
        # Express the input as a (fractional) number of grid steps above the minimum.
        steps = tf.math.divide(tf.math.add(inputs, -self.min), self.step)
        # Round to whole steps; the custom gradient keeps the layer trainable through the rounding.
        int_steps = round_with_gradients(steps)
        rescaled_steps = tf.math.add(tf.math.multiply(int_steps, self.step), self.min)
        clipped = tf.clip_by_value(rescaled_steps, self.min, self.max)
        return clipped
    
def build_model(input_shape):
    """Build and compile the stacked bidirectional LSTM regressor.

    The network is an input of ``input_shape``, four bidirectional LSTM layers
    (1024, 512, 256 and 128 units, each returning full sequences), a 128-unit
    SELU dense layer, a single-unit dense output and a final ``ScaleLayer``.
    It is compiled with the Adam optimizer and MAE loss.

    Args:
        input_shape: Shape of one sample, passed to ``Input(shape=...)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    lstm_units = (1024, 512, 256, 128)

    stack = [Input(shape=input_shape)]
    stack.extend(
        Bidirectional(LSTM(units, return_sequences=True)) for units in lstm_units
    )
    stack.append(Dense(128, activation="selu"))
    stack.append(Dense(1))
    stack.append(ScaleLayer())

    model = Sequential(stack)
    model.compile(optimizer="adam", loss="mae")
    return model

# Preview the architecture with the feature count that train_nn() actually
# feeds the network: every column except the two identifiers and the target.
# (train.shape[-1] would count those three columns as features too.)
n_features = len(train.columns.difference(["id", "breath_id", "pressure"]))
model = build_model((80, n_features))
model.summary()

# Train 

In [None]:
if config["debug"] is not True and config["device"] == "tpu":
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

    # instantiate a distribution strategy
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # Debug runs and non-TPU devices fall back to TensorFlow's default
    # strategy, so that train_nn() can still enter `tpu_strategy.scope()`
    # instead of failing with a NameError.
    tpu_strategy = tf.distribute.get_strategy()


def scaler(tr, va, te):
    """Scale three feature sets with a ``RobustScaler`` fitted on the first one.

    Args:
        tr: Data the scaler is fitted on (and which is transformed too).
        va: Second data set, transformed with the fitted scaler.
        te: Third data set, transformed with the fitted scaler.

    Returns:
        Tuple ``(tr_scaled, va_scaled, te_scaled)``.
    """
    robust = RobustScaler()
    tr_scaled = robust.fit_transform(tr)
    va_scaled = robust.transform(va)
    te_scaled = robust.transform(te)
    return tr_scaled, va_scaled, te_scaled


def mae(pred, corr):
    """Return the mean absolute error between predictions and true values.

    Args:
        pred: Predicted values.
        corr: Correct (ground-truth) values, compatible in shape with ``pred``.

    Returns:
        Mean of the element-wise absolute differences.
    """
    abs_err = np.abs(pred - corr)
    return np.mean(abs_err)


def submit(pred, name):
    """Write a submission file containing the given predictions.

    Reads the competition's sample submission, replaces its ``pressure``
    column with ``pred`` and saves it as ``submission_lstm_<name>.csv``
    (without the index) in the working directory.

    Args:
        pred: Predicted pressures, one per row of the sample submission.
        name: Suffix used in the output file name.
    """
    submission = pd.read_csv("../input/ventilator-pressure-prediction/sample_submission.csv")
    submission["pressure"] = pred
    submission.to_csv(f"submission_lstm_{name}.csv", index=False)
    
    
def callbacks_tools(fold) -> Tuple[object, object, object, object]:
    """Create the Keras callbacks used to train one fold.

    Args:
        fold: Fold identifier; the checkpoint is written to
            ``models/<fold>.hdf5`` (the directory is created if missing).

    Returns:
        Tuple ``(lr, es, sv, wb)``:
        learning-rate reduction on plateau (factor 0.5, patience 10),
        early stopping on ``val_loss`` (patience 60, best weights restored),
        best-only full-model checkpointing, and the Weights & Biases callback.
    """
    lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)
    # NOTE(review): config["early_stopping_rounds"] (30) is not used here; the
    # patience is hard-coded to 60 - confirm which value is intended.
    es = EarlyStopping(monitor="val_loss", patience=60, verbose=1,
                       mode="min", restore_best_weights=True)
    os.makedirs("models", exist_ok=True)
    checkpoint_filepath = f"models/{fold}.hdf5"
    # Only non-default arguments are passed; mode, save_freq and options are left at their defaults.
    sv = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath, monitor='val_loss', verbose=1, save_best_only=True,
        save_weights_only=False
    )
    wb = WandbCallback(log_weights=True)
    return lr, es, sv, wb


def viz_predict(corr, pred):
    """Plot histograms of the true labels and the predictions side by side.

    Args:
        corr: True target values, shown in the left panel titled "Label".
        pred: Predicted values, shown in the right panel titled "Predict".
    """
    _, (label_ax, predict_ax) = plt.subplots(1, 2, figsize=(15, 6))

    sns.histplot(corr, ax=label_ax)
    label_ax.set_title("Label")

    sns.histplot(pred, ax=predict_ax)
    predict_ax.set_title("Predict")

    plt.show()
    
def _snap_to_pressure_grid(pred):
    """Round predictions to the nearest value of the pressure grid and clip them to its range."""
    # The division by the step must happen *before* rounding; rounding the raw
    # offset would snap to whole pressure units instead of to grid steps.
    steps = np.round((pred - PRESSURE_MIN) / PRESSURE_STEP)
    return np.clip(steps * PRESSURE_STEP + PRESSURE_MIN, PRESSURE_MIN, PRESSURE_MAX)


def train_nn(train, test):
    """Train one model per GroupKFold fold, report CV MAE and write submissions.

    For every fold (grouped by ``breath_id``) the features are robust-scaled
    with statistics from the fold's training part, reshaped to sequences of
    80 steps, and a fresh model is fitted. Validation and test predictions are
    collected per fold. After the last fold the out-of-fold predictions are
    put back into the row order of ``train`` and the overall MAE is printed.
    Unless ``config["debug"]`` is True, four submission files are written
    (fold mean, fold median, and both snapped to the pressure grid).

    Relies on the module-level ``tpu_strategy``, ``config`` and
    ``PRESSURE_MIN`` / ``PRESSURE_MAX`` / ``PRESSURE_STEP``.

    Args:
        train: Feature frame including ``id``, ``breath_id`` and ``pressure``.
        test: Feature frame containing at least the feature columns of ``train``.

    Returns:
        1-D array of out-of-fold predictions aligned with the rows of ``train``.
    """
    non_feature_cols = ["id", "breath_id", "pressure"]

    with tpu_strategy.scope():
        predict_val, val_idx, predict_test = [], [], []
        kf = GroupKFold(config["n_fold"])

        for fold, (tr, va) in enumerate(kf.split(train, train.pressure, train.breath_id)):
            print(f"=====================fold: {fold+1}=========================")
            x_train = train.iloc[tr].drop(non_feature_cols, axis=1)
            x_val = train.iloc[va].drop(non_feature_cols, axis=1)
            y_train, y_val = train.iloc[tr]["pressure"], train.iloc[va]["pressure"]
            use_col = x_train.columns
            x_test = test[use_col]

            # transform shape
            x_train, x_val, x_test = scaler(x_train, x_val, x_test)
            n_features = len(use_col)
            x_train = x_train.reshape(-1, 80, n_features)
            x_val = x_val.reshape(-1, 80, n_features)
            x_test = x_test.reshape(-1, 80, n_features)
            y_train = y_train.values.reshape(-1, 80, 1)
            y_val = y_val.values.reshape(-1, 80, 1)

            # setup models
            model = build_model((80, n_features))
            lr, es, sv, wb = callbacks_tools(fold+1)

            # training step
            model.fit(x_train,
                      y_train,
                      validation_data=(x_val, y_val),
                      callbacks=[lr, es, sv, wb],
                      epochs=1 if config["debug"] else config["epoch"],
                      batch_size=config["batch_size"])

            # prediction val test (flattened to one value per input row)
            pred_v = model.predict(x_val, batch_size=config["batch_size"], verbose=config["verbose"]).reshape(-1)
            pred_t = model.predict(x_test, batch_size=config["batch_size"], verbose=config["verbose"]).reshape(-1)
            predict_val.append(pred_v)
            predict_test.append(pred_t)
            val_idx.append(va)

            print(f"fold: {fold+1} | MAE: {mae(pred_v, y_val.reshape(-1))}")

            del x_train, x_val, x_test, model
            gc.collect()

        # concat prediction; argsort of the validation row positions restores
        # the original row order of `train`
        predict_val = np.concatenate(predict_val)
        val_idx = np.concatenate(val_idx)
        val_idx = np.argsort(val_idx)
        predict_val = predict_val[val_idx]
        del val_idx

        # finally cv score
        print("#############################################################")
        print(f"LSTM CV: {mae(predict_val, train.pressure.values.ravel())}")
        print("#############################################################")

        # predict test transform
        predict_mean = np.mean(predict_test, 0)
        predict_median = np.median(predict_test, 0)
        predict_mean_clip = _snap_to_pressure_grid(predict_mean)
        predict_median_clip = _snap_to_pressure_grid(predict_median)

        # submit
        if config["debug"] is not True:
            submit(predict_mean, "mean")
            submit(predict_median, "median")
            submit(predict_mean_clip, "mean_clip")
            submit(predict_median_clip, "median_clip")

        gc.collect()
        try:
            slack("lstm done.")
        except requests.RequestException as exc:
            # A failed notification must not discard the finished predictions.
            print(f"slack notification failed: {exc}")
        return predict_val


In [None]:
# Run the cross-validated training; the result holds the out-of-fold predictions in train row order.
pred_v = train_nn(train, test)

In [None]:
# Compare the distribution of the true pressures with that of the out-of-fold predictions.
viz_predict(train.pressure.values.ravel(), pred_v)