In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import os 
import time 
import json 
import requests 
from tqdm import tqdm 
import wandb 
from wandb.keras import WandbCallback 
from kaggle_secrets import UserSecretsClient 
import random 
from typing import Tuple 
import gc 

from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras import layers , Model
from tensorflow.keras.layers import MultiHeadAttention, Input, Dropout, Dense, Conv1D, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K
# from keras_pos_embd import PositionEmbedding

def seed_everything(seed=42):
    """Seed the Python, NumPy and TensorFlow random number generators.

    ``PYTHONHASHSEED`` is also written to the process environment.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Same order as before: NumPy, then ``random``, then TensorFlow.
    for set_seed in (np.random.seed, random.seed, tf.random.set_seed):
        set_seed(seed)
    
# Seed every RNG once, with the function's default seed, before any other work.
seed_everything()
# Do not truncate columns when displaying wide feature frames.
pd.set_option("display.max_columns", None)

# Config 

In [None]:
# Run-level settings. The whole dict is also passed to wandb.init as the run config.
# Keys without a comment are not read by the code shown in this notebook.
config = dict(
    # wandb project name
    competition = "ventilator", 
    infra = "kaggle", 
    train = True, 
    # wandb job_type
    type = "train", 
    # True: read only 80*100 rows, use 2 folds and 1 epoch, skip TPU setup and submission files
    debug = False, 
    inference = True, 
    
    # wandb run name and group
    model_name = "transformer", 
    frame_word = "tensorflow", 
    # "tpu" enables the TPU initialisation cell (when debug is off)
    device = "tpu", 
    # number of GroupKFold splits
    n_fold = 5, 
    # NOTE(review): not consulted by callback_tools, which uses patience=60 — confirm intent
    early_stopping_rounds = 30, 
    # batch size for fit and predict
    batch_size = 1024, 
    # maximum number of training epochs
    epoch = 530, 
    # passed as `verbose` to model.predict
    verbose = 100, 
    # NOTE(review): seed_everything() is called with its own default rather than this value
    seed = 42 
)

# Model hyper-parameters; they become the default arguments of base_model.
params = {
    # (rows per sequence, features per row): Input shape and the reshape target in train_fn
    "input_size": (80, 73),
    # width of the first Dense projection and of the positional encoding
    "hidden_dim": 128, 
    # key_dim of each MultiHeadAttention layer
    "head_size": 256, 
    # number of attention heads
    "num_heads": 12, 
    # filters of the first Conv1D in each encoder block's feed-forward part
    "ff_dim": 4, 
    # number of stacked encoder blocks
    "num_transformer_blocks": 4, 
    # widths of the Dense layers applied after the encoder stack
    "mlp_units": [128], 
    # dropout rate inside the encoder blocks
    "dropout": 0.2, 
    # dropout rate after each MLP Dense layer
    "mlp_dropout": 0 , 
}

In [None]:
# One Kaggle secrets client is enough to read both secrets.
user_secrets = UserSecretsClient()
# Webhook URL that slack() posts to.
url = user_secrets.get_secret("WEB_HOOK_URL")
# API key that setup_db() passes to wandb.login.
api = user_secrets.get_secret("wandb_api")


def setup_db():
    """Log in to Weights & Biases and start a run described by ``config``.

    Returns:
        The object returned by ``wandb.init``.
    """
    wandb.login(key=api)
    init_kwargs = dict(
        project=config["competition"],
        name=config["model_name"],
        config=config,
        group=config["model_name"],
        job_type=config["type"],
    )
    return wandb.init(**init_kwargs)

def slack(txt):
    """Post ``txt`` to the webhook stored in ``url`` as user "kaggle".

    The notification is best effort: the request is bounded by a timeout and any
    request failure is printed instead of raised, so a broken webhook cannot
    abort the caller (for example at the very end of a long training run).

    Args:
        txt: Message text to send.
    """
    payload = json.dumps({
        "username": "kaggle",
        "text": txt
    })
    try:
        # Without a timeout requests may wait indefinitely for the server.
        response = requests.post(url, data=payload, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Slack notification failed: {exc}")

# Feature engineering 

In [None]:
# Debug runs read only the first 80*100 rows of each file; otherwise everything is read.
_nrows = 80*100 if config["debug"] else None
train = pd.read_csv("../input/ventilator-pressure-prediction/train.csv", nrows=_nrows)
test = pd.read_csv("../input/ventilator-pressure-prediction/test.csv", nrows=_nrows)

# Sorted distinct target values, used to derive the target's range and spacing.
sort = np.sort(train.pressure.unique())
PRESSURE_MIN, PRESSURE_MAX = sort[0], sort[-1]
# Spacing is taken from the two smallest distinct values.
PRESSURE_STEP = sort[1] - sort[0]

In [None]:
def reduce_mem_usage(train_data):
    """Downcast the columns of ``train_data`` in place to reduce memory use.

    * ``object`` columns become ``category``.
    * Signed-integer columns are cast to the smallest of int8/16/32/64 whose
      range strictly contains the column's min and max.
    * Float columns are cast to float16 or float32 when the range fits, else
      float64.
    * Every other dtype (bool, unsigned integer, categorical, datetime,
      pandas extension dtypes) is left untouched. Previously such columns fell
      into the float branch, which turned bool/uint8 indicator columns into
      float16 (doubling their size) and raised on categorical columns.

    Args:
        train_data: DataFrame to shrink; it is modified in place.

    Returns:
        The same DataFrame object.
    """
    start_mem = train_data.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    for col in train_data.columns:
        col_type = train_data[col].dtype

        if col_type == object:
            train_data[col] = train_data[col].astype('category')
            continue

        # Only plain NumPy signed-integer ("i") and float ("f") columns are downcast.
        if not isinstance(col_type, np.dtype) or col_type.kind not in ("i", "f"):
            continue

        c_min = train_data[col].min()
        c_max = train_data[col].max()

        if col_type.kind == "i":
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    train_data[col] = train_data[col].astype(candidate)
                    break
        else:
            # NOTE: the check looks at the value range only; float16 keeps roughly
            # three significant decimal digits, so values are rounded on conversion.
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    target = candidate
                    break
            train_data[col] = train_data[col].astype(target)

    end_mem = train_data.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    if start_mem > 0:
        print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return train_data

In [None]:
# Downcast the freshly loaded frames before feature engineering.
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

In [None]:
def lag_feature(df) -> pd.DataFrame:
    """Add per-breath lag, lead, rolling, aggregate and difference features.

    ``df`` is modified in place and also returned. ``R`` and ``C`` are converted
    to strings and combined into ``R_C``.

    The rolling means are re-aligned to ``df`` by row label
    (``reset_index(level=0, drop=True)``). The previous positional reset only
    produced correct rows when the frame was sorted by ``breath_id`` and had a
    default ``RangeIndex``.

    Args:
        df: Frame with ``breath_id``, ``time_step``, ``u_in``, ``u_out``, ``R``
            and ``C`` columns.

    Returns:
        The same frame with the engineered columns added.
    """
    # Grouped views of the two raw signals, reused by all per-breath features.
    u_in_by_breath = df.groupby("breath_id")["u_in"]
    u_out_by_breath = df.groupby("breath_id")["u_out"]

    # Running sum of time_step * u_in within each breath.
    df["area"] = df.time_step * df.u_in
    df["area"] = df.groupby("breath_id")["area"].cumsum()

    df["u_in_cumsum"] = u_in_by_breath.cumsum()

    # Previous ("lag") and following ("back") values; positions outside the breath become 0.
    for i in range(4):
        step = i + 1
        df[f"u_in_lag{step}"] = u_in_by_breath.shift(step).fillna(0)
        df[f"u_out_lag{step}"] = u_out_by_breath.shift(step).fillna(0)

        df[f"u_in_back{step}"] = u_in_by_breath.shift(-step).fillna(0)
        df[f"u_out_back{step}"] = u_out_by_breath.shift(-step).fillna(0)

    # groupby().rolling() returns a (breath_id, original label) MultiIndex; dropping
    # only the group level lets the assignment align on the original row labels.
    df["u_out_rolling_10"] = (
        u_out_by_breath.rolling(window=10).mean().reset_index(level=0, drop=True).fillna(0)
    )
    df["u_in_rolling_10"] = (
        u_in_by_breath.rolling(window=10).mean().reset_index(level=0, drop=True).fillna(0)
    )

    # Per-breath summary statistics broadcast back to every row.
    df["u_in_max"] = u_in_by_breath.transform("max")
    df["u_in_min"] = u_in_by_breath.transform("min")
    df["u_in_mean"] = u_in_by_breath.transform("mean")
    df["u_out_max"] = u_out_by_breath.transform("max")
    df["u_out_min"] = u_out_by_breath.transform("min")
    df["u_out_mean"] = u_out_by_breath.transform("mean")

    df["u_in_first"] = u_in_by_breath.transform("first")
    df["u_in_last"] = u_in_by_breath.transform("last")

    # Differences between the current value and each lag / lead value.
    for i in range(4):
        step = i + 1
        df[f"u_in_diff{step}"] = df["u_in"] - df[f"u_in_lag{step}"]
        df[f"u_in_diff_back{step}"] = df["u_in"] - df[f"u_in_back{step}"]

        df[f"u_out_diff{step}"] = df["u_out"] - df[f"u_out_lag{step}"]
        df[f"u_out_diff_back{step}"] = df["u_out"] - df[f"u_out_back{step}"]

    df["u_in_diff_max"] = df["u_in_max"] - df["u_in"]
    df["u_in_diff_min"] = df["u_in_min"] - df["u_in"]
    df["u_in_diff_mean"] = df["u_in_mean"] - df["u_in"]

    df["cross"] = df["u_in"] * df["u_out"]
    df["cross2"] = df["time_step"] * df["u_out"]

    # Position of each row within its breath (0, 1, 2, ...).
    df["time_class"] = df.groupby("breath_id").cumcount()
    df["R"] = df.R.astype(str)
    df["C"] = df.C.astype(str)
    df["R_C"] = df.R + "_" + df.C
    gc.collect()
    return df

def _merge_group_mean(train, test, keys, new_col):
    """Attach the train-set mean of ``u_in`` per ``keys`` group to both frames as ``new_col``."""
    # Only u_in is aggregated: averaging the whole frame is wasteful and raises on
    # the string columns (R, C, R_C) with pandas >= 2.0.
    group_mean = train.groupby(keys)["u_in"].mean().rename(new_col).to_frame()
    train = pd.merge(train, group_mean, how="left", left_on=keys, right_index=True)
    test = pd.merge(test, group_mean, how="left", left_on=keys, right_index=True)
    del group_mean
    gc.collect()
    return train, test


def group_feature(train, test) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Add group-mean ``u_in`` features and one-hot encode ``R``, ``C`` and ``R_C``.

    All group means are computed on ``train`` only and merged into both frames.
    The one-hot encoding is done on the concatenation of both frames so that
    they end up with identical indicator columns.

    Args:
        train: Training frame containing ``time_class``, ``R``, ``C``, ``R_C``,
            ``u_in`` and ``pressure``.
        test: Test frame with the same columns except ``pressure``.

    Returns:
        ``(train, test)`` with the new columns; ``pressure`` is the last column
        of the returned ``train``.
    """
    # time_class x u_in
    train, test = _merge_group_mean(train, test, "time_class", "u_in_time_class")

    print(1)

    # R x u_in
    train, test = _merge_group_mean(train, test, "R", "u_in_r_mean")

    # C x u_in
    train, test = _merge_group_mean(train, test, "C", "u_in_c_mean")

    print(2)

    # R_C x u_in
    train, test = _merge_group_mean(train, test, "R_C", "u_in_rc_mean")

    print(3)

    # (R_C, time_class) x u_in
    train, test = _merge_group_mean(train, test, ["R_C", "time_class"], "u_in_rc_time_mean")

    print(4)

    # One-hot encode the categorical columns on the stacked frame, then split it
    # back at the original train length.
    last_train_shape = train.shape[0]
    y = train.pressure.values.ravel()
    df = pd.concat([train.drop("pressure", axis=1), test])
    df = pd.get_dummies(data=df, columns=["R", "C", "R_C"])
    train, test = df.iloc[:last_train_shape, :], df.iloc[last_train_shape:, :]
    del df
    train["pressure"] = y
    del y
    gc.collect()
    return train, test

In [None]:
%%time 

# Add the per-breath features to both frames (lag_feature modifies them in place).
train = lag_feature(train)
test = lag_feature(test)

In [None]:
%%time 

# Add train-derived group means and one-hot columns to both frames.
train, test = group_feature(train, test)

In [None]:
# Downcast again now that the engineered columns have been added.
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

In [None]:
# Number of model input features (every column except id, breath_id and pressure).
# train_fn reshapes to params["input_size"][1] features, so the two must be equal.
len(train.drop(["id", "breath_id", "pressure"], axis=1).columns)

# Transformer Model 

In [None]:
@tf.custom_gradient
def round_with_gradients(x):
    """Round ``x`` element-wise while letting gradients pass through unchanged."""
    def passthrough(upstream):
        # The incoming gradient is returned as-is, as if rounding were the identity.
        return upstream

    rounded = tf.round(x)
    return rounded, passthrough

class ScaleLayer(tf.keras.layers.Layer):
    """Snap inputs onto the grid ``PRESSURE_MIN + k * PRESSURE_STEP`` and clip them
    to ``[PRESSURE_MIN, PRESSURE_MAX]``."""

    def __init__(self):
        super().__init__()
        # Grid description taken from the module-level pressure constants.
        self.min, self.max, self.step = (
            tf.constant(value, dtype=np.float32)
            for value in (PRESSURE_MIN, PRESSURE_MAX, PRESSURE_STEP)
        )

    def call(self, inputs):
        # Fractional grid index -> rounded index (gradient passes through) -> value on the grid.
        grid_index = round_with_gradients((inputs - self.min) / self.step)
        snapped = grid_index * self.step + self.min
        return tf.clip_by_value(snapped, self.min, self.max)
    
    
class PositionalEncoding(tf.keras.layers.Layer):
    """Add a fixed sinusoidal positional encoding to its input.

    Args:
        position: Number of positions to precompute (default 80, the previous
            hard-coded value).
        emb_dim: Width of the encoding; defaults to ``params["hidden_dim"]``
            (the previous hard-coded source) when ``None``.
        **kwargs: Standard Keras layer arguments such as ``name``. Accepting
            them, together with ``get_config``, lets a saved model recreate
            the layer.
    """

    def __init__(self, position=80, emb_dim=None, **kwargs):
        super().__init__(**kwargs)
        self.position = position
        self.emb_dim = params["hidden_dim"] if emb_dim is None else emb_dim
        self.pos_encoding = self._positional_encoding(self.position, self.emb_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-instantiated."""
        layer_config = super().get_config()
        layer_config.update({"position": self.position, "emb_dim": self.emb_dim})
        return layer_config

    def _get_angles(self, position, i, emb_dim):
        """
        Evaluate the angle term of the positional-encoding formula,
        position / 10000 ** (2 * (i // 2) / emb_dim).
        outputs: shape=(position.shape[0], i.shape[1])
        """
        angle_rates = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(emb_dim, tf.float32))
        return position * angle_rates

    def _positional_encoding(self, sentence_length, emb_dim):
        """
        inputs:
        sentence_length: int
        emb_dim: int

        outputs:
        output: shape=(1, sentence_length, emb_dim), float32
        """
        # For efficiency, position and i are shaped as a column and a row so that
        # all angles are computed at once through broadcasting.
        angle = self._get_angles(
            position=tf.expand_dims(tf.range(sentence_length, dtype=tf.float32), -1),
            i=tf.expand_dims(tf.range(emb_dim, dtype=tf.float32), 0),
            emb_dim=emb_dim
        )

        # Even-indexed columns go through sine.
        sine = tf.math.sin(angle[:, 0::2])
        # Odd-indexed columns go through cosine.
        cos = tf.math.cos(angle[:, 1::2])

        # Sine and cosine halves are concatenated (not interleaved) along the last axis.
        pos_encoding = tf.concat([sine, cos], axis=-1)
        pos_encoding = tf.expand_dims(pos_encoding, 0)
        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        """
        inputs: shape=(batch, sentence_length, emb_dim)
        """
        # Use only as many precomputed positions as the input has steps.
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
    
    
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: normalised self-attention followed by a
    kernel-size-1 convolutional feed-forward part, each with a residual connection.

    Args:
        inputs: Tensor entering the block.
        head_size: ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Filters of the first feed-forward convolution.
        dropout: Dropout rate used in attention and after each sub-layer.

    Returns:
        Tensor with the same last dimension as ``inputs``.
    """
    # Self-attention sub-block (normalisation comes first).
    normed = LayerNormalization(epsilon=1e-6)(inputs)
    attention = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)
    attended = Dropout(dropout)(attention(normed, normed))
    residual = attended + inputs

    # Feed-forward sub-block; the second convolution restores the input width.
    hidden = LayerNormalization(epsilon=1e-6)(residual)
    hidden = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    projected = Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + residual


def base_model(
    input_shape=params["input_size"], 
    head_size=params["head_size"],
    hidden_dim = params["hidden_dim"],
    num_heads=params["num_heads"],
    ff_dim=params["ff_dim"],
    num_transformer_blocks=params["num_transformer_blocks"], 
    mlp_units=params["mlp_units"], 
    dropout=params["dropout"],
    mlp_dropout=params["mlp_dropout"]
):
    """Build the (uncompiled) transformer regression model.

    Pipeline: Dense projection -> positional encoding -> encoder blocks ->
    per-step MLP -> Dense(1) -> ScaleLayer.

    The ScaleLayer is now applied to the Dense(1) prediction and is the model
    output. Previously it was applied to the MLP features and assigned to an
    unused variable, so it never took part in the model.

    Args:
        input_shape: ``(steps, features)`` of one sample.
        head_size: ``key_dim`` of every attention layer.
        hidden_dim: Width of the input projection.
        num_heads: Attention heads per block.
        ff_dim: Feed-forward filters per block.
        num_transformer_blocks: Number of encoder blocks.
        mlp_units: Widths of the Dense layers after the encoder stack.
        dropout: Dropout rate inside the encoder blocks.
        mlp_dropout: Dropout rate after each MLP Dense layer.

    Returns:
        A ``Model`` mapping ``input_shape`` sequences to one value per step.
    """
    inputs = Input(shape=input_shape)
    x = Dense(hidden_dim, activation="relu")(inputs)
    # NOTE: PositionalEncoding() takes its width from params["hidden_dim"], so a
    # non-default hidden_dim must match that value.
    x = PositionalEncoding()(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    for dim in mlp_units:
        x = Dense(dim, activation="selu")(x)
        x = Dropout(mlp_dropout)(x)

    raw_pressure = Dense(1)(x)
    # Snap the prediction onto the observed pressure grid and clip it to range.
    outputs = ScaleLayer()(raw_pressure)
    return Model(inputs, outputs)

def build_model():
    """Create the default model and compile it with MAE loss and the Adam optimizer.

    Returns:
        The compiled model.
    """
    compiled = base_model()
    compiled.compile(optimizer="adam", loss="mae")
    return compiled

# Build one model here only to print its layer summary; train_fn builds a fresh model per fold.
model = build_model()
model.summary()

# Train 

In [None]:
if config["debug"] is not True and config["device"] == "tpu":
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

    # instantiate a distribution strategy (non-experimental name of the same class)
    tpu_strategy = tf.distribute.TPUStrategy(tpu)
else:
    # Debug / non-TPU runs: train_fn always enters tpu_strategy.scope(), so provide
    # the current default strategy instead of leaving the name undefined.
    tpu_strategy = tf.distribute.get_strategy()


def mae(corr, pred):
    """Return the mean absolute difference between ``corr`` and ``pred``."""
    abs_error = np.abs(corr - pred)
    return np.mean(abs_error)


def scaler(tr, va, te):
    """Fit a ``RobustScaler`` on ``tr`` and return the scaled ``tr``, ``va`` and ``te``."""
    robust = RobustScaler()
    tr_scaled = robust.fit_transform(tr)
    # Validation and test data reuse the statistics fitted on the training part.
    va_scaled = robust.transform(va)
    te_scaled = robust.transform(te)
    return tr_scaled, va_scaled, te_scaled


def callback_tools(fold) -> Tuple[object, object, object, object]:
    """Create the Keras callbacks for one fold.

    Args:
        fold: Fold identifier, used in the checkpoint file name.

    Returns:
        ``(ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, WandbCallback)``.
    """
    monitored = "val_loss"

    reduce_lr = ReduceLROnPlateau(monitor=monitored, factor=0.5, patience=10, verbose=1)
    # NOTE(review): patience is fixed at 60; config["early_stopping_rounds"] is not used here.
    early_stop = EarlyStopping(
        monitor=monitored,
        patience=60,
        verbose=1,
        mode="min",
        restore_best_weights=True,
    )

    # The best full model of each fold is written to models/<fold>.hdf5.
    os.makedirs("models", exist_ok=True)
    checkpoint = keras.callbacks.ModelCheckpoint(
        f"models/{fold}.hdf5",
        monitor=monitored,
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        save_freq='epoch',
        options=None,
    )
    wandb_logger = WandbCallback(log_weights=True)
    return reduce_lr, early_stop, checkpoint, wandb_logger


def submit(pred, name):
    """Write ``pred`` as the ``pressure`` column of the sample submission.

    The result is saved as ``submission_lstm_<name>.csv`` in the working directory.
    """
    template = pd.read_csv("../input/ventilator-pressure-prediction/sample_submission.csv")
    template.assign(pressure=pred).to_csv(f"submission_lstm_{name}.csv", index=False)
    
    
def viz_predict(corr, pred):
    """Show histograms of ``corr`` (titled "Label") and ``pred`` (titled "Predict") side by side."""
    _, (label_ax, predict_ax) = plt.subplots(1, 2, figsize=(15, 6))

    sns.histplot(corr, ax=label_ax)
    label_ax.set_title("Label")

    sns.histplot(pred, ax=predict_ax)
    predict_ax.set_title("Predict")

    plt.show()
    

def _snap_to_pressure_grid(pred):
    """Round predictions to the nearest ``PRESSURE_STEP`` multiple above ``PRESSURE_MIN`` and clip to range."""
    # The division must happen before rounding; rounding (pred - MIN) first only
    # rounds to whole units and never lands on the step grid.
    steps = np.round((pred - PRESSURE_MIN) / PRESSURE_STEP)
    return np.clip(steps * PRESSURE_STEP + PRESSURE_MIN, PRESSURE_MIN, PRESSURE_MAX)


def train_fn(train, test):
    """Train one model per GroupKFold fold and write the test submissions.

    For every fold the features are scaled with a scaler fitted on the training
    part, reshaped to ``(-1, steps, features)`` using ``params["input_size"]``,
    and a fresh model is fitted. Out-of-fold and test predictions are collected.
    After the last fold the CV MAE is printed and, unless ``config["debug"]``
    is set, four submission files are written (mean / median over folds, raw and
    snapped to the pressure grid).

    Args:
        train: Feature frame with ``id``, ``breath_id`` and ``pressure`` columns.
            NOTE(review): the reshapes assume each breath occupies
            ``params["input_size"][0]`` consecutive rows — confirm for new data.
        test: Feature frame containing at least the training feature columns.

    Returns:
        1-D array of out-of-fold predictions in the row order of ``train``.

    Raises:
        ValueError: If the number of feature columns differs from
            ``params["input_size"][1]``.
    """
    # Use the notebook's TPU strategy when it exists, else the current default
    # strategy, so debug / non-TPU runs do not fail with a NameError.
    strategy = globals().get("tpu_strategy")
    if strategy is None:
        strategy = tf.distribute.get_strategy()

    seq_len, n_features = params["input_size"]
    drop_cols = ["id", "pressure", "breath_id"]

    with strategy.scope():

        predict_val, val_idx, predict_test = [], [], []
        kf = GroupKFold(n_splits=2 if config["debug"] else config["n_fold"])

        for fold, (tr, va) in enumerate(kf.split(train, train.pressure, train.breath_id)):
            print(f"=====================fold: {fold+1}==========================")
            x_train = train.iloc[tr].drop(drop_cols, axis=1)
            x_val = train.iloc[va].drop(drop_cols, axis=1)
            y_train, y_val = train.iloc[tr]["pressure"], train.iloc[va]["pressure"]
            use_col = x_train.columns
            if len(use_col) != n_features:
                raise ValueError(
                    f"Found {len(use_col)} feature columns but params['input_size'] "
                    f"expects {n_features}."
                )
            x_test = test[use_col]

            # scaler (fitted on the training part of the fold only)
            x_train, x_val, x_test = scaler(x_train, x_val, x_test)

            # transformer batch shape
            x_train = x_train.reshape(-1, seq_len, n_features)
            x_val = x_val.reshape(-1, seq_len, n_features)
            x_test = x_test.reshape(-1, seq_len, n_features)
            y_train = y_train.values.reshape(-1, seq_len, 1)
            y_val = y_val.values.reshape(-1, seq_len, 1)

            # set up tools
            # NOTE(review): a wandb run is started for every fold and never
            # explicitly finished — confirm this is the intended logging layout.
            run = setup_db()
            model = build_model()
            lr, es, sv, ws = callback_tools(fold)
            wandb.config.fold = fold

            model.fit(x_train,
                      y_train,
                      validation_data=(x_val, y_val),
                      callbacks=[lr, es, sv, ws],
                      epochs=1 if config["debug"] else config["epoch"],
                      batch_size=config["batch_size"])

            # Flatten (breaths, steps, 1) predictions back to one value per row.
            pred_v = model.predict(x_val, batch_size=config["batch_size"], verbose=config["verbose"]).reshape(-1)
            pred_t = model.predict(x_test, batch_size=config["batch_size"], verbose=config["verbose"]).reshape(-1)

            predict_val.append(pred_v)
            predict_test.append(pred_t)
            val_idx.append(va)

            print(f"fold: {fold+1} | mae: {mae(y_val.reshape(-1), pred_v)}")

            del x_train, x_val, x_test, model
            gc.collect()

        # Put the out-of-fold predictions back into the original row order.
        predict_val = np.concatenate(predict_val)
        val_idx = np.concatenate(val_idx)
        val_idx = np.argsort(val_idx)
        predict_val = predict_val[val_idx]

        print("##############################################################")
        print(f"CV SCORE: {mae(train.pressure.values.ravel(), predict_val)}")
        print("##############################################################")

        predict_test_mean = np.mean(predict_test, 0)
        predict_test_median = np.median(predict_test, 0)

        predict_test_mean_clip = _snap_to_pressure_grid(predict_test_mean)
        predict_test_median_clip = _snap_to_pressure_grid(predict_test_median)

        # submit
        if config["debug"] is not True:
            submit(predict_test_mean, "mean")
            submit(predict_test_median, "median")
            submit(predict_test_mean_clip, "mean_clip")
            submit(predict_test_median_clip, "median_clip")

        gc.collect()
        slack("Transfomer model done.")
        return predict_val

In [None]:
# Run the cross-validated training; the result holds out-of-fold predictions in train's row order.
pred_v = train_fn(train, test)

In [None]:
# viz_predict(corr, pred) titles its first argument "Label" and its second "Predict",
# so the true pressures go first and the out-of-fold predictions second.
viz_predict(train.pressure.values.ravel(), pred_v)