# Imports

In [None]:
import numpy as np
import pandas as pd

import optuna

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.layers import Conv1D, Input, Dense, Add, Multiply
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import models
import tensorflow_addons as tfa

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display
from tqdm.notebook import tqdm

%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

# Load Data and some statistics

In [None]:
# Read the raw train/test tables and the sample submission template from the
# competition input directory.
train_ori = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test_ori = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

In [None]:
# Display the raw training table as the cell output.
train_ori

In [None]:
# Print dataset sizes, per-column missing-value counts and breath counts.
print(f'Length of TRAIN dataset: {len(train_ori)}')
print(f'Length of TEST dataset: {len(test_ori)}')
print('')
print('Missing values in TRAIN dataset')
# The last TRAIN column is skipped, as in the original cell
# (presumably it is the target column -- confirm against the CSV layout).
for col in train_ori.columns[:-1]:
    print(f'{col}: {train_ori[col].isna().sum()}')
print('')
print('Missing values in TEST dataset')
# TEST carries no target column, so every column has to be checked; slicing
# off the last one would silently skip a real input feature.
for col in test_ori.columns:
    print(f'{col}: {test_ori[col].isna().sum()}')
print('')
print(f'Number of breaths in train dataset: {train_ori["breath_id"].nunique()}')
print(f'Number of breaths in test dataset: {test_ori["breath_id"].nunique()}')
# value_counts() already returns the rows-per-breath counts as its values.
# Going through reset_index()["breath_id"] returns the counts on pandas < 2.0
# but the breath ids themselves on pandas >= 2.0, so read the values directly.
print(f'The number of observations for each breath: {train_ori["breath_id"].value_counts().unique()[0]}')

In [None]:
# Keep only the rows recorded at time_step == 0 (one selection per match),
# then display them as the cell output.
train_initial = train_ori.loc[train_ori['time_step'].eq(0)]
train_initial

In [None]:
# Overall mean and standard deviation of the pressure among the
# time_step == 0 rows selected into train_initial.
total_mean = train_initial['pressure'].mean()
total_std = train_initial['pressure'].std()
print('total mean: {}'.format(total_mean))
print('total std: {}'.format(total_std))

In [None]:
# Per-(R, C) mean and standard deviation of every column of train_initial;
# the result is indexed by the (R, C) pairs.
initial_stat_mean = train_initial.groupby(['R', 'C']).mean()
initial_stat_std = train_initial.groupby(['R', 'C']).std()

In [None]:
# Table of the initial pressure mean/std for every (R, C) combination.
# The R and C keys are taken from the groupby result itself rather than from
# hand-typed lists, so each row is guaranteed to carry the statistics of the
# group it is labelled with, whatever combinations the data contains.
initial_stat = (train_initial
                .groupby(['R', 'C'])['pressure']
                .agg(['mean', 'std'])
                .reset_index())
initial_stat

# Add Features

In [None]:
def add_features(df):
    """
    Build the engineered per-row feature table for the model.

    Every grouped feature (cumulative sums, lags, leads, extrema) is computed
    within a single ``breath_id``, so no information crosses breath
    boundaries. Missing values -- including those produced by shifting past
    the start or end of a breath -- are replaced with 0.

    :param df: Frame holding at least the columns ``breath_id``, ``time_step``,
        ``u_in``, ``u_out``, ``R`` and ``C``. It is not modified.
    :return: New frame with the added feature columns, where ``R``, ``C`` and
        the combined ``R__C`` key are one-hot encoded by ``pd.get_dummies``.
        The helper columns (``one``, ``count``, the ``*_lag*`` columns) are
        kept so that callers can drop them by name.
    """
    # Work on a copy: otherwise the caller's frame would receive only the
    # columns created before the first fillna() rebinding below.
    df = df.copy()
    shifts = range(1, 5)

    # Products of the raw signals and their per-breath running totals.
    df['cross'] = df['u_in'] * df['u_out']
    df['cross1'] = df['u_in'] * (1 - df['u_out'])
    df['cross2'] = df['time_step'] * df['u_out']
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['time_step_cumsum'] = df.groupby('breath_id')['time_step'].cumsum()
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()
    print("Step-1...Completed")

    # Past (lag) and future (lag_back) values of both control signals.
    for k in shifts:
        df[f'u_in_lag{k}'] = df.groupby('breath_id')['u_in'].shift(k)
        df[f'u_out_lag{k}'] = df.groupby('breath_id')['u_out'].shift(k)
        df[f'u_in_lag_back{k}'] = df.groupby('breath_id')['u_in'].shift(-k)
        df[f'u_out_lag_back{k}'] = df.groupby('breath_id')['u_out'].shift(-k)
    df = df.fillna(0)
    print("Step-2...Completed")

    # Per-breath extrema and the distance of u_in from its max / mean.
    u_in_max = df.groupby('breath_id')['u_in'].transform('max')
    u_in_mean = df.groupby('breath_id')['u_in'].transform('mean')
    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_out__max'] = df.groupby('breath_id')['u_out'].transform('max')
    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = u_in_mean - df['u_in']
    print("Step-3...Completed")

    # Differences between the current value and each lag / lead.
    for k in shifts:
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']
    for k in shifts:
        df[f'u_in_lagback_diff{k}'] = df['u_in'] - df[f'u_in_lag_back{k}']
        df[f'u_out_lagback_diff{k}'] = df['u_out'] - df[f'u_out_lag_back{k}']
    print("Step-4...Completed")

    # Running mean of u_in: cumulative sum divided by the 1-based position of
    # the row inside its breath.
    df['one'] = 1
    df['count'] = df.groupby('breath_id')['one'].cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']
    print("Step-5...Completed")

    df['time_step_diff'] = df.groupby('breath_id')['time_step'].diff().fillna(0)
    print("Step-6...Completed")

    # Second-order differences: change of each k-step difference relative to
    # its own previous value. Each feature pairs diff{k} with diff{k}_lag;
    # subtracting from diff1 for every k would mix unrelated quantities.
    for k in shifts:
        df[f'u_in_diff{k}_lag'] = df.groupby('breath_id')[f'u_in_diff{k}'].shift(1)
    df = df.fillna(0)
    for k in shifts:
        df[f'u_in_diff{k}_diff'] = df[f'u_in_diff{k}'] - df[f'u_in_diff{k}_lag']

    # Treat R, C and their combination as categorical and one-hot encode them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    df = pd.get_dummies(df)
    print("Step-8...Completed")
    return df

# Build the engineered feature tables for the train and test sets.
train = add_features(train_ori)
test = add_features(test_ori)

In [None]:
# Columns that only served as intermediates for other features, plus the
# row/breath identifiers; none of them is fed to the model.
non_feature_cols = ['id', 'breath_id', 'one', 'count',
                    'u_in_lag1', 'u_in_lag2', 'u_in_lag3', 'u_in_lag4',
                    'u_out_lag1', 'u_out_lag2', 'u_out_lag3', 'u_out_lag4',
                    'u_in_lag_back1', 'u_in_lag_back2', 'u_in_lag_back3', 'u_in_lag_back4',
                    'u_out_lag_back1', 'u_out_lag_back2', 'u_out_lag_back3', 'u_out_lag_back4',
                    'u_in_diff1_lag', 'u_in_diff2_lag', 'u_in_diff3_lag', 'u_in_diff4_lag']

# Pull the target out first, laid out as one row of 80 values per group of
# 80 consecutive table rows, then strip the same columns from both tables.
targets = train[['pressure']].to_numpy().reshape(-1, 80)
train = train.drop(['pressure'] + non_feature_cols, axis=1)
test = test.drop(non_feature_cols, axis=1)

In [None]:
# Display the feature columns that remain after the drops.
train.columns

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# scaling to both tables.
RS = RobustScaler().fit(train)
train = RS.transform(train)
test = RS.transform(test)

In [None]:
# Regroup the flat (rows, features) arrays into (sequences, 80, features);
# every 80 consecutive rows become one sequence.
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, test.shape[-1])

In [None]:
# Display the shape of the reshaped training array.
train.shape

In [None]:
# Per-sample input shape (everything but the leading axis); read by get_model().
shape_ = train.shape[1:]

In [None]:
# Append a trailing axis of length 1 to the targets.
targets = targets[:,:,np.newaxis]

# Configuration

In [None]:
# Training hyper-parameters.
EPOCH = 300       # number of epochs passed to model.fit
BATCH_SIZE = 128  # mini-batch size passed to model.fit
LR = 1e-3         # learning rate handed to the Adam optimizer in get_model()

# Define Model
- https://github.com/stdereka/liverpool-ion-switching/blob/f44c57e88a3e7889720c88710135f2c7d31b416e/model/nn.py#L116

In [None]:
def conv_block(x: tf.Tensor, filters: int, kernel_size: int):
    """
    Residual convolution block.

    A 1x1 convolution first projects the input to ``filters`` channels. That
    projection is then passed through three stacked ReLU convolutions and
    finally added back to their output.

    :param x: Input tensor.
    :param filters: Number of filters used by every convolution in the block.
    :param kernel_size: Kernel size of the three stacked convolutions.
    :return: Sum of the 1x1 projection and the output of the stacked convolutions.
    """
    shortcut = Conv1D(filters=filters, kernel_size=1, padding='same')(x)

    branch = shortcut
    for _ in range(3):
        branch = Conv1D(filters=filters,
                        kernel_size=kernel_size,
                        padding='same',
                        activation='relu')(branch)

    return Add()([shortcut, branch])

def wave_block(x: tf.Tensor, filters: int, kernel_size: int, n: int):
    """
    WaveNet-style block of gated dilated convolutions.

    The input is projected with a 1x1 convolution, then run through ``n``
    gated stages whose dilation rate doubles each time (1, 2, 4, ...). Every
    stage multiplies a tanh convolution by a sigmoid convolution, applies a
    1x1 convolution, and adds the result to a running sum that starts from
    the initial projection.

    :param x: Input tensor.
    :param filters: Number of filters used by every convolution in the block.
    :param kernel_size: Kernel size of the dilated convolutions.
    :param n: Number of gated stages, i.e. number of dilation rates.
    :return: Running sum of the projection and all stage outputs.
    """
    hidden = Conv1D(filters=filters, kernel_size=1, padding='same')(x)
    accumulated = hidden

    for exponent in range(n):
        dilated = dict(filters=filters,
                       kernel_size=kernel_size,
                       padding='same',
                       dilation_rate=2 ** exponent)
        signal = Conv1D(activation='tanh', **dilated)(hidden)
        gate = Conv1D(activation='sigmoid', **dilated)(hidden)
        gated = Multiply()([signal, gate])
        hidden = Conv1D(filters=filters, kernel_size=1, padding='same')(gated)
        accumulated = Add()([accumulated, hidden])

    return accumulated

In [None]:
def get_model():
    """
    Build and compile the sequence-to-sequence regression network.

    The network alternates ``conv_block`` and ``wave_block`` four times with
    growing filter counts (16, 32, 64, 128) and shrinking numbers of dilation
    stages (12, 8, 4, 1). A swish activation and a single-unit ``Dense`` layer
    follow. Input shape comes from the module-level ``shape_`` and the
    learning rate from ``LR``.

    :return: Keras model compiled with MAE loss and an Adam optimizer wrapped
        in ``tfa.optimizers.SWA``.
    """
    inp = Input(shape_)
    x = conv_block(inp, 16, 3)
    x = wave_block(x, 16, 3, 12)
    x = conv_block(x, 32, 3)
    x = wave_block(x, 32, 3, 8)
    x = conv_block(x, 64, 3)
    x = wave_block(x, 64, 3, 4)
    x = conv_block(x, 128, 3)
    x = wave_block(x, 128, 3, 1)
    x = keras.layers.Activation('swish')(x)
    out = Dense(1, name='out')(x)
    model = models.Model(inputs=inp, outputs=out)
    # `lr` is a deprecated alias that newer Keras releases reject;
    # `learning_rate` is the supported keyword.
    opt = Adam(learning_rate=LR)
    # NOTE(review): nothing visible in this file swaps the SWA-averaged
    # weights into the model after training -- confirm whether that is intended.
    opt = tfa.optimizers.SWA(opt)
    model.compile(loss='mae', optimizer=opt)
    return model

# Training

In [None]:
#submission_tmp = submission.copy()

In [None]:
# detect and init the TPU
#tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# instantiate a distribution strategy
#tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

#with tpu_strategy.scope():
# 5-fold split over sequences; only the first two folds are actually trained.
# For each trained fold: fit, reload the best checkpoint, predict the test
# set, write a per-fold submission and plot the loss curves.
kf = KFold(n_splits=5, shuffle=True, random_state=2021)
test_preds = []
for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
    if fold not in (0, 1):
        continue
    submission_tmp = submission.copy()
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X_train, X_valid = train[train_idx], train[test_idx]
    y_train, y_valid = targets[train_idx], targets[test_idx]
    X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
    X_valid = tf.convert_to_tensor(X_valid, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    y_valid = tf.convert_to_tensor(y_valid, dtype=tf.float32)
    model = get_model()

    # ExponentialDecay is defined over optimizer *steps* (the rate reaches
    # LR * 1e-5 after 400 epochs' worth of steps), while LearningRateScheduler
    # calls its schedule with the *epoch* index. Feeding the epoch straight in
    # leaves the rate almost constant, so convert epoch -> step explicitly.
    steps_per_epoch = int(np.ceil(len(train_idx) / BATCH_SIZE))
    scheduler = ExponentialDecay(LR, 400 * steps_per_epoch, 1e-5)

    def lr_for_epoch(epoch, _current_lr):
        # Two-argument signature: Keras passes (epoch, current_lr).
        return float(scheduler(epoch * steps_per_epoch))

    lr = LearningRateScheduler(lr_for_epoch, verbose=1)

    #es = EarlyStopping(monitor="val_loss", patience=15, verbose=1, mode="min", restore_best_weights=True)
    # Keep only the weights of the epoch with the lowest validation loss.
    sv = tf.keras.callbacks.ModelCheckpoint(
        'fold-%i.h5'%fold, monitor='val_loss', verbose=0, save_best_only=True,
        save_weights_only=True, mode='min', save_freq='epoch')

    history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCH, batch_size=BATCH_SIZE, callbacks=[lr, sv])
    # Predict with the best checkpoint rather than the last-epoch weights.
    model.load_weights('fold-%i.h5'%fold)
    # Flatten the (sequences, 80, 1) output into one value per submission row.
    pred = model.predict(test).squeeze().reshape(-1, 1).squeeze()
    submission_tmp['pressure'] = pred
    submission_tmp.to_csv('submission_{}.csv'.format(fold), index=False)
    test_preds.append(pred)

    # Loss curves for this fold, with the minima highlighted.
    plt.figure(figsize=(15, 5))
    plt.plot(
        np.arange(len(history.history["loss"])),
        history.history["loss"],
        "-o",
        label="Train Loss",
        color="#2ca02c")
    plt.plot(
        np.arange(len(history.history["loss"])),
        history.history["val_loss"],
        "-o",
        label="Val Loss",
        color="#d62728")

    x = np.argmin(history.history["val_loss"])
    y = np.min(history.history["val_loss"])
    x1 = np.argmin(history.history["loss"])
    y1 = np.min(history.history["loss"])

    # Axis extents, used to offset the text labels relative to the markers.
    xdist = plt.xlim()[1] - plt.xlim()[0]
    ydist = plt.ylim()[1] - plt.ylim()[0]

    plt.scatter(x, y, s=200, color="#d62728")
    plt.text(x - 0.03 * xdist, y + 0.05 * ydist, "min val_loss:{:.4f}".format(y), size=14)
    plt.scatter(x1, y1, s=200, color="#2ca02c")
    plt.text(x1 - 0.03 * xdist, y1 + 0.05 * ydist, "min loss:{:.4f}".format(y1), size=14)

    plt.xlabel("Epoch", size=14)
    plt.ylabel("Loss", size=14)

    plt.legend()
    plt.savefig(f"fig{fold}.png")
    plt.show()

In [None]:
# Blend the per-fold test predictions with a plain average. Dividing by the
# number of predictions actually collected keeps the blend correct if the set
# of trained folds changes.
if not test_preds:
    raise RuntimeError('No fold predictions available; run the training cell first.')
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission.csv', index=False)