# Import Libraries

In [None]:
import gc
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Bidirectional, LSTM
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.layers import Concatenate, Add, GRU


In [None]:
# Fix the global NumPy and TensorFlow random seeds.
# Edit SEED here to run with a different seed.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Data Load

In [None]:
# Read the raw competition tables.
train_df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test_df = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')

# Report the dimensions of each table.
# NOTE: a cell renders only its final expression, so the train preview below
# is evaluated but not displayed; only the test preview is shown.
print(f"train_df: {train_df.shape}")
train_df.head()

print(f"test_df: {test_df.shape}")
test_df.head()

In [None]:
# Distinct pressure values seen in the training data, in ascending order.
all_pressure = sorted(train_df['pressure'].unique())

# Smallest value, largest value, and the gap between the two smallest values,
# each stored as a plain Python float.
PRESSURE_MIN = float(all_pressure[0])
PRESSURE_MAX = float(all_pressure[-1])
PRESSURE_STEP = float(all_pressure[1] - all_pressure[0])

# Feature Engineering

In [None]:
def prepare_set(df): #CV 0.1579
    """Build the per-row feature table for one raw data frame.

    The input frame is copied first, so the caller's frame is left exactly as
    it was passed in and the function can be called again on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing at least the columns ``breath_id``, ``time_step``,
        ``u_in``, ``u_out``, ``R`` and ``C``. Any other columns are carried
        through unchanged.
        NOTE(review): the last feature (``u_in_1st_order_grad``) is written
        back positionally from a per-breath groupby, so rows are assumed to be
        grouped by ascending ``breath_id`` with the same number of rows in
        every breath -- confirm against the data source.

    Returns
    -------
    pandas.DataFrame
        A new frame with the engineered columns appended, ``R``, ``C`` and
        their concatenation ``RC`` one-hot encoded by ``pd.get_dummies``, and
        every remaining NaN replaced by 0.

    Notes
    -----
    * Reads the module-level constant ``PRESSURE_STEP`` for the ``noise``
      column.
    * Re-seeds NumPy's global RNG with 42 on every call, so ``noise`` is the
      same sequence for any frame of a given length.
    * Columns are created in a fixed order; callers that convert the result
      to a positional array depend on that order, so do not reorder the
      statements below.
    """
    # Work on a private copy: the code below adds many columns and converts
    # R / C to strings, which must not leak into the caller's frame.
    df = df.copy()

    np.random.seed(42)
    df['noise'] = np.random.choice([0, PRESSURE_STEP, -PRESSURE_STEP], len(df))

    df['flow'] = np.sqrt(2*df['u_in'])

    # log(0) evaluates to -inf and is replaced by 0 straight away, so the
    # divide-by-zero warning NumPy would emit carries no information.
    with np.errstate(divide='ignore'):
        df['u_in_log'] = np.log(df['u_in']).replace(-np.inf,0)

    # (u_in > 0) as 0/1 minus the u_out flag, computed column-wise instead of
    # with a Python loop over every row.
    df['state'] = (df['u_in'] > 0).astype(int) - df['u_out']

    # Per-breath first differences; the first row of each breath becomes 0.
    df['delta_time'] = df.groupby('breath_id')['time_step'].diff().fillna(0)
    df['delta_u_in'] = df.groupby(df['breath_id'])['u_in'].diff().fillna(0).reset_index(level=0,drop=True)
    df['delta_flow'] = df.groupby(df['breath_id'])['flow'].diff().fillna(0).reset_index(level=0,drop=True)

    # Elapsed time accumulated over rows with u_out == 0 and state == 1,
    # forced back to 0 on rows where u_out == 1.
    df['inhale_time'] = df['state'] *  df['delta_time'] * (1 - df['u_out'])
    df['inhale_time'] = (df.groupby(df['breath_id'])['inhale_time']).cumsum()  * (1 - df['u_out'])

    # 0/0 on the first row of each breath yields NaN, which is filled with 0.
    df['flow_1st_der'] = (df['delta_flow'] /df['delta_time']).fillna(0)
    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()
    df['flow_cumsum'] = (df['flow']).groupby(df['breath_id']).cumsum()

    # Lagged (2 and 4 rows back) and lead (1 row ahead) copies of u_in within
    # each breath; positions without a neighbour are filled with 0.
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2).fillna(0).reset_index(level=0,drop=True)
    df['u_in_lag4'] = df.groupby('breath_id')['u_in'].shift(4).fillna(0).reset_index(level=0,drop=True)

    df['u_in_lag-1'] = df.groupby('breath_id')['u_in'].shift(-1).fillna(0).reset_index(level=0,drop=True)

    df['u_in_dif2'] = df['u_in'] - df['u_in_lag2']
    df['u_in_dif4'] = df['u_in'] - df['u_in_lag4']
    df['u_in_dif-1'] = df['u_in'] - df['u_in_lag-1']

    # flow * delta_time per row, its running per-breath sum, and the amount
    # still to come (per-breath maximum minus the running sum).
    df['volume_mean']= df['flow'] * df['delta_time']

    df['volume_in_cumsum']=df.groupby('breath_id')['volume_mean'].cumsum()
    df['volume_in_cumsum_reverse']=df.groupby(df['breath_id'])['volume_in_cumsum'].transform('max')  - df['volume_in_cumsum']

    # Largest running sum reached on rows with u_out == 0, per breath.
    df['_volume'] = df['volume_in_cumsum'] * (1 - df['u_out'])
    df['tidal_volume']=df.groupby(df['breath_id'])['_volume'].transform('max')

    # Share of the running sum contributed by each row, then shifted so each
    # row holds the value of the following row of the same breath.
    df['volume_part'] = (df['volume_mean']/df['volume_in_cumsum']).fillna(0)
    df['volume_part'] = df.groupby('breath_id')['volume_part'].shift(-1).fillna(0).reset_index(level=0,drop=True)

    df['time_constant'] = df.groupby(df['breath_id'])['inhale_time'].transform('max')
    df['V_dot'] = df['tidal_volume'] / df['time_constant']

    # Centered 10-row rolling quartiles of u_in inside each breath.
    df['u_in_rol_q0.25'] = df.groupby(df['breath_id'])['u_in'].rolling(window=10, min_periods=1, center=True).quantile(0.25).reset_index(level=0,drop=True)
    df['u_in_rol_q0.75'] = df.groupby(df['breath_id'])['u_in'].rolling(window=10, min_periods=1, center=True).quantile(0.75).reset_index(level=0,drop=True)

    # Products and ratios of flow with the numeric R and C values; these must
    # run before R and C are converted to strings below.
    df['dP_on_R'] = df['flow'] * df['R']/1000
    df['lung_expand'] = df['dP_on_R'] * df['C']
    df['dP_on_C'] = df['flow'] * df['delta_time'] * 1000 / df['C']
    df['dP_on_C_cumsum'] = df.groupby('breath_id')['dP_on_C'].cumsum()

    # Turn R, C and their concatenation into string columns so that
    # get_dummies one-hot encodes them.
    df['RC'] = df['R'].astype(str) + df['C'].astype(str)
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df = pd.get_dummies(df)

    # np.gradient of u_in per breath, stacked and flattened back to one value
    # per row (requires equal-length breaths, see the docstring note).
    df['u_in_1st_order_grad'] = np.stack(df.groupby(df['breath_id'])['u_in'].apply(np.gradient).values).reshape(len(df),)

    df = df.fillna(0)

    return df

# Build the engineered feature tables for both splits.
train = prepare_set(train_df)
test = prepare_set(test_df)

# The raw frames are no longer referenced below; drop them and collect.
del train_df
del test_df
gc.collect()

In [None]:
# Pull the label and the u_out flag out of the flat table, laid out as one
# row of 80 values per breath, before those columns are removed or scaled.
targets = train['pressure'].to_numpy().reshape(-1, 80)
u_outs = train['u_out'].to_numpy().reshape(-1, 80)

# Remove the label and the identifier columns so only model inputs remain.
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

print(f"train: {train.shape}")

In [None]:
# Fit the scaler on the training features only, then apply the same fitted
# transform to both splits and fold the rows into (breaths, 80, features).
scaler = RobustScaler().fit(train)

train = scaler.transform(train).reshape(-1, 80, train.shape[-1])
test = scaler.transform(test).reshape(-1, 80, train.shape[-1])

print(f"train: {train.shape} \n targets: {targets.shape}")

In [None]:
# Try to attach to a TPU; a ValueError anywhere in the attempt is treated as
# "no TPU" and the current default distribution strategy is used instead.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
    # 64 samples per replica.
    BATCH_SIZE = 64 * strategy.num_replicas_in_sync
    print("Running on TPU:", tpu.master())
except ValueError:
    strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 512
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

print(f"Batch Size: {BATCH_SIZE}")

In [None]:
def GBVPP_loss(y_true, y_pred, cols = 80):
    """Per-sample mean absolute error over the steps where ``u_out`` is 0.

    Parameters
    ----------
    y_true : tensor-like, shape (batch, 2 * cols)
        The first ``cols`` columns are the target values and the remaining
        columns are the matching ``u_out`` flags.
    y_pred : tensor-like
        Predictions holding ``cols`` values per sample. Both ``(batch, cols)``
        and ``(batch, cols, 1)`` are accepted; the latter is flattened here
        instead of relying on the framework to squeeze it beforehand.
    cols : int, default 80
        Number of time steps per sample, i.e. where ``y_true`` is split.

    Returns
    -------
    tf.Tensor, shape (batch,)
        Sum of ``|target - prediction|`` over steps with ``u_out == 0``
        divided by the number of such steps. A sample without any such step
        yields 0 rather than NaN.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Targets and flags arrive packed in one array; use the prediction dtype
    # so the arithmetic below never mixes dtypes.
    y_true = tf.cast(y_true, y_pred.dtype)

    u_out = y_true[:, cols: ]
    y = y_true[:, :cols ]

    # Align predictions with the targets explicitly. Subtracting a
    # (batch, cols, 1) tensor from a (batch, cols) tensor would otherwise
    # broadcast (or fail) instead of comparing step by step.
    y_pred = tf.reshape(y_pred, tf.shape(y))

    # Weight 1 where u_out == 0 and 0 where u_out == 1.
    w = 1 - u_out
    mae = w * tf.abs(y - y_pred)
    # divide_no_nan returns 0 when the weight sum is 0, so one fully masked
    # sample cannot turn the whole batch loss into NaN.
    return tf.math.divide_no_nan(tf.reduce_sum(mae, axis=-1), tf.reduce_sum(w, axis=-1))
   
def get_model():  
    """Build and compile the stacked bidirectional LSTM/GRU sequence model.

    The input shape is taken from the last two axes of the module-level
    ``train`` array. Every recurrent layer uses ``return_sequences=True`` and
    the final ``Dense(units=1)`` is applied to the sequence output, so the
    model emits one value per time step.

    Returns
    -------
    keras.Model
        Model compiled with the "adam" optimizer and ``GBVPP_loss``.

    NOTE(review): weights saved elsewhere are loaded into this architecture
    later in the notebook; layer types, sizes and creation order presumably
    have to stay exactly as they are for those files to load -- confirm
    before restructuring.
    """
    x_input = keras.Input(shape=(train.shape[-2:]))
    
    # Main trunk: five stacked bidirectional LSTMs with shrinking width
    # (768 -> 512 -> 384 -> 256 -> 128 units per direction).
    x1 = layers.Bidirectional(layers.LSTM(units=768, return_sequences=True))(x_input)
    x2 = layers.Bidirectional(layers.LSTM(units=512, return_sequences=True))(x1)
    x3 = layers.Bidirectional(layers.LSTM(units=384, return_sequences=True))(x2)
    x4 = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x3)
    x5 = layers.Bidirectional(layers.LSTM(units=128, return_sequences=True))(x4)
    
    # Side branch: a bidirectional GRU fed from the second LSTM. Its unit
    # count (384) equals that of x3 so the two can be multiplied below.
    z2 = layers.Bidirectional(layers.GRU(units=384, return_sequences=True))(x2)
    
    # Each following stage multiplies the trunk output element-wise with the
    # previous GRU output, batch-normalises the product and feeds it to a GRU
    # whose unit count matches the next trunk level.
    z31 = layers.Multiply()([x3, z2])
    z31 = layers.BatchNormalization()(z31)
    z3 = layers.Bidirectional(layers.GRU(units=256, return_sequences=True))(z31)
    
    z41 = layers.Multiply()([x4, z3])
    z41 = layers.BatchNormalization()(z41)
    z4 = layers.Bidirectional(layers.GRU(units=128, return_sequences=True))(z41)
    
    z51 = layers.Multiply()([x5, z4])
    z51 = layers.BatchNormalization()(z51)
    z5 = layers.Bidirectional(layers.GRU(units=64, return_sequences=True))(z51)
    
    # Join the last trunk output with every GRU output along the feature axis.
    x = layers.Concatenate(axis=2)([x5, z2, z3, z4, z5])
    
    x = layers.Dense(units=128, activation='selu')(x)
    
    # Single regression value per time step.
    x_output = layers.Dense(units=1)(x)
    
    model = keras.Model(inputs=x_input, outputs=x_output)
    
    model.compile(optimizer = "adam", 
                  #loss = "mae",
                  loss=GBVPP_loss,
                 #sample_weight_mode="temporal",
                 )
    
    return model  

In [None]:
# Run a garbage-collection pass before the training cells start allocating.
gc.collect()

In [None]:
#keras.backend.clear_session()

In [None]:
# Frame that collects the out-of-fold predictions. It starts from the raw ids
# and true pressures; 'pressure' is overwritten fold by fold during training.
train_df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')

# .copy() makes train_preds an independent frame, so adding a column below
# does not write into a slice of train_df (SettingWithCopyWarning).
train_preds = train_df[['id', 'breath_id', 'pressure']].copy()

# 0-based position of each breath in the `train` array, repeated for each of
# its 80 rows, so KFold indices can be mapped back to rows of this frame.
train_preds['modified_breath_id'] = np.repeat(np.arange(len(train)), 80)

In [None]:
# Fine-tune one previously trained model per fold, record its out-of-fold
# predictions in train_preds and collect its test predictions in test_preds.
NUM_FOLD = 10
EPOCH = 40
# NOTE: this overrides the BATCH_SIZE chosen by the TPU/CPU detection cell.
BATCH_SIZE = 256


with strategy.scope():

    # NOTE(review): VERBOSE is defined but is not passed to model.fit below.
    VERBOSE = 0
    test_preds = []

    kf = KFold(n_splits=NUM_FOLD, shuffle=True, random_state=100)

    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        u_out_train, u_out_valid = u_outs[train_idx], u_outs[test_idx]

        # Start from the weights saved for this fold by an earlier run.
        model = get_model()
        model_path = f'../input/gb-vpp-yet-another-lstm-colab/best_valid_fold_{fold+1}.hdf5'
        model.load_weights(model_path)

        # Re-compile to continue training with plain SGD at a small step size.
        model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate=0.0007), loss=GBVPP_loss)

        plateau = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.7, patience=3, verbose=1, min_lr=1e-08)
        # restore_best_weights=False: after fit() the in-memory model holds the
        # weights of the last epoch run; the best epoch is only kept on disk
        # by the checkpoint callback below.
        estop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min',restore_best_weights=False)

        checkpoint_filepath = f"best_valid_fold_{fold+1}.hdf5"
        sv = keras.callbacks.ModelCheckpoint(
            checkpoint_filepath, monitor='val_loss', verbose=1, save_best_only=True,
            save_weights_only=False, mode='auto', save_freq='epoch',
            options=None
        )

        # GBVPP_loss expects targets and u_out flags packed side by side.
        packed_train = np.concatenate([y_train, u_out_train], axis=1)
        packed_valid = np.concatenate([y_valid, u_out_valid], axis=1)

        model.fit(X_train, packed_train,
                  validation_data = (X_valid, packed_valid), epochs = EPOCH,
                  batch_size = BATCH_SIZE, callbacks = [estop, plateau, sv],
                  shuffle=True,
                 )

        model.save(f"end_of_fold_{fold+1}.hdf5")

        # One flat value per row, in the same breath-major order as train_preds.
        y_true = y_valid.reshape(-1)
        y_pred = model.predict(X_valid, batch_size=BATCH_SIZE).reshape(-1)

        # Score only the steps with u_out == 0. The loss ignores every other
        # step, so predictions there are unconstrained and would distort a
        # MAE taken over all steps.
        scored = u_out_valid.reshape(-1) == 0
        score = mean_absolute_error(y_true[scored], y_pred[scored])

        # KFold yields validation indices in ascending order, which matches
        # the row order selected by isin().
        train_preds.loc[train_preds['modified_breath_id'].isin(test_idx), 'pressure'] = y_pred
        print(f"Fold-{fold+1} | OOF Score: {score}")

        test_preds.append(model.predict(test, batch_size=BATCH_SIZE).reshape(-1))

        gc.collect()

In [None]:
# Write the row id and the out-of-fold prediction for every training row.
fea_names = ['id', 'pressure']
oof_table = train_preds.loc[:, fea_names]
oof_table.to_csv('oof.csv', index=False)

In [None]:
#sub = pd.read_csv('../input/pressure-speed-and-weights-ltsm/submission.csv')
#sub.to_csv('submission.csv', index=False) 

In [None]:
ss = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

# Combine the folds: per-row median across the per-fold test predictions.
fold_predictions = np.vstack(test_preds)
ss['pressure'] = np.median(fold_predictions, axis=0)

# Snap every value to the nearest point of the pressure grid seen in training
# (PRESSURE_MIN + k * PRESSURE_STEP) ...
grid_steps = np.round((ss['pressure'] - PRESSURE_MIN) / PRESSURE_STEP)
ss['pressure'] = grid_steps * PRESSURE_STEP + PRESSURE_MIN

# ... and keep the result inside the observed pressure range.
ss['pressure'] = ss['pressure'].clip(PRESSURE_MIN, PRESSURE_MAX)

ss.to_csv('submission.csv', index=False)