In [None]:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPool2D, Conv3D, Bidirectional, LSTM, GRU, Add
from tensorflow.keras.layers import MaxPool3D, UpSampling2D, GlobalMaxPool2D, GlobalMaxPool3D
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalAveragePooling2D, GlobalAveragePooling3D, Conv2DTranspose, Concatenate
from tensorflow.keras.layers import Dense, Dropout, Activation, Reshape, Flatten, Input

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical, plot_model

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications import NASNetMobile, Xception, DenseNet121, MobileNetV2, InceptionV3, InceptionResNetV2, vgg16, resnet50, inception_v3, xception, DenseNet201

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import metrics
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
import math

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import jaccard_score
from sklearn.cluster import KMeans
from sklearn.preprocessing import RobustScaler
from scipy import stats

import seaborn as sns

import skimage
from skimage.transform import rotate

from tqdm.notebook import tqdm
from datetime import datetime

from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, roc_auc_score, roc_curve, mean_absolute_error
import numpy as np
import os
import cv2
import pandas as pd
# import imutils
import random
from PIL import Image
import matplotlib.pyplot as plt

import pickle
import urllib

import warnings
warnings.filterwarnings("ignore")

from PIL import Image

import tensorflow_addons as tfa

from IPython.display import HTML

import pydicom
from pydicom.pixel_data_handlers.util import apply_color_lut
import re

import itertools

from sklearn.utils import shuffle

In [None]:
# Choose the distribution strategy and the global batch size.
# On success the TPU strategy is used with 64 samples per replica; if the
# resolver setup raises ValueError (the case this notebook treats as
# "no TPU available") the currently active default strategy is used instead.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
    BATCH_SIZE = tpu_strategy.num_replicas_in_sync * 64
    print("Running on TPU:", tpu.master())
    print(f"Batch Size: {BATCH_SIZE}")

except ValueError:
    strategy = tf.distribute.get_strategy()
    # Later cells always enter `tpu_strategy.scope()`. Without this alias the
    # name would be undefined on the fallback path and those cells would
    # fail with NameError.
    tpu_strategy = strategy
    BATCH_SIZE = 512
    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    print(f"Batch Size: {BATCH_SIZE}")

In [None]:
# Read both competition CSVs and cast every column to float32.
# The generator is consumed left to right, so train is read before test.
train, test = (
    pd.read_csv(csv_path).astype(np.float32)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
    )
)

In [None]:
# Display the freshly loaded training frame (notebook rich repr).
train

In [None]:
def add_features(df, use_lags=4, rolling_windows=(15,)):
    """Build the per-row engineered feature matrix.

    The input frame is copied, never modified. Features are computed per
    ``breath_id`` group for the three base signals ``u_in``, ``time_step``
    and ``cross`` (= ``u_in * u_out``):

    * running sum and running mean,
    * differences to the value ``lag`` rows earlier and ``lag`` rows later,
    * rolling sum / min / max / mean over each window size.

    ``R``, ``C`` and their combination ``R__C`` are one-hot encoded, every
    NaN is replaced by 0 and the helper/identifier columns are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``breath_id``, ``R``, ``C``, ``time_step``,
        ``u_in`` and ``u_out``. The row index must be unique, because the
        grouped results are aligned back onto the frame by index.
    use_lags : int, default 4
        Largest lag/lead distance; distances ``1..use_lags`` are generated.
    rolling_windows : iterable of int, default (15,)
        Window sizes for the rolling statistics (``min_periods=1``).

    Returns
    -------
    pandas.DataFrame
        Feature frame cast to ``float16`` without ``id``, ``breath_id``,
        ``one`` and ``count``. Other numeric input columns are kept, but
        they are also rounded to half precision by the final cast.
    """
    df = df.copy()

    feature_list = ['u_in', 'time_step', 'cross']

    # Interaction features. They are created *before* the groupby so the
    # grouped object never has to rely on columns being added afterwards.
    df['cross'] = df['u_in'] * df['u_out']
    df['area_out'] = df['time_step'] * df['u_out']
    df['area'] = df['time_step'] * df['u_in']

    # add
    df['air_flow_rate'] = df['u_out'] - (df['u_in'] / 100.0)
    df['air_flow_area'] = df['air_flow_rate'] * df['time_step']
    print("Step-1...Completed")

    # cumsum cummean: `count` is the 1-based position of the row in its breath.
    df['one'] = 1
    df_group = df.groupby(['breath_id'])
    df['count'] = df_group['one'].cumsum()
    for feature in feature_list:
        df[f'{feature}_cumsum'] = df_group[feature].cumsum()
        df[f'{feature}_cummean'] = df[f'{feature}_cumsum'] / df['count']

    print("Step-2 cumsum cummean ...Completed")

    # lagging: only the differences are kept, so the shifted series stay
    # local instead of being written to the frame and dropped again (which
    # copied the whole frame once per feature and lag).
    for lag in range(1, use_lags + 1):
        for feature in feature_list:
            previous = df_group[feature].shift(lag)
            following = df_group[feature].shift(-lag)
            df[f'{feature}_lag_diff_{lag}'] = df[feature] - previous
            df[f'{feature}_lag_inverse_diff_{lag}'] = df[feature] - following

    # Rows too close to a breath boundary have no neighbour -> NaN -> 0.
    # `df` is rebound here; `df_group` keeps pointing at the pre-fill frame,
    # whose base feature columns are the ones used for the rolling step.
    df = df.fillna(0)
    print("Step-3 lagging ...Completed")

    # One-hot encode R, C and the R/C combination.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df["R"].astype(str) + '__' + df["C"].astype(str)
    df = pd.get_dummies(df)
    df = df.fillna(0)
    print("Step-4 categorical ...Completed")

    # Rolling statistics per breath. The grouped rolling result is indexed
    # by (breath_id, original index); dropping level 0 lets the assignment
    # align each value with its original row.
    for roll in rolling_windows:
        for feature in feature_list:
            roller = df_group[feature].rolling(window=roll, min_periods=1)
            window_stats = {
                'sum': roller.sum(),
                'min': roller.min(),
                'max': roller.max(),
                'mean': roller.mean(),
            }
            for stat, values in window_stats.items():
                df[f'{roll}_{feature}_{stat}'] = values.reset_index(level=0, drop=True)

    print("Step-5 Sliding window...Completed")
    print()

    df = df.fillna(0)
    df = df.drop(['id', 'breath_id', 'one', 'count'], axis=1)

    return df.astype(np.float16)

In [None]:
%%time
# Replace the raw frames with their engineered float16 feature matrices.
# This cell cannot be re-run on its own: add_features() groups by
# `breath_id` and then drops it, so a second call on the output fails.
train = add_features(train)
test = add_features(test)
train

In [None]:
# Column dtypes and memory footprint of the engineered training frame.
train.info()

In [None]:
import gc
# Force a garbage-collection pass after feature engineering.
gc.collect()

In [None]:
# One row of 80 pressure values per breath. The values come from the
# float16 frame returned by add_features(), so they are half precision.
# assumes every breath has exactly 80 rows, stored contiguously — TODO confirm
targets = train['pressure'].to_numpy().reshape(-1, 80)

In [None]:
# The target has been extracted into `targets`; remove it from the features.
train = train.drop('pressure', axis=1)

In [None]:
%%time
# Pairwise correlation matrix between all feature columns.
corr = train.corr()

In [None]:
# Drop every column whose absolute correlation with an EARLIER column
# reaches the threshold. NaN correlations never satisfy `>=`, so such
# columns are kept.
threshold = 1
corr = np.abs(corr)

# Strict upper triangle: entry (i, j) with i < j is True when column j
# duplicates column i. A column is dropped if any earlier column flags it.
is_duplicate = np.triu(corr.to_numpy() >= threshold, k=1).any(axis=0)
drop_columns = corr.columns[is_duplicate].tolist()

train = train.drop(columns=drop_columns)
test = test.drop(columns=drop_columns)
print(f'Columns drop size : {len(drop_columns)}')

In [None]:
# Fit the scaler on the training features only, then apply the same
# transform to the test features. Both names are rebound to the arrays
# returned by the scaler (no longer DataFrames).
scaler = RobustScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

In [None]:
import pickle
# Persist the fitted scaler. The context manager guarantees the file
# handle is flushed and closed, which the bare `open()` call did not.
with open('scaler.pickle', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Turn the flat (rows, features) arrays into (sequences, 80, features).
# assumes every breath contributes exactly 80 consecutive rows — TODO confirm
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, test.shape[-1])

In [None]:
# Force a garbage-collection pass before building the model.
gc.collect()

# Model

In [None]:
# The `with` block only covers the `def` statement itself. The function body
# runs when dnn_model() is called, so a caller that needs the variables
# created under the strategy must call it inside a scope of its own.
with tpu_strategy.scope():
    def dnn_model(input_shape=None):
        """Build and compile the stacked bidirectional LSTM/GRU regressor.

        Parameters
        ----------
        input_shape : tuple of int, optional
            ``(time_steps, n_features)`` of a single sample. When omitted,
            the shape is read from the global ``train`` array
            (``train.shape[-2:]``), which is what the notebook did before
            this parameter existed.

        Returns
        -------
        tensorflow.keras.Model
            Model named ``DNN_Model``, compiled with the Adam optimizer and
            MAE loss. Every recurrent layer returns full sequences and the
            last layer is ``Dense(1)``, so the output has one value per
            time step.
        """
        if input_shape is None:
            input_shape = train.shape[-2:]
        x_input = Input(shape=input_shape)

        # LSTM trunk. Bidirectional concatenates both directions by default,
        # so each layer outputs 2 * units features per time step.
        x1 = Bidirectional(LSTM(units=768, return_sequences=True))(x_input)
        x2 = Bidirectional(LSTM(units=512, return_sequences=True))(x1)
        x3 = Bidirectional(LSTM(units=256, return_sequences=True))(x2)
        x4 = Bidirectional(LSTM(units=128, return_sequences=True))(x3)

        # GRU branch fed by the trunk. The unit counts are chosen so that
        # each Add() receives two tensors of the same width
        # (x3 and z2: 512 features, x4 and z3: 256 features).
        z2 = Bidirectional(GRU(units=256, return_sequences=True))(x2)
        z3 = Bidirectional(GRU(units=128, return_sequences=True))(Add()([x3, z2]))
        z4 = Bidirectional(GRU(units=64, return_sequences=True))(Add()([x4, z3]))

        # Merge the last trunk output with all GRU outputs on the feature axis.
        x = Concatenate(axis=2)([x4, z2, z3, z4])

        x = Dense(units=128, activation='selu')(x)

        x_output = Dense(units=1)(x)

        model = Model(inputs=x_input, outputs=x_output,
                      name='DNN_Model')

        model.compile(optimizer='adam',
                      loss='mae'
        )

        return model

In [None]:
# Build one model instance just to inspect its layers. This call happens
# outside any strategy scope; the training loop builds its own models.
model = dnn_model()
model.summary()

In [None]:
# Render the layer graph, with tensor shapes and layer names, to a PNG file.
plot_model(
    model, 
    to_file='Google_Brain_Keras_Model.png', 
    show_shapes=True,
    show_layer_names=True
)

# Train

In [None]:
# K-fold cross-validated training.
# Everything runs inside the strategy scope, so each fold's dnn_model() call
# creates its variables under that strategy. Names assigned here are
# notebook globals; `nfold`, `history_list` and `test_preds` are read by the
# submission and plotting cells further down.
with tpu_strategy.scope():
    
    nfold = 5
    VERBOSE = 0
    # `train` is a 3-D array at this point (one entry per 80-step sequence),
    # so the folds split whole sequences, not individual rows.
    kf = KFold(n_splits=nfold, shuffle=True, random_state=2021)
    history_list = []   # one Keras History object per fold
    test_preds = []     # one flattened test prediction vector per fold
    
    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        # `test_idx` indexes the validation part of `train`, not the test set.
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]
        
        # Fresh, untrained model for every fold.
        model = dnn_model()

        # Multiply the learning rate by 0.75 after 10 epochs without
        # improvement of the validation loss.
        lr = ReduceLROnPlateau(monitor='val_loss', factor=0.75, 
                               patience=10, verbose=VERBOSE)
        
        # Keep only the checkpoint with the lowest validation loss, one
        # file per fold.
        # NOTE(review): SaveOptions is passed together with an .h5 path;
        # confirm the option has an effect for this file format.
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        chk_point = ModelCheckpoint(f'./Bidirect_LSTM_model_{fold+1}C.h5', options=save_locally, 
                                    monitor='val_loss', verbose=VERBOSE, 
                                    save_best_only=True, mode='min')

        # Stop after 50 epochs without improvement of the validation loss.
        # NOTE(review): whether the best weights are also restored when all
        # 300 epochs run without an early stop depends on the Keras version —
        # confirm, since the predictions below use the in-memory weights.
        es = EarlyStopping(monitor="val_loss", patience=50, 
                           verbose=VERBOSE, mode="min", 
                           restore_best_weights=True)
        
        # Progress bar callback, used because Keras' own logging is
        # silenced (VERBOSE = 0).
        tqdm_callback = tfa.callbacks.TQDMProgressBar(show_epoch_progress=False)
        
        history = model.fit(X_train, y_train, 
                          validation_data=(X_valid, y_valid), 
                          epochs=300,
                          verbose=VERBOSE,
                          batch_size=BATCH_SIZE, 
                          callbacks=[tqdm_callback, lr, chk_point, es])
        
        history_list += [history]
        
        # Out-of-fold score: flatten targets and predictions to column
        # vectors and compare them element-wise.
        y_true = y_valid.squeeze().reshape(-1, 1)
        y_pred = model.predict(X_valid, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1)
        score = mean_absolute_error(y_true, y_pred)
        print(f"Fold-{fold+1} | OOF Score: {score}")
        
        # Test predictions of this fold, flattened to 1-D.
        test_preds.append(model.predict(test, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1).squeeze())

* Fold-1 | OOF Score: 0.16721594333648682
* Fold-2 | OOF Score: 0.17076881229877472
* Fold-3 | OOF Score: 0.16922391951084137
* Fold-4 | OOF Score: 0.16878041625022888
* Fold-5 | OOF Score: 0.16836853325366974

In [None]:
# Mean ensemble over the fold predictions.
# np.mean divides by the number of prediction vectors actually collected,
# so the result stays correctly scaled even if fewer than `nfold` folds
# finished. It also mirrors the np.median call used for the median
# submission.
# assumes sample_submission rows are in the same order as test.csv — TODO confirm
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
submission["pressure"] = np.mean(test_preds, axis=0)
submission.to_csv('submission_mean.csv', index=False)
submission

In [None]:
# Median ensemble: element-wise median across the per-fold test predictions.
submission = (
    pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
    .assign(pressure=np.median(test_preds, axis=0))
)
submission.to_csv('submission_median.csv', index=False)
submission

In [None]:
# One figure per fold with the training and validation loss curves.
# The explicit Axes API lets each figure carry its own title and axis
# labels, and the figure is closed after display so figures do not pile up.
for fold, history in enumerate(history_list):
    print(f'Fold : {fold+1}')
    fig, ax = plt.subplots(figsize=(15, 7))
    for name in ['loss', 'val_loss']:
        ax.plot(history.history[name], label=name)
    # The model is compiled with loss='mae', hence the y-axis label.
    ax.set(title=f'Fold {fold+1}: training history',
           xlabel='epoch', ylabel='MAE loss')
    ax.legend()
    plt.show()
    plt.close(fig)
