In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

from tqdm import tqdm
import time, logging, gc
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import *

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display



## Loading dataset

In [None]:

# Read the competition's train and test tables from the Kaggle input directory.
train_csv = '../input/ventilator-pressure-prediction/train.csv'
test_csv = '../input/ventilator-pressure-prediction/test.csv'

train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)


## Looking into dataset

In [None]:
# Preview the raw training frame as loaded from train.csv.
train

In [None]:
# Preview the raw test frame as loaded from test.csv.
test

In [None]:
# Report (rows, columns) for the train frame, then the test frame.
for frame in (train, test):
    print(frame.shape)

In [None]:
## Looking for missing values
# For each frame, print a header followed by the per-column count of nulls.
for header, frame in (('\n\nTrain\n\n', train), ('\n\nTest\n\n', test)):
    print(header)
    print(frame.isnull().sum())

## Exploratory Data Analysis 

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later dropped the bare "seaborn" name, so use whichever one exists.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)

# Bar colours sampled from the "flag" colormap.
color = plt.cm.flag(np.linspace(0, 2, 4))

# Number of rows per distinct value of R.
train["R"].value_counts().plot.bar(color=color, figsize=(10, 6))

plt.title("R")
plt.xlabel(' ')
plt.ylabel("total count")
plt.show()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later dropped the bare "seaborn" name, so use whichever one exists.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)

# Bar colours sampled from the "flag" colormap.
color = plt.cm.flag(np.linspace(0, 2, 4))

# Number of rows per distinct value of C.
train["C"].value_counts().plot.bar(color=color, figsize=(10, 6))

plt.title("C")
plt.xlabel(' ')
plt.ylabel("total count")
plt.show()

In [None]:
# Pairwise correlation between the columns of the training frame, drawn as a heatmap.

fig, ax = plt.subplots(figsize=(12, 8))
corr = train.corr()
sns.heatmap(corr, ax=ax)

In [None]:
# Share of rows for each value of u_out, as a slightly exploded pie chart.
u_out_counts = train["u_out"].value_counts()
u_out_counts.plot.pie(
    figsize=(12, 8),
    explode=(0.01, 0.01),
    autopct="%1.1f%%",
)
plt.title("U_out", fontsize=18)
plt.show()

In [None]:
# Pressure distribution (the target feature).
# The plot is slow to render, so it is commented out; uncomment the line below to view it.

#sns.histplot(data=train['pressure'], x= train["pressure"], kde=True)

## Some basic info.
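1.   When `u_out = 0`, the row belongs to the inhalation phase of the breath (pressure while the patient inhales).
2.   When `u_out = 1`, the row belongs to the exhalation phase of the breath (pressure while the patient exhales).
3.   `u_in` represents the setting of the inspiratory valve that controls the oxygen delivered to the patient.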

In [None]:
# One figure per breath id 1..19: inhale pressure, exhale pressure and u_in against row id.
for breath in range(1, 20):
    breath_rows = train.loc[train["breath_id"] == breath]
    inhale_rows = breath_rows.loc[breath_rows["u_out"] == 0]
    exhale_rows = breath_rows.loc[breath_rows["u_out"] == 1]

    plt.figure(figsize=(6, 4))
    sns.lineplot(data=inhale_rows, x="id", y="pressure", color="green", label="pressure inhale")
    sns.lineplot(data=exhale_rows, x="id", y="pressure", color="orange", label="pressure exhale")
    sns.lineplot(data=breath_rows, x="id", y="u_in", color="blue", label="valve position")
    plt.title(f"Variation of Pressure and Input valve position during breath {breath}")
    plt.legend()

## Feature Engineering

In [None]:
def add_features(df):
    """Return a feature-engineered, one-hot-encoded copy of a ventilator frame.

    The input must contain the columns ``breath_id``, ``time_step``, ``u_in``,
    ``u_out``, ``R`` and ``C``. The caller's frame is left untouched; all new
    columns are added to an internal copy.

    Added columns
    -------------
    area            cumulative sum of ``time_step * u_in`` within each breath
    cross, cross2   ``u_in * u_out`` and ``time_step * u_out``
    u_in_cumsum     cumulative sum of ``u_in`` within each breath
    one, count      constant 1 and its per-breath running count (helper columns)
    u_in_cummean    ``u_in_cumsum / count``
    breath_id_lag*  ``breath_id`` of the 1st/2nd preceding row and 0/1 flags
                    telling whether that row belongs to the same breath (helpers)
    u_in_lag, u_in_lag2, u_out_lag2
                    value from the 1st/2nd preceding row, zeroed when that row
                    belongs to a different breath
    RC              string concatenation of ``R`` and ``C``

    ``R``, ``C`` and ``RC`` are converted to strings and replaced by dummy
    columns via ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original non-categorical columns, the columns
        listed above and the dummy columns.
    """
    # Work on a copy so that calling this function never alters the caller's frame.
    df = df.copy()
    breath = df['breath_id']

    # Interaction and cumulative features.
    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()
    df['one'] = 1
    df['count'] = df['one'].groupby(breath).cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # Lags are taken from the preceding rows of the whole frame; the *same flags
    # mask out values that would otherwise leak in from a different breath.
    df['breath_id_lag'] = breath.shift(1).fillna(0)
    df['breath_id_lag2'] = breath.shift(2).fillna(0)
    df['breath_id_lagsame'] = np.select([df['breath_id_lag'] == breath], [1], 0)
    df['breath_id_lag2same'] = np.select([df['breath_id_lag2'] == breath], [1], 0)
    df['u_in_lag'] = df['u_in'].shift(1).fillna(0) * df['breath_id_lagsame']
    df['u_in_lag2'] = df['u_in'].shift(2).fillna(0) * df['breath_id_lag2same']
    df['u_out_lag2'] = df['u_out'].shift(2).fillna(0) * df['breath_id_lag2same']

    # Treat R, C and their combination as categorical and one-hot encode them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['RC'] = df['R'] + df['C']
    return pd.get_dummies(df)

# Apply the same feature engineering to both frames, rebinding each name to the
# frame returned by add_features. The two calls stay sequential so the old train
# frame can be released before the test frame is processed.
# NOTE(review): re-running this cell on the already-processed frames looks unsafe
# (R and C have been replaced by dummy columns) — re-run from the load cell instead.
train = add_features(train)
test = add_features(test)


## Extracting target feature

In [None]:
# Target: pressure values regrouped into rows of 80 consecutive entries.
y = train['pressure'].to_numpy().reshape(-1, 80)

# Identifier and helper columns that must not be fed to the model.
non_feature_cols = [
    'id', 'breath_id', 'one', 'count',
    'breath_id_lag', 'breath_id_lag2',
    'breath_id_lagsame', 'breath_id_lag2same',
    'u_out_lag2',
]
train = train.drop(['pressure'] + non_feature_cols, axis=1)
test = test.drop(non_feature_cols, axis=1)

In [None]:
# (rows, feature columns) of the training frame after the non-feature columns were dropped.
train.shape

In [None]:
# Preview the training feature frame that will be scaled and fed to the model.
train

In [None]:
# y was reshaped with (-1, 80), so the second dimension is 80.
y.shape

In [None]:
# Preview the target array (pressure values, 80 per row).
y

## Robust scaling

In [None]:
# Fit the scaler on the training features only, then apply the same
# transformation to the train and test features.
rb = RobustScaler().fit(train)

train = rb.transform(train)
test = rb.transform(test)

In [None]:
# Regroup the flat (rows, features) arrays into (sequences, 80, features).
# train.shape[-1] is still the feature count after train itself is reshaped,
# so the same expression is valid for test on the next line.
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])

# The original referenced `gc.collect` without calling it, which does nothing;
# actually run the collector (trailing `;` hides the returned count in the cell output).
gc.collect();

In [None]:
# Detect hardware and build the matching tf.distribute strategy in `strategy`.
# The TPU path is tried first; if it raises ValueError the fallback branch
# creates a MirroredStrategy and switches on mixed precision and XLA.
# NOTE(review): the fallback runs on any ValueError without checking that a GPU
# is actually present — confirm mixed_float16 is wanted on CPU-only machines.
# NOTE(review): the `experimental` TPUStrategy / mixed_precision entry points
# look version-specific — confirm they exist in the installed TensorFlow release.

print(tf.version.VERSION)
tf.get_logger().setLevel(logging.ERROR)
try: # attempt TPU setup; the statement order below is connect -> initialize -> strategy
    tpu = None
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # no TPU resolved: fall back to MirroredStrategy and enable mixed precision
    strategy = tf.distribute.MirroredStrategy() # works on GPU and multi-GPU
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
    tf.config.optimizer.set_jit(True) # XLA compilation
    tf.keras.mixed_precision.experimental.set_policy(policy)
    print('Mixed precision enabled')
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
def plot_hist(hist):
    """Plot the training and validation loss curves of a fit history.

    `hist` must expose a `history` mapping with "loss" and "val_loss"
    entries (one value per epoch); both are drawn on the current figure,
    which is then shown.
    """
    curves = hist.history
    for key in ("loss", "val_loss"):
        plt.plot(curves[key])

    plt.title("model performance")
    plt.ylabel("mean_absolute_error")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

## Model LSTM Bidirectional

In [None]:
def create_model(input_shape=(80, 25), lstm_units=(900, 700, 512, 256, 128, 64)):
    """Build and compile the stacked bidirectional-LSTM regression model.

    Parameters
    ----------
    input_shape : tuple of int, default (80, 25)
        Shape of one sample as (time steps, features). The default matches the
        arrays prepared earlier in this notebook; pass `train.shape[1:]` if the
        feature set changes so the model never drifts out of sync with the data.
    lstm_units : sequence of int, default (900, 700, 512, 256, 128, 64)
        Units of each successive Bidirectional(LSTM) layer; every layer returns
        the full sequence so one value is predicted per time step.

    Returns
    -------
    The Sequential model, compiled with the "adam" optimizer and "mae" loss.
    It is created inside the module-level `strategy` scope.
    """
    with strategy.scope():
        recurrent_stack = [
            Bidirectional(LSTM(units, return_sequences=True))
            for units in lstm_units
        ]
        model = Sequential([
            Input(shape=input_shape),
            *recurrent_stack,
            Dense(256, activation='selu'),
            Dense(1),
        ])

        model.compile(optimizer="adam", loss="mae")
    return model

In [None]:
# K-fold training: fit one model per fold and collect its test-set predictions.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Learning-rate schedule: start at initial_lr and multiply by decay_rate over
# every decay_epochs epochs.
initial_lr, decay_rate, decay_epochs = 1e-3, 1e-5, 200
batch_size = 512


def lr_for_epoch(epoch, lr=None):
    """Learning rate to use for the given (0-based) epoch, as a Python float.

    The original cell passed ExponentialDecay(1e-3, 200 * steps_per_epoch, 1e-5)
    to LearningRateScheduler. That callback calls the schedule with the *epoch*
    index, while decay_steps was expressed in *batches*, so the decay ran
    roughly steps_per_epoch times slower than configured. This function returns
    the value that schedule has at step = epoch * steps_per_epoch, in which
    steps_per_epoch cancels out. `lr` (the current rate, supplied by the
    callback) is accepted but not used.
    """
    return initial_lr * decay_rate ** (epoch / decay_epochs)


test_preds = []

for fold, (train_idx, valid_idx) in enumerate(kf.split(train, y)):
    print(f"****** fold: {fold+1} *******")
    X_train, X_valid = train[train_idx], train[valid_idx]
    y_train, y_valid = y[train_idx], y[valid_idx]

    # Stop when val_loss has not improved for 35 epochs and keep the best weights.
    es = EarlyStopping(monitor='val_loss', mode='min', patience=35, verbose=1, restore_best_weights=True)
    lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_for_epoch)

    model = create_model()

    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=300,
        batch_size=batch_size,
        callbacks=[es, lr_schedule],
    )

    # Flatten the (sequences, 80, 1) predictions to one value per test row.
    test_preds.append(model.predict(test).reshape(-1))
    plot_hist(history)

    # Release the fold's arrays and model before the next fold is built.
    del X_train, X_valid, y_train, y_valid, model
    gc.collect()

In [None]:
# Average the per-fold test predictions and write the submission file.
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

# Divide by the number of fold predictions actually collected instead of a
# hard-coded 5, so the mean stays correct if the number of folds changes.
submission["pressure"] = sum(test_preds) / len(test_preds)
submission.to_csv('submission.csv', index=False)
submission

### Do upvote if it helps you