In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Description
What do doctors do when a patient has trouble breathing? They use a ventilator to pump oxygen into a sedated patient's lungs via a tube in the windpipe. But mechanical ventilation is a clinician-intensive procedure, a limitation that was prominently on display during the early days of the COVID-19 pandemic. At the same time, developing new methods for controlling mechanical ventilators is prohibitively expensive, even before reaching clinical trials. High-quality simulators could reduce this barrier.

Current simulators are trained as an ensemble, where each model simulates a single lung setting. However, lungs and their attributes form a continuous space, so a parametric approach must be explored that would consider the differences in patient lungs.

Partnering with Princeton University, the team at Google Brain aims to grow the community around machine learning for mechanical ventilation control. They believe that neural networks and deep learning can better generalize across lungs with varying characteristics than the current industry standard of PID controllers.

In this competition, you’ll simulate a ventilator connected to a sedated patient's lung. The best submissions will take lung attributes compliance and resistance into account.

If successful, you'll help overcome the cost barrier of developing new methods for controlling mechanical ventilators. This will pave the way for algorithms that adapt to patients and reduce the burden on clinicians during these novel times and beyond. As a result, ventilator treatments may become more widely available to help patients breathe.

Reference - Addison Howard, alexjyu, Daniel Suo, Will Cukierski. (2021). Google Brain - Ventilator Pressure Prediction. Kaggle. https://kaggle.com/competitions/ventilator-pressure-prediction

# Dataset Description
The ventilator data used in this competition was produced using a modified open-source ventilator connected to an artificial bellows test lung via a respiratory circuit. The diagram below illustrates the setup, with the two control inputs highlighted in green and the state variable (airway pressure) to predict in blue. The first control input is a continuous variable from 0 to 100 representing the percentage the inspiratory solenoid valve is open to let air into the lung (i.e., 0 is completely closed and no air is let in and 100 is completely open). The second control input is a binary variable representing whether the expiratory valve is open (1) or closed (0) to let air out.

In this competition, participants are given numerous time series of breaths and will learn to predict the airway pressure in the respiratory circuit during the breath, given the time series of control inputs.

## Importing necessary packages and datasets

In [None]:
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from collections import Counter

In [None]:
# Debug switch: when True, later cells work on a reduced training set and use
# lighter training settings so the whole notebook can be checked quickly.
small = True

# Suppress all warnings so they do not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

In [None]:
# Load the competition tables from the Kaggle input directory:
# `train` carries the target column, `test` is what the submission is predicted for.
train = pd.read_csv(
    "/kaggle/input/ventilator-pressure-prediction/train.csv"
)
test = pd.read_csv(
    "/kaggle/input/ventilator-pressure-prediction/test.csv"
)


In [None]:
# Preview the raw training table; as the last expression of the cell it is rendered by the notebook.
train

In [None]:
# Debug mode: keep only the rows whose breath_id is below 1000 (a deterministic
# filter on breath_id, not a random sample) and renumber the index from zero.
if small:
    keep_mask = train['breath_id'] < 1000
    train = train.loc[keep_mask].reset_index(drop=True)
    display(train)

## Sequential Modeling

In [None]:
# A Bi-LSTM is used for the time series: each breath is treated as one fixed-length
# sequence covering the whole cycle (inhale and exhale), so the flat tables are
# cut into chunks of `seq_len` consecutive rows.
seq_len = 80


def _assert_breath_chunks(frame, frame_name):
    """Fail fast unless every consecutive run of ``seq_len`` rows is a single breath.

    The row-wise ``reshape(-1, seq_len)`` calls only yield meaningful sequences when
    the table is laid out that way; otherwise rows of different breaths would be
    mixed silently (or the reshape would fail with an unhelpful message).

    Args:
        frame: DataFrame that still contains the ``breath_id`` column.
        frame_name: Label used in the assertion messages.

    Raises:
        AssertionError: If the row count is not a multiple of ``seq_len`` or a
            chunk contains more than one ``breath_id``.
    """
    assert len(frame) % seq_len == 0, (
        f"{frame_name}: {len(frame)} rows is not a multiple of seq_len={seq_len}"
    )
    breath_ids = frame["breath_id"].to_numpy().reshape(-1, seq_len)
    # Compare every id in a chunk with the first id of that chunk.
    assert (breath_ids == breath_ids[:, :1]).all(), (
        f"{frame_name}: a {seq_len}-row chunk spans more than one breath_id"
    )


# Validate the layout while the identifier column is still present.
_assert_breath_chunks(train, "train")
_assert_breath_chunks(test, "test")

# Targets: one row of `seq_len` pressure values per chunk.
y_train = train.pressure.to_numpy().reshape(-1, seq_len)
# Features: drop the target and the row / breath identifier columns.
X_train = train.drop(columns=["pressure", "id", "breath_id"])
test = test.drop(columns=["id", "breath_id"])

In [None]:
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation split is identical on every run.
split_seed = 42

# Cut the flat feature tables into (n_sequences, seq_len, n_features) arrays first,
# so that the split below is made per sequence and before any scaling statistics
# are computed.
X_sequences = np.asarray(X_train, dtype=np.float64).reshape(-1, seq_len, X_train.shape[-1])
test_sequences = np.asarray(test, dtype=np.float64).reshape(-1, seq_len, test.shape[-1])

X_train, X_val, y_train, y_val = train_test_split(
    X_sequences, y_train, test_size=0.3, random_state=split_seed
)


def _scale_sequences(sequences, fit=False):
    """Apply ``scaler`` feature-wise to a 3-D batch of sequences.

    Args:
        sequences: Array of shape (n_sequences, seq_len, n_features).
        fit: When True the scaler is fitted on ``sequences`` before transforming.

    Returns:
        The scaled array, reshaped back to the shape of ``sequences``.
    """
    flat = sequences.reshape(-1, sequences.shape[-1])
    flat = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return flat.reshape(sequences.shape)


# Fit the scaler on the training part only: fitting on all rows before the split
# would leak validation statistics into the preprocessing.
scaler = RobustScaler()
X_train = _scale_sequences(X_train, fit=True)
X_val = _scale_sequences(X_val)
test = _scale_sequences(test_sequences)

print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dense

In [None]:
# Training hyper-parameters: the debug run uses fewer epochs and a smaller batch.
epochs, batch_size = (150, 128) if small else (200, 1024)

# Multiply the learning rate by 0.5 when val_loss stops improving (patience of
# 10 epochs), with a lower bound of 1e-5.
scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-5)

# Four stacked bidirectional LSTMs of decreasing width; every layer returns the
# full sequence so the head can emit a value for each time step.
lstm_stack = [
    Bidirectional(LSTM(units, return_sequences=True))
    for units in (250, 200, 150, 100)
]

model = Sequential([
    Input(shape=X_train.shape[1:]),
    *lstm_stack,
    Dense(50, activation='relu'),
    Dense(1),  # output layer
])

# Mean absolute error is the training objective, optimised with Adam.
model.compile(optimizer='adam', loss='mae')
model.summary()

## Training 

In [None]:
# Train the network, evaluating on the held-out split after every epoch; the
# plateau scheduler is the only callback. The fit history is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[scheduler],
    validation_data=(X_val, y_val),
)

In [None]:
# Predict the test sequences and flatten the output to a 1-D array in row order.
# A single reshape(-1) replaces the squeeze/reshape/squeeze chain and always
# yields a 1-D array, even for degenerate shapes that squeeze() would collapse.
result = model.predict(test).reshape(-1)
result

In [None]:
# Number of predicted values held in `result`.
len(result)

In [None]:
# Start from the sample submission and overwrite its pressure column with the
# predictions; the assignment raises if the lengths do not match.
sub = pd.read_csv("/kaggle/input/ventilator-pressure-prediction/sample_submission.csv")
sub['pressure'] = result
# Display the filled frame: only the last expression of a cell is rendered, so the
# bare `sub` that used to sit in the middle of the cell produced no output.
sub


In [None]:
# Write the submission file to the working directory without the index column.
sub.to_csv("sub_1234.csv", index=False)

Reference - fastcampus course "모두를위한 딥러닝"