In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import lightgbm as lgb
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_squared_error as rmse
import tensorflow
from tensorflow.keras import Sequential,Model
from tensorflow.keras.layers import Dense,Dropout, Input, LSTM
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the competition's train and test tables (train first, then test).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/ventilator-pressure-prediction/train.csv",
        "/kaggle/input/ventilator-pressure-prediction/test.csv",
    )
)

In [None]:
def log_exp_return(series):
    """Return exp of the first difference of ``log1p(series)``.

    For each position this equals ``(1 + x[t]) / (1 + x[t-1])``; the first
    element has no predecessor, so its difference is filled with 0 and the
    result there is ``exp(0) == 1``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values.

    Returns
    -------
    pandas.Series
        Same index as ``series``.
    """
    log_values = np.log1p(series)
    step_change = log_values.diff(1)
    # The leading NaN from diff() becomes 0, i.e. a ratio of 1 after exp().
    step_change = step_change.fillna(0)
    return np.exp(step_change)

def preprocessing(df):
    """Add per-breath engineered features and return the enriched frame.

    The work is done on a copy, so the caller's frame is left untouched and the
    function can be called repeatedly on the same raw frame. (Mutating the
    input made a second call fail: ``R`` and ``C`` had already been converted
    to categoricals and could no longer be multiplied or divided.)

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``breath_id``, ``time_step``, ``u_in``, ``R`` and ``C``.
        Rows are assumed to be in time order within each breath -- TODO confirm
        against the data source. Other columns are passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns plus, in this order:
        ``time_diff``, ``u_in_ratio``, ``last_value_u_in``,
        ``first_value_u_in``, ``area``, ``u_in_cumsum``,
        ``u_in_lag_fwrd1``, ``u_in_lag_back1``, ``u_in_lag_fwrd2``,
        ``u_in_lag_back2``, ``RC``, ``R/C``, ``C/R``.
        ``R``, ``C``, ``RC``, ``R/C`` and ``C/R`` are returned as categoricals.
    """
    df = df.copy()
    breath = df['breath_id']  # grouping key shared by every per-breath feature

    # time diff: gap to the previous row of the same breath (0 for the first row)
    df['time_diff'] = df['time_step'].groupby(breath).diff(1).fillna(0)

    # u_in parameter
    # (1 + u_in[t]) / (1 + u_in[t-1]) within a breath, i.e. what log_exp_return
    # computes per group. groupby().diff() keeps the original row index, whereas
    # groupby().apply() prepends the group key on pandas >= 2.0 and the column
    # assignment then fails; it also avoids one Python call per breath.
    log_u_in = np.log1p(df['u_in'])
    df['u_in_ratio'] = np.exp(log_u_in.groupby(breath).diff(1).fillna(0))
    df['last_value_u_in'] = df['u_in'].groupby(breath).transform('last')
    df['first_value_u_in'] = df['u_in'].groupby(breath).transform('first')

    # u_in area
    # NOTE(review): this accumulates time_step * u_in (absolute time, not
    # time_diff); kept as-is because changing it would alter the feature.
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()

    # u_in shift change
    # Despite the names, "fwrd{i}" holds the value i rows earlier (shift(+i))
    # and "back{i}" the value i rows later (shift(-i)); rows without such a
    # neighbour inside the breath get 0.
    for i in range(1, 3):
        df[f'u_in_lag_fwrd{i}'] = df['u_in'].groupby(breath).shift(i).fillna(0)
        df[f'u_in_lag_back{i}'] = df['u_in'].groupby(breath).shift(-i).fillna(0)

    # R, C parameter
    df['RC'] = df['C'] * df['R']
    df['R/C'] = df['R'] / df['C']
    df['C/R'] = df['C'] / df['R']
    # Convert only after the arithmetic above: categoricals cannot be
    # multiplied or divided.
    for column in ('R', 'C', 'RC', 'R/C', 'C/R'):
        df[column] = df[column].astype('category')

    return df

In [None]:
# Target column for the regression.
ytrain = df_train["pressure"]

# Engineer features for each split, then remove the identifier columns (and,
# for train, the target) so only model inputs remain.
xtrain = preprocessing(df_train).drop(columns=["id", "breath_id", "pressure"])
xtest = preprocessing(df_test).drop(columns=["id", "breath_id"])

In [None]:
# Fit the RobustScaler on the training features only, then apply that same
# fitted transform to both splits.
scaler = RobustScaler()
scaler.fit(xtrain)
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtest)

In [None]:
# Hold-out validation is switched off: the split below is commented out, so the
# regressor is trained on every row of xtrain.
# x_train, x_test, y_train, y_test = train_test_split(xtrain, ytrain, test_size = 0.2, random_state=1)

# LightGBM regressor constructed with no arguments (library defaults).
reg = lgb.LGBMRegressor()
reg.fit(X=xtrain, y=ytrain)

# NOTE(review): `rmse` is this notebook's alias for sklearn's
# mean_squared_error -- confirm whether it yields MSE or RMSE before trusting
# the number. `pred` would also have to be computed on the hold-out split first.
# rmse(pred , y_test)

In [None]:
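# LightGBM baseline score: 16.63 (presumably from the hold-out evaluation that is commented out in the previous cell -- TODO confirm metric and split)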

In [None]:
# model = Sequential()
# model.add(LSTM(128))
# model.add(LSTM(64))
# model.add(Dense(1, activation="relu"))

In [None]:
# model.compile(loss="mse", optimizer="Adam", metrics=[tensorflow.keras.metrics.RootMeanSquaredError(name="rmse")])
# model.fit(xtrain, ytrain, validation_split = 0.2, epochs = 2, batch_size = 32)

In [None]:
# Predict on the scaled test features and write the submission file, pairing
# each prediction with the id of the corresponding test row.
pred = reg.predict(xtest)
final = pd.DataFrame({"id": df_test["id"], "pressure": pred})
final.to_csv("submission.csv", index=False)