## References
- https://www.kaggle.com/c/ventilator-pressure-prediction/discussion/273974
- https://www.kaggle.com/susnato/lgbm-starter
- https://www.kaggle.com/ryanbarretto/tensorflow-lstm-baseline
- https://www.kaggle.com/akihironomura/tps-lightgbm-optuna-kfold

## Import Modules

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
# from lightgbm import LGBMRegressor
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
import optuna

from warnings import filterwarnings
filterwarnings('ignore')

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:
# Load the training rows, the test rows and the sample submission template,
# in that order, from the competition input directory.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)

## EDA

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Column dtypes, non-null counts and memory usage.
train.info()

In [None]:
# Summary statistics of the numeric columns.
train.describe()

In [None]:
# Number of missing values per column.
train.isnull().sum()

In [None]:
# Number of rows recorded for each breath_id.
train['breath_id'].value_counts()

In [None]:
# Number of distinct breaths in the training data.
train['breath_id'].nunique()

In [None]:
# Row count for each distinct value of R.
sns.countplot(x=train['R'])

In [None]:
# Row count for each distinct value of C.
sns.countplot(x=train['C'])

In [None]:
# 10-bin histogram of u_in (kde=False: no density curve is drawn).
# NOTE(review): sns.distplot appears to be deprecated in newer seaborn
# releases -- confirm, and consider sns.histplot.
sns.distplot(train['u_in'], kde=False, bins=10, color='blue')

In [None]:
# 10-bin histogram of u_out.
sns.distplot(train['u_out'], kde=False, bins=10, color='blue')

In [None]:
# 10-bin histogram of the target column, pressure.
sns.distplot(train['pressure'], kde=False, bins=10, color='blue')

In [None]:
# Preview the first rows of the test table.
test.head()

## Feature Engineering

In [None]:
# Running total of u_in inside each breath, added to both tables.
for frame in (train, test):
    frame['u_in_cumsum'] = frame.groupby('breath_id')['u_in'].cumsum()

In [None]:
# u_in from two rows earlier *within the same breath*. Shifting the whole
# column at once would copy the last readings of one breath into the first
# two rows of the next, so the shift is done per breath_id group.
train['u_in_lag'] = train.groupby('breath_id')['u_in'].shift(2)
# The first two rows of every breath have no lagged value; use 0 there
# (fillna also replaces any other NaN in the frame with 0).
train = train.fillna(0)

test['u_in_lag'] = test.groupby('breath_id')['u_in'].shift(2)
test = test.fillna(0)

In [None]:
# Target column for training.
y = train['pressure']

# Feature tables: identifiers and u_out are excluded from both tables, and the
# target is additionally removed from the training features.
X = train.drop(columns=['id', 'breath_id', 'u_out', 'pressure'])
X_test = test.drop(columns=['id', 'breath_id', 'u_out'])

## Hyperparameter Tuning using Optuna

In [None]:
def objective(trial, data=X, target=y):
    """Optuna objective: fit a LightGBM regressor and score it on a hold-out split.

    Args:
        trial: Optuna trial used to sample the hyperparameters below.
        data: Feature table to split; defaults to the notebook-level ``X``.
        target: Values to predict; defaults to the notebook-level ``y``.

    Returns:
        Mean absolute error of the fitted model on the 20% validation split.
    """
    # Fixed random_state: every trial is scored on the same 80/20 split.
    X_train, X_valid, y_train, y_valid = train_test_split(
        data, target, train_size=0.8, test_size=0.2, random_state=0
    )

    params = {
        'objective': 'regression',
        'metric': 'mae',
        'boosting_type': 'gbdt',
        'n_estimators': 1000,
        'random_state': 42,
        'learning_rate': trial.suggest_categorical(
            'learning_rate', [0.006, 0.008, 0.01, 0.014, 0.017, 0.02]
        ),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 5, 256),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        # Row subsampling is tuned only through bagging_fraction / bagging_freq.
        # 'subsample' and 'subsample_freq' are LightGBM aliases of these two
        # parameters, so sampling them as well only added search dimensions
        # that LightGBM ignores (and subsample_freq must be an integer).
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
    }
    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train)

    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

In [None]:
# Hyperparameter search, currently disabled. Uncomment to run 10 Optuna trials
# that minimise the value returned by objective() and print the best
# parameter set.
# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=10)
# print('Number of finished trials:', len(study.trials))
# print('Best trial:', study.best_trial.params)

## Model Training

In [None]:
# Hold out 20% of the rows for validation; random_state keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Fixed hyperparameters for the final LightGBM model.
# Row subsampling is configured only through bagging_fraction / bagging_freq:
# 'subsample' and 'subsample_freq' are LightGBM aliases of those two
# parameters, so also listing them (with a non-integer subsample_freq) was
# conflicting configuration that LightGBM ignores.
lgb_params = {
    'objective': 'regression',
    'metric': 'mae',
    'boosting_type': 'gbdt',
    'n_estimators': 1000,
    'random_state': 42,
    'learning_rate': 0.017,
    'colsample_bytree': 0.7981147731267384,
    'reg_alpha': 0.29250836566881794,
    'reg_lambda': 0.0032438602599939702,
    'min_child_weight': 134,
    'min_child_samples': 26,
    'bagging_fraction': 0.6263245217964235,
    'bagging_freq': 1,
}

In [None]:
# Build the regressor from the fixed hyperparameters in lgb_params and show
# its configuration as the cell output.
model = lgb.LGBMRegressor(**lgb_params)
model

In [None]:
# Fit on the training portion of the split only; the validation rows are
# kept aside for the evaluation step.
model.fit(X_train, y_train)

## Evaluation

In [None]:
# Score the fitted model on the held-out validation rows.
pred_valid = model.predict(X_valid)
valid_mae = mean_absolute_error(y_valid, pred_valid)
print('Mean Absolute Error: ', valid_mae)

## Make Submission

In [None]:
preds = model.predict(X_test)

In [None]:
submission.pressure = preds
submission.head()

In [None]:
submission.to_csv('submission.csv', index=False)

## If you like this kernel, please upvote :)