In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from xgboost import XGBRegressor

import gc

In [None]:
# Read the competition's training and test tables, then preview the training frame.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
    )
)
train

In [None]:
# The row identifier is removed from both tables before any features are built.
train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

In [None]:
def add_features(df):
    """Add the engineered feature columns to ``df`` in place and return it.

    Columns added:
      RC_sum, RC_div   -- sum and ratio of the ``R`` and ``C`` columns.
      u_in_cumsum      -- running total of ``u_in`` within each ``breath_id``.
      time_lag, u_in_lag, u_out_lag
                       -- the previous row's ``time_step`` / ``u_in`` / ``u_out``
                          within the same ``breath_id``; 0 on the first row of
                          every breath.

    Rows are taken in their existing order; no sorting is performed here.
    """
    by_breath = df.groupby('breath_id')

    df['RC_sum'] = df['R'] + df['C']
    df['RC_div'] = df['R'] / df['C']
    df['u_in_cumsum'] = by_breath['u_in'].cumsum()

    # Lag inside each breath. A plain .shift(1) over the whole frame would hand
    # the first row of every breath the last row of the *previous* breath,
    # which is an unrelated sequence.
    df['time_lag'] = by_breath['time_step'].shift(1).fillna(0)
    df['u_in_lag'] = by_breath['u_in'].shift(1).fillna(0)
    df['u_out_lag'] = by_breath['u_out'].shift(1).fillna(0)
    return df


# Same transformation for both tables so their feature columns stay in step.
train = add_features(train)
test = add_features(test)

In [None]:
# One-hot encode R and C. They are cast to str first so that they are treated
# as categories and the dummy columns keep the string-sorted naming/order.
CATEGORICAL_COLS = ['R', 'C']

for col in CATEGORICAL_COLS:
    train[col] = train[col].astype(str)
    test[col] = test[col].astype(str)

# Name the encoded columns explicitly and pin an integer dtype so the result
# does not depend on dtype inference or on the installed pandas' default
# dummy dtype.
train = pd.get_dummies(train, columns=CATEGORICAL_COLS, dtype=np.uint8)
test = pd.get_dummies(test, columns=CATEGORICAL_COLS, dtype=np.uint8)

# Train and test are encoded independently, so force test onto train's feature
# columns (everything except the target): same set, same order. A category
# that is absent from test becomes an all-zero column instead of silently
# shifting every later column when the frames are turned into arrays.
feature_columns = [col for col in train.columns if col != 'pressure']
test = test.reindex(columns=feature_columns, fill_value=0)

In [None]:
from sklearn.model_selection import GroupKFold

# Fold count stated explicitly: the test predictions are later averaged over
# five fold models, so the number of splits must not depend on the library's
# default value.
kf = GroupKFold(n_splits=5)

In [None]:
# Target vector, then the feature matrix (every column except the target),
# both as plain NumPy arrays.
y = train['pressure'].to_numpy()
train2 = train.drop('pressure', axis=1).to_numpy()

In [None]:
from sklearn.preprocessing import RobustScaler

# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training matrix and the test table.
rb = RobustScaler().fit(train2)
train3, test2 = rb.transform(train2), rb.transform(test)

In [None]:
# Group label for every training row, used to split the folds by breath_id.
group = train.loc[:, 'breath_id']

In [None]:
# Drop the names of the large objects that are no longer needed, then actually
# run a collection: `gc.collect` without parentheses only references the
# function and never calls it.
del train, test, rb
gc.collect()

In [None]:
from sklearn.metrics import mean_absolute_error

# Grouped cross-validation: one XGBoost model per fold, each scored on its
# held-out fold; the test prediction is the plain average of the fold models.
n_folds = kf.get_n_splits()  # keeps the averaging weight tied to the splitter
fold_scores = []
result = 0

for fold, (train_index, valid_index) in enumerate(kf.split(train3, groups=group), start=1):
    # Slice the *scaled* matrix (train3). test2 went through the same scaler,
    # so the model must be fitted on scaled features too; fitting on the
    # unscaled train2 would make the learned split thresholds meaningless
    # when applied to test2.
    x_train, y_train = train3[train_index], y[train_index]
    x_valid, y_valid = train3[valid_index], y[valid_index]

    # n_estimators is only an upper bound; early stopping on the validation
    # fold (configured in fit) decides how many trees are actually grown.
    XGB = XGBRegressor(n_estimators = 100000,
                    tree_method = 'gpu_hist',
                    predictor = 'gpu_predictor',
                     subsample = 0.7,
                    colsample_bytree=0.8,
                    learning_rate = 0.02,
                    max_depth =10)

    XGB.fit(x_train, y_train, eval_set = [(x_valid, y_valid)], early_stopping_rounds=200, verbose=50, eval_metric='mae')
    pred = XGB.predict(x_valid)

    fold_mae = mean_absolute_error(y_valid, pred)
    fold_scores.append(fold_mae)
    print('MAE Score :', fold_mae)

    # Each fold model contributes an equal share of the final test prediction.
    result += XGB.predict(test2) / n_folds

    # Release the fold's arrays and model before the next fold is built.
    del x_train, y_train, x_valid, y_valid, XGB
    gc.collect()

print('Mean CV MAE :', sum(fold_scores) / len(fold_scores))

In [None]:
# Load the submission template, overwrite its pressure column with the
# averaged predictions, and preview the result.
sub = pd.read_csv(
    '../input/ventilator-pressure-prediction/sample_submission.csv'
).assign(pressure=result)
sub

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv('submission.csv', index=False)