In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor

import optuna 
from optuna import Trial, visualization
from optuna.samplers import TPESampler

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import mean_absolute_error

import warnings
import joblib
warnings.filterwarnings(action='ignore')

In [None]:
def create_new_feat(df):
    """Build per-breath engineered features and one-hot encode R / C.

    All group-wise statistics (aggregates, lags, cumulative sums, windowed
    statistics) are computed within a single ``breath_id``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``.  The frame is NOT modified; the function
        works on a copy so the caller's input stays intact.

    Returns
    -------
    pandas.DataFrame
        A new frame with the engineered columns appended.  ``R``, ``C`` and
        the combined ``R__C`` column are replaced by the dummy columns that
        ``pd.get_dummies`` produces for them.  Columns created before the
        internal ``fillna(0)`` (aggregates, cumulative sums, lags) contain no
        NaN; columns created after it (``time_diff*``, ``ewm_*``,
        ``rolling_10_std``, ``expand_*``) may still contain NaN and are left
        for the caller to fill.
    """
    # Work on a copy: previously the first batch of columns was written onto
    # the caller's frame while the returned frame was a different object,
    # leaving the input half-mutated (and with unfilled NaNs).
    df = df.copy()

    u_in_by_breath = df.groupby("breath_id")["u_in"]
    u_out_by_breath = df.groupby("breath_id")["u_out"]

    # Whole-breath aggregates broadcast back onto every row of the breath.
    for stat in ("sum", "std", "min", "first", "last"):
        df["u_in_{}".format(stat)] = u_in_by_breath.transform(stat)

    df["time_passed"] = df.groupby("breath_id")["time_step"].diff()
    df["area"] = df["time_step"] * df["u_in"]
    df["area_2"] = df.groupby("breath_id")["area"].cumsum()
    df["u_in_cumsum"] = u_in_by_breath.cumsum()

    # Lagged (past) and "lag_back" (future) values, 1 to 4 steps away.
    for k in range(1, 5):
        df["u_in_lag{}".format(k)] = u_in_by_breath.shift(k)
        df["u_out_lag{}".format(k)] = u_out_by_breath.shift(k)
        df["u_in_lag_back{}".format(k)] = u_in_by_breath.shift(-k)
        df["u_out_lag_back{}".format(k)] = u_out_by_breath.shift(-k)

    # Shifts/diffs leave NaN at the breath edges; zero them before they are
    # used in the arithmetic features below.
    df = df.fillna(0)

    # Re-group on the filled frame so the statistics see the same data the
    # subtraction on the right-hand side does.
    u_in_by_breath = df.groupby("breath_id")["u_in"]
    df["breath_id__u_in__diffmax"] = u_in_by_breath.transform("max") - df["u_in"]
    df["breath_id__u_in__diffmean"] = u_in_by_breath.transform("mean") - df["u_in"]
    df["cross"] = df["u_in"] * df["u_out"]
    df["cross2"] = df["time_step"] * df["u_out"]

    # Treat R and C (and their combination) as categorical labels so that
    # get_dummies one-hot encodes them.
    df["R"] = df["R"].astype(str)
    df["C"] = df["C"].astype(str)
    df["R__C"] = df["R"] + "__" + df["C"]

    df = pd.get_dummies(df)

    # time_step differences over 1..8 steps.  The 1-step column keeps its
    # historical un-numbered name "time_diff".
    time_by_breath = df["time_step"].groupby(df["breath_id"])
    for k in range(1, 9):
        name = "time_diff" if k == 1 else "time_diff{}".format(k)
        df[name] = time_by_breath.diff(k)

    # Differences against the (already zero-filled) lag columns.
    for k in range(1, 5):
        df["u_in_diff{}".format(k)] = df["u_in"] - df["u_in_lag{}".format(k)]
        df["u_out_diff{}".format(k)] = df["u_out"] - df["u_out_lag{}".format(k)]

    # Windowed statistics of u_in within each breath.  The grouped window
    # results are indexed by (breath_id, original index); dropping level 0
    # restores the original index so the assignment aligns row by row.
    # NOTE(review): ewm ``corr`` is called without ``other``, i.e. u_in is
    # correlated with itself -- confirm this feature carries any signal.
    u_in_by_breath = df.groupby("breath_id")["u_in"]
    windowed = (
        ("ewm_u_in_{}", u_in_by_breath.ewm(halflife=10), ("mean", "std", "corr")),
        ("rolling_10_{}", u_in_by_breath.rolling(window=10, min_periods=1), ("mean", "max", "std")),
        ("expand_{}", u_in_by_breath.expanding(2), ("mean", "max", "std")),
    )
    for template, window, stats in windowed:
        for stat in stats:
            values = getattr(window, stat)()
            df[template.format(stat)] = values.reset_index(level=0, drop=True)

    return df

In [None]:
# Load the raw training data, add the engineered features, then zero-fill the
# NaNs that create_new_feat leaves in the columns it builds after its own
# internal fillna (e.g. the time_step diffs at the start of each breath).
train_df = create_new_feat(
    pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
).fillna(0)

In [None]:
# Split the column labels into target (name contains 'pressure') and features.
is_target_col = train_df.columns.str.contains('pressure')

# The first two non-target columns are skipped by position.
# NOTE(review): presumably these are `id` and `breath_id` -- confirm against
# the CSV column order.
x_col = train_df.columns[~is_target_col][2:]
y_col = train_df.columns[is_target_col]
print(x_col)
print(y_col)

In [None]:
# Scale every feature column; the fitted scaler `sc` is reused later to
# transform the test set with the same parameters.
sc = MinMaxScaler()
# sc = RobustScaler()  # alternative scaler
sc.fit(train_df[x_col])
train_df[x_col] = sc.transform(train_df[x_col])

# Keep the first 60% of the rows for training.  The split is positional
# (`iloc`): the previous `.loc[:k]` slice was label-based and inclusive of its
# end point, so it returned k + 1 rows and shared row k with the validation
# slice that starts at k.
n_train = int(train_df.shape[0] * 0.6)
train_x = train_df.iloc[:n_train][x_col]
train_y = train_df.iloc[:n_train][y_col]
# valid_x = train_df.iloc[n_train:][x_col]
# valid_y = train_df.iloc[n_train:][y_col]

# Release the full frame; only the training slices are needed from here on.
del train_df

In [None]:
# Run the test data through the same feature pipeline as the training data,
# then scale it with the scaler that was fitted on the training features.
test_df = create_new_feat(
    pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
).fillna(0)
test_df[x_col] = sc.transform(test_df[x_col])

In [None]:
# Load a previously saved tuning result from disk instead of re-running the
# search in this notebook.
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only load
# files from a trusted source.
study = joblib.load('../input/pkl-file/optuna_result.pkl')
# Tabular view of the study's trials (presumably an Optuna Study -- confirm);
# show the first three rows as a sanity check.
df = study.trials_dataframe()
df.head(3)

In [None]:
# Pull the best trial out of the loaded study and print its objective value
# together with the hyper-parameters it used.
trial = study.best_trial
# `trial_params` is passed on as keyword arguments when the final model is built.
trial_params = trial.params
print('Best Trial: score {},\nparams {}'.format(trial.value, trial_params))

In [None]:
# Build the final regressor from the best trial's hyper-parameters plus fixed
# GPU settings, then fit it on the training slice.
# If `trial_params` itself contains `tree_method` or `predictor`, this call
# raises TypeError (duplicate keyword argument).
# NOTE(review): 'gpu_hist' / 'gpu_predictor' need a GPU-enabled XGBoost build,
# and newer XGBoost releases appear to deprecate them in favour of
# `device='cuda'` -- confirm against the installed version.
final_model = XGBRegressor(**trial_params, tree_method='gpu_hist', predictor='gpu_predictor')
final_model.fit(train_x, train_y)

In [None]:
# Persist the fitted model to the working directory so it can be reloaded
# without retraining.
joblib.dump(final_model, './final_model.pkl')

In [None]:
# Fill the sample submission's `pressure` column with the model's predictions
# for the test features and write the result out.
# NOTE(review): assumes the rows of test_df are in the same order as the rows
# of sample_submission.csv -- confirm.
submission = pd.read_csv(
    '../input/ventilator-pressure-prediction/sample_submission.csv'
).assign(pressure=final_model.predict(test_df[x_col]))
submission.to_csv('./submission.csv', index=False)