In [None]:
import pandas as pd
import numpy as np
from sklearn import metrics
import time

In [None]:
# Input files (paths are relative to the notebook's working directory).
TRAIN_FOLDS_CSV = '../input/google-brain5folds/Google_Brain-5Folds.csv'
TEST_CSV = '../input/ventilator-pressure-prediction/test.csv'
SAMPLE_SUBMISSION_CSV = '../input/ventilator-pressure-prediction/sample_submission.csv'

# `df` is the training frame; later cells read its `kfold` and `pressure` columns.
df = pd.read_csv(TRAIN_FOLDS_CSV)
# `test` holds the rows to predict; `sample_submission` is the output template
# whose `pressure` column is overwritten before it is saved.
test = pd.read_csv(TEST_CSV)
sample_submission = pd.read_csv(SAMPLE_SUBMISSION_CSV)

In [None]:
# Preview the first five rows of the raw test frame (5 is the default for head()).
test.head(n=5)

In [None]:
# Model inputs: every training column except the row id, the target and the
# fold label.
useful_features = [c for c in df.columns if c not in ("id", "pressure", "kfold")]

# Take an explicit copy of the column subset. Later cells add new feature
# columns to `test`; assigning into a frame produced by plain column selection
# is the pattern that triggers pandas' SettingWithCopyWarning.
test = test[useful_features].copy()

In [None]:
from xgboost import XGBRegressor

In [None]:
# Hyper-parameters forwarded to XGBRegressor via ** in the training cell.
paramsxgb = {
    'max_depth': 5,
    'random_state': 42,
    # GPU histogram tree builder on device 0.
    # NOTE(review): requires a GPU-enabled XGBoost build - confirm the runtime.
    'tree_method': 'gpu_hist',
    'gpu_id': 0,
}

In [None]:
def _add_breath_features(frame):
    """Return a copy of ``frame`` with the per-breath engineered features added.

    Every grouped feature is computed within a single ``breath_id`` so that no
    value leaks from one breath into the next.

    Columns appended, in this order:
      * ``u_in_cumsum``     - running sum of ``u_in`` inside each breath.
      * ``R*C``             - element-wise product of the ``R`` and ``C`` columns.
      * ``last_value_u_in`` - ``u_in`` taken from the row with the largest
                              ``time_step`` of the breath, broadcast to all of
                              that breath's rows.
      * ``u_in_lag``        - ``u_in`` of the previous row of the same breath;
                              0 for the first row of a breath.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``breath_id``, ``time_step``, ``u_in``, ``R`` and ``C``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the four columns above and
        every remaining NaN replaced by 0. Row order and index are unchanged.
    """
    out = frame.copy()

    out['u_in_cumsum'] = out['u_in'].groupby(out['breath_id']).cumsum()
    out['R*C'] = out['R'] * out['C']

    ## add last_value_u_in
    # One row label per breath: the row holding that breath's largest time_step.
    last_row_labels = out.groupby('breath_id')['time_step'].idxmax()
    last_u_in_by_breath = out.loc[last_row_labels].set_index('breath_id')['u_in']
    # Mapping by breath_id (instead of merging) keeps row order and index, and
    # lets the cell be re-run without producing duplicated *_x / *_y columns.
    out['last_value_u_in'] = out['breath_id'].map(last_u_in_by_breath)

    ## add lag
    # Shift inside each breath. A plain Series.shift(1) would hand the first row
    # of a breath the last u_in of the *previous* breath.
    out['u_in_lag'] = out.groupby('breath_id')['u_in'].shift(1)

    # The first row of every breath has no predecessor -> NaN -> 0.
    # (As before, this also zero-fills any other NaN present in the frame.)
    return out.fillna(0)


df = _add_breath_features(df)
test = _add_breath_features(test)

# Refresh the feature list so it includes the engineered columns, then align
# the test frame to exactly those columns, in the same order as training.
useful_features = [c for c in df.columns if c not in ("id", "pressure", "kfold")]
test = test[useful_features]

test.head()

In [None]:
# 5-fold cross-validation: for each fold, fit on the other folds, early-stop on
# the held-out fold, score it with MAE and predict the test set.
final_predictions = []  # one array of test-set predictions per fold
valid_scores = []       # held-out MAE per fold

for fold in range(5):
    print('Fold: {}'.format(fold))
    start = time.time()

    # Split rows by the precomputed fold label and keep only the model inputs.
    in_valid_fold = df.kfold == fold
    xtrain = df.loc[~in_valid_fold, useful_features]
    ytrain = df.loc[~in_valid_fold, 'pressure']
    xvalid = df.loc[in_valid_fold, useful_features]
    yvalid = df.loc[in_valid_fold, 'pressure']

    model = XGBRegressor(**paramsxgb, n_estimators=3000)
    # NOTE(review): newer XGBoost releases expect `early_stopping_rounds` on the
    # estimator constructor rather than on fit() - confirm against the installed
    # version before upgrading.
    model.fit(
        xtrain,
        ytrain,
        eval_set=[(xvalid, yvalid)],
        early_stopping_rounds=100,
        verbose=False,
    )

    # predict() does not modify its input, so `test` is used directly instead
    # of taking a fresh copy on every fold.
    test_preds = model.predict(test)
    preds_valid = model.predict(xvalid)

    final_predictions.append(test_preds)

    # The held-out fold is also the early-stopping set, so this MAE is
    # somewhat optimistic.
    mae = metrics.mean_absolute_error(yvalid, preds_valid)
    print(fold, mae)

    valid_scores.append(mae)
    print(time.time() - start)

print('average of all valid folds {}'.format(np.mean(valid_scores)))

In [None]:
# Blend the folds: put each fold's test predictions in its own column and
# average across columns to get one value per test row.
fold_prediction_matrix = np.column_stack(final_predictions)

# Bracket assignment always writes a DataFrame column. Attribute-style
# assignment (`frame.pressure = ...`) only does so when the column already
# exists, and otherwise silently sets a plain Python attribute.
sample_submission['pressure'] = np.mean(fold_prediction_matrix, axis=1)

sample_submission.to_csv('secsub.csv', index=False)
