In [None]:
import gc

import numpy as np
import pandas as pd

from pprint import pprint

from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.metrics import mean_absolute_error

import xgboost as xg


# Reading the dataset

In [None]:
%%time
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')

In [None]:
# Report (rows, columns) for both tables. Only the shape tuples are iterated,
# so no extra reference to either DataFrame is left behind.
for label, shape in (('train shape:', train.shape), ('test shape:', test.shape)):
    print(label, shape)

In [None]:
# Show the first five training rows.
train.head(n=5)

In [None]:
# Show the first five test rows.
test.head(n=5)

# Data preprocessing

In [None]:
# Running total of `u_in` within each `breath_id`, in row order.
# The same feature is added to both tables so their columns stay aligned.
train['u_in_cumsum'] = train.groupby('breath_id')['u_in'].cumsum()
test['u_in_cumsum'] = test.groupby('breath_id')['u_in'].cumsum()

In [None]:
# `u_in_lag` is the value of `u_in` two rows earlier within the same breath.
# The first two rows of every breath have no such predecessor, so `shift`
# yields NaN there; those gaps are set to 0.
#
# Only the new column is filled. Calling `fillna(0)` on the whole frame would
# copy every column and silently zero-fill any missing value elsewhere
# (including the `pressure` target in `train`), hiding data problems.
train['u_in_lag'] = train.groupby('breath_id')['u_in'].shift(2).fillna(0)
test['u_in_lag'] = test.groupby('breath_id')['u_in'].shift(2).fillna(0)


In [None]:
# Target: the `pressure` column.
y = train['pressure']
# Features: every remaining column except the row and breath identifiers.
X = train.drop(columns=['id', 'breath_id', 'pressure'])

In [None]:
# `X` and `y` were derived from `train` above; drop the `train` name and
# trigger a garbage-collection pass. As the cell's last expression, the
# value returned by gc.collect() is displayed.
del train
gc.collect()

In [None]:
# Create the Normalizer and fit it on the feature table. `fit` returns the
# estimator itself, so `transformer` is the same fitted object either way.
transformer = Normalizer()
transformer.fit(X)

In [None]:
# Preview the normalised features on the first few rows only.
#
# The transformed values are never assigned, so every later cell trains and
# predicts on the raw, un-normalised features. Transforming all of X here
# would only build a full-size array that the notebook's output history
# (Out[...]) then keeps referenced, which defeats the explicit
# `del` / `gc.collect()` cleanup used throughout this notebook.
#
# To actually train on normalised features, assign the result and apply the
# same transformer to the test features before predicting.
transformer.transform(X.head())

# Applying XGBoost Regressor

In [None]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# split repeatable. `X_test` / `y_test` are this hold-out portion of the
# training data, not the competition `test` table.
# NOTE(review): rows are split individually and `breath_id` is no longer in X,
# so rows of one breath can presumably land on both sides - confirm whether a
# grouped split is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Base regressor handed to the hyper-parameter search below.
# `random_state` is the seed parameter of XGBoost's scikit-learn wrapper;
# it replaces the deprecated `seed` alias and fixes the same seed value.
# n_jobs=-1 lets XGBoost use all available threads.
xgbr = xg.XGBRegressor(random_state=42, n_jobs=-1)

In [None]:
# Candidate values for each hyper-parameter explored by the search.
n_estimators, max_depth, min_child_weight = (
    [5, 10, 15],
    [3, 5, 7],
    [0.05, 0.10, 0.15],
)

# Search space keyed by the estimator's parameter names.
random_grid = dict(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_child_weight=min_child_weight,
)

pprint(random_grid)

In [None]:
# Randomised hyper-parameter search: 5 sampled settings from `random_grid`,
# each evaluated with 3-fold cross-validation.
# Candidates are ranked by negated mean absolute error so that model
# selection uses the same error measure that the hold-out evaluation later
# reports with `mean_absolute_error` (the scorer is negated because the
# search maximises its score).
clf = RandomizedSearchCV(
    estimator=xgbr,
    param_distributions=random_grid,
    n_iter=5,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
    scoring='neg_mean_absolute_error',
)

In [None]:
%%time
clf.fit(X_train, y_train)

In [None]:
# The search has been fitted; drop the names of the full target and the
# training portion of the split, then trigger a garbage-collection pass.
# `X_test` / `y_test` are kept because the evaluation cells below use them.
del y,  X_train, y_train
gc.collect()

In [None]:
# Summarise the search outcome: the best mean cross-validated score (under
# the search's `scoring`) and the parameter setting that achieved it.
for label, value in (('Best Score: ', clf.best_score_), ('Best Params: ', clf.best_params_)):
    print(label, value)

In [None]:
# Predict pressure for the hold-out rows with the fitted search object.
y_pred = clf.predict(X=X_test)

In [None]:
# Mean absolute error between the hold-out targets and their predictions.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Exporting predictions to the appropriate submission format

In [None]:
# Rebind `X` to the test-set features: all columns except the identifiers.
X = test.drop(columns=['id', 'breath_id'])

In [None]:
# Hold-out evaluation is done; drop the hold-out names and trigger a
# garbage-collection pass before predicting on the test features.
del X_test, y_test
gc.collect()

In [None]:
# Predict pressure for every row of the test features.
y_predicted = clf.predict(X=X)

In [None]:
# Pair each test row id with its predicted pressure.
data = dict(id=test['id'], pressure=y_predicted)
df = pd.DataFrame(data)
# Show the first five rows of the submission table.
df.head(n=5)

In [None]:
# Write the submission file without the DataFrame index column.
df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# The submission has been written; drop the remaining large names (features,
# fitted search, predictions, test table) and trigger a final
# garbage-collection pass.
del X, clf, y_pred, test, y_predicted 
gc.collect()