# Imports

In [22]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

---

# Carga de datos

In [10]:
# Load the pre-computed `ventana_1` feature and target tables from their pickles.
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# were produced by this project.
features, targets = map(
    pd.read_pickle,
    (
        "../../features/auctions_train_ventana_1.pkl",
        "../../targets/targets_ventana_1_auctions.pkl",
    ),
)

# Index-on-index join with pandas' default `how="inner"`: only index labels
# present in both tables survive; target columns are appended after the
# feature columns.
train = pd.merge(features, targets, left_index=True, right_index=True)

---

# Preparación del set de datos para XGBoost

In [15]:
# Positional split of the merged table: every column except the last one is
# used as a feature, and the last column is used as the regression label.
# NOTE(review): assumes the merge left exactly one target column in the last
# position -- confirm against the targets pickle.
X = train.iloc[:, :-1]
y = train.iloc[:, -1]

# DMatrix over the full data set (no hold-out); consumed by xgb.cv further down.
data_dmatrix = xgb.DMatrix(X, label=y)

In [17]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

In [69]:
# Scikit-learn style XGBoost regressor. Only the four settings below are
# overridden; every other hyper-parameter keeps the library default.
xg_reg = xgb.XGBRegressor(
    n_estimators=500,       # number of boosted trees
    learning_rate=0.05,     # shrinkage applied to each tree's contribution
    colsample_bytree=0.7,   # fraction of columns sampled per tree
    n_jobs=6,               # parallel threads used by XGBoost
)

In [70]:
# Train on the training split only; as the last expression of the cell, the
# fitted estimator's repr is displayed.
xg_reg.fit(X=X_train, y=y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.7, gamma=0, learning_rate=0.05, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=500,
       n_jobs=6, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [71]:
# Predict the target for the held-out rows (X_test) with the fitted regressor.
preds = xg_reg.predict(X_test)

In [72]:
# Root-mean-squared error of the hold-out predictions, reported in the same
# units as the target.
rmse = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=preds)
)
print("RMSE: %f" % rmse)

RMSE: 89131.198549


In [80]:
# Booster parameters for the native xgb.cv API.
# NOTE(review): this is not the configuration fitted as `xg_reg` above --
# colsample_bytree is 0.9 here vs 0.7 there, and max_depth is left at the
# library default (the saved outputs show depth 6 here vs 3 for xg_reg) --
# so this CV score does not validate that model. Confirm this is intended.
params = {
    "objective": "reg:linear",
    "colsample_bytree": 0.9,
    "learning_rate": 0.05,
}

# 10-fold cross-validation on the full DMatrix: at most 100 boosting rounds,
# stopping early if the RMSE metric has not improved for 20 consecutive rounds.
# The result is a DataFrame with one row per boosting round.
cv_results = xgb.cv(
    params=params,
    dtrain=data_dmatrix,
    num_boost_round=100,
    nfold=10,
    metrics="rmse",
    early_stopping_rounds=20,
    as_pandas=True,
    seed=123,
)

[06:18:43] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 126 extra nodes, 0 pruned nodes, max_depth=6
[06:18:43] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[06:18:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 0 pruned nodes, max_depth=6
[06:18:4

In [79]:
# Display the per-round cross-validation metrics table returned by xgb.cv.
# NOTE(review): this cell's saved execution count (79) is lower than the CV
# cell's (80), so the stored output may come from an earlier CV run -- re-run
# this cell after the CV cell before relying on the table.
cv_results

Unnamed: 0,train-rmse-mean,train-rmse-std,test-rmse-mean,test-rmse-std
0,152780.465625,63.921372,152782.992188,609.913624
1,147847.953125,67.336765,147854.459375,596.321719
2,143268.531250,78.347831,143278.293750,577.294693
3,138994.343750,83.374200,139008.287500,551.550746
4,135006.446875,76.082420,135023.385937,538.062981
5,131291.631250,71.179946,131314.059375,528.477535
6,127848.816406,58.794154,127876.297657,524.401653
7,124670.230469,73.154691,124702.082031,499.114021
8,121721.457031,83.898329,121758.535937,470.935485
9,118991.569531,73.225781,119034.164843,458.444258


In [84]:
# Print the mean test RMSE of the last boosting round in the CV results
# (a one-element Series, so the round index is shown alongside the value).
print(cv_results["test-rmse-mean"].iloc[-1:])

99    88928.904688
Name: test-rmse-mean, dtype: float64
