In [None]:
import numpy as np
import pandas as pd

from sklearn import model_selection, preprocessing
import xgboost as xgb

# Load the raw train/test tables (paths are relative to the notebook location).
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')

# Keep the test ids aside before the column is dropped from the features.
id_test = test["id"]

# Columns removed from both feature frames before modelling.
non_feature_cols = ["id", "timestamp"]

# Target: price_doc scaled by 0.969 and shifted by 10.
# NOTE(review): these two constants are unexplained magic numbers —
# presumably a manual calibration of the target; confirm and document.
y_train = train["price_doc"] * .969 + 10

# Feature frames; the training frame additionally loses the target column.
x_train = train.drop(non_feature_cols + ["price_doc"], axis="columns")
x_test = test.drop(non_feature_cols, axis="columns")

# Stack train on top of test so both can be transformed together.
x_all = pd.concat([x_train, x_test], axis=0)

In [None]:
# Integer-encode every object-dtype column. Each encoder is fitted on the
# combined frame, so train and test rows share one label -> code mapping.
object_columns = [col for col in x_all.columns if x_all[col].dtype == 'object']
for col in object_columns:
    raw_values = list(x_all[col].values)
    encoder = preprocessing.LabelEncoder()
    encoder.fit(raw_values)
    x_all[col] = encoder.transform(raw_values)

# Split the combined frame back by position: the first len(y_train) rows are
# the training rows, everything after them belongs to the test set.
num_train = len(y_train)
x_train, x_test = x_all[:num_train], x_all[num_train:]
print('x_train:', x_train.shape)
print('x_test:', x_test.shape)


In [None]:
# Booster hyper-parameters shared by the cross-validation run and the final fit.
xgb_params = {
    'eta': 0.05,
    'max_depth': 5,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    # 'reg:squarederror' is the current name of the squared-error regression
    # objective; 'reg:linear' is its deprecated alias.
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    # 'verbosity': 0 replaces the removed 'silent': 1 flag (no library logging).
    'verbosity': 0
}


# Wrap the feature frames in XGBoost's native data container.
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test)

# Cross-validate with early stopping to choose the number of boosting rounds.
cv_output = xgb.cv(xgb_params, dtrain, num_boost_round=1000, early_stopping_rounds=20,
    verbose_eval=50, show_stdv=False)

# The CV history holds one row per retained boosting round, so its length is
# the round count used for the final model.
num_boost_rounds = len(cv_output)
print('num_boost_rounds:',num_boost_rounds)

# Refit on the full training set. The parameter dict is passed as-is: the
# former dict(xgb_params, silent=1) copy only re-set a key already present.
model = xgb.train(xgb_params, dtrain, num_boost_round=num_boost_rounds)


# Predict the test set and write the submission file (one row per test id).
y_predict = model.predict(dtest)
output = pd.DataFrame({'id': id_test, 'price_doc': y_predict})

output.to_csv('xgbSub.csv', index=False)
print('done!' )