In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from preprocessing import preprocessing
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV

# Column names handed to preprocessing(); presumably the columns it drops
# ("cols_eliminar" = "columns to remove") -- confirm against the preprocessing module.
cols_eliminar = [
    'fecha', 'id', 'titulo', 'descripcion', 'direccion',
    'lat', 'lng', 'posicion', 'provincia', 'ciudad', 'gimnasio', 'usosmultiples',
    'escuelascercanas', 'centroscomercialescercanos',
]

# Only the first element of the returned pair is used in this notebook.
df_train, _ = preprocessing(False, cols_eliminar)
df_train = df_train.astype({'provincia_ordinal': 'int', 'ciudad_ordinal': 'int'})

# Features: every column except the two price columns. Target: log_precio,
# kept as a single-column 2-D array. Both are materialised as float32 copies.
X = df_train.drop(columns=['precio', 'log_precio']).values.astype('float32')
Y = df_train[['log_precio']].values.astype('float32')

# Fixed seed so the 80/20 hold-out split is reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2
)


In [8]:
# Estimator built with the library's default hyperparameters; the grid below
# varies only min_split_loss and max_depth (3 x 4 = 12 combinations).
xgboost = XGBRegressor()

# No scoring or cv argument is passed, so GridSearchCV's own defaults apply.
search = GridSearchCV(
    estimator=xgboost,
    param_grid=dict(min_split_loss=[0, 0.2, 0.4], max_depth=[15, 18, 21, 25]),
)

In [None]:
# Run the grid search on the training split; fit() hands back the search
# object, so the chained attribute is what the cell displays.
search.fit(X_train, Y_train).cv_results_

In [None]:
# One-at-a-time hyperparameter sweeps.
#
# Each sweep varies a single XGBRegressor hyperparameter while the others stay
# at the fixed values below, and records (candidate value, hold-out MAE) pairs
# in a list named after the hyperparameter. The target is log_precio, so the
# MAE values are in log-price units.
#
# Fix: the original cell scored with `mean_average_error`, a name that is never
# imported or defined, so it raised NameError on the first iteration. The
# imported sklearn metric is `mean_absolute_error`, called as (y_true, y_pred).

# Hyperparameters shared by every sweep.
_SHARED_PARAMS = {
    'subsample': 0.7,
    'min_child_weight': 3,
    'colsample_bytree': 0.7,
    'learning_rate': 0.1,
    'nthread': 8,
    'num_parallel_tree': 10,
}

# Shared values plus the fixed model size used by every sweep except n_estimators.
_FIXED_SIZE_PARAMS = {**_SHARED_PARAMS, 'n_estimators': 150, 'max_depth': 20}


def _fit_and_score(base_params, **overrides):
    """Fit one XGBRegressor and score it on the hold-out split.

    Parameters
    ----------
    base_params : dict
        Constructor keyword arguments common to the sweep.
    **overrides
        Constructor keyword arguments for this candidate; they take precedence
        over entries with the same key in ``base_params``.

    Returns
    -------
    tuple
        ``(fitted_model, mae)`` where ``mae`` is the mean absolute error of the
        model's predictions on ``X_test`` against ``Y_test``.
    """
    fitted = XGBRegressor(**{**base_params, **overrides})
    # NOTE(review): the hold-out split is also the last eval_set entry, so it
    # drives early stopping AND the reported score -- the MAE is likely
    # optimistic. Consider a separate validation split.
    # NOTE(review): newer xgboost releases appear to expect eval_metric and
    # early_stopping_rounds on the constructor rather than fit() -- confirm
    # against the installed version.
    fitted.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_test, Y_test)],
               eval_metric=['mae', 'rmse'], early_stopping_rounds=5)
    return fitted, mean_absolute_error(Y_test, fitted.predict(X_test))


# NOTE(review): unlike the other sweeps, this one never set max_depth=20, so
# the library's default depth applies. Kept as-is; confirm whether intended.
n_estimators = []
for param in [i*10 for i in range(13,30)]:
    model, mae = _fit_and_score(_SHARED_PARAMS, n_estimators=param)
    n_estimators.append((param, mae))

min_child_weight = []
for param in [0.05,0.1,0.5,0.7,0.8,0.9,1,1.1,1.2]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, min_child_weight=param)
    min_child_weight.append((param, mae))

learning_rate = []
for param in [i/100 for i in range(1,11)]+[0.005, 0.015]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, learning_rate=param)
    learning_rate.append((param, mae))

subsample = []
for param in [i/10 for i in range(3,10)]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, subsample=param)
    subsample.append((param, mae))

reg_lambda = []
for param in [i/10 for i in range(5)]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, reg_lambda=param)
    reg_lambda.append((param, mae))

reg_alpha = []
for param in [i/10 for i in range(5)]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, reg_alpha=param)
    reg_alpha.append((param, mae))

colsample_bytree = []
for param in [i/10 for i in range(3,10)]:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, colsample_bytree=param)
    colsample_bytree.append((param, mae))

grow_policy = []
for param in ['depthwise', 'lossguide']:
    model, mae = _fit_and_score(_FIXED_SIZE_PARAMS, grow_policy=param)
    grow_policy.append((param, mae))