### =====================================================================
### IMPORTACIÓN GENERAL DE LA INFORMACIÓN.
### =====================================================================

In [28]:
## IMPORTACIÓN GENERAL DE LIBRERIAS Y VISUALIZACIÓN DE DATOS (matplotlib y seaborn)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as DT
import warnings
import descartes
import geopandas as gpd
import json
import requests
import geocoder

# Random Forest.
from sklearn.ensemble import RandomForestRegressor
from shapely.geometry import Point, Polygon
from urllib2 import urlopen

# XGBoost.
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

%matplotlib inline
warnings.filterwarnings('ignore')
plt.style.use('default') 
sns.set(style="whitegrid") 
plt.rcParams['figure.figsize'] = (15, 10)

### =====================================================================
### ALGORITMOS DE MACHINE LEARNING:
### =====================================================================

In [29]:
# LECTURAS DE CSV YA PROCESADOS.
train = pd.read_csv('DATA/train_procesado.csv')
test = pd.read_csv('DATA/test_procesado.csv')

In [30]:
Identificador = pd.DataFrame()
Identificador['id'] = test['id']

In [31]:
train = train.drop('id', axis = 1)
test = test.drop('id', axis = 1)

#train = train.drop('mean', axis = 1)
#test = test.drop('mean', axis = 1)
#train = train.drop('median', axis = 1)
#test = test.drop('median', axis = 1)

In [32]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240000 entries, 0 to 239999
Data columns (total 78 columns):
habitaciones                  240000 non-null int64
garages                       240000 non-null int64
banos                         240000 non-null int64
gimnasio                      240000 non-null int64
usosmultiples                 240000 non-null int64
piscina                       240000 non-null int64
escuelascercanas              240000 non-null int64
centroscomercialescercanos    240000 non-null int64
2012                          240000 non-null int64
2013                          240000 non-null int64
2014                          240000 non-null int64
2015                          240000 non-null int64
2016                          240000 non-null int64
precio                        240000 non-null float64
ciudad                        240000 non-null int64
provincia                     240000 non-null int64
tipodepropiedad               240000 non-null int64
ant

In [33]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 77 columns):
habitaciones                  60000 non-null int64
garages                       60000 non-null int64
banos                         60000 non-null int64
gimnasio                      60000 non-null int64
usosmultiples                 60000 non-null int64
piscina                       60000 non-null int64
escuelascercanas              60000 non-null int64
centroscomercialescercanos    60000 non-null int64
2012                          60000 non-null int64
2013                          60000 non-null int64
2014                          60000 non-null int64
2015                          60000 non-null int64
2016                          60000 non-null int64
ciudad                        60000 non-null int64
provincia                     60000 non-null int64
tipodepropiedad               60000 non-null int64
antiguedad_agrupada           60000 non-null int64
metroscubiertos_agrupada

In [37]:
# Resto de los labels.
Y = np.array(train['precio'])
X = train.drop('precio', axis = 1)

### =====================================================================
### XGBoost.
### =====================================================================

In [41]:
import hyperopt as hp
from hyperopt import Trials,fmin,STATUS_OK
import xgboost as xgb

### Step 1 : defining the objective function
def objective(params, n_folds=3):

# Converting pandas dataframe into xgboost format
    d_train = xgb.DMatrix(X,Y)    

# Running cross validation on your xgboost model
    cv_results = xgb.cv(params, d_train, nfold = n_folds, num_boost_round = 500, 
                        early_stopping_rounds = 25, metrics = 'rmse', seed = 0)
    
# returns the loss on validation set
    loss = min(cv_results['test-rmse-mean'])
    return loss

### step 2 : defining the search space
xgb_space = {
    #    max_depth : maximum depth allowed for every tree
    # hp.choice.choice will select 1 value from the given list
        'max_depth':hp.hp.choice('max_depth', np.arange(6, 14,1, dtype=int)),
    #    subsample : maximum allowed rows for every tree
        'subsample':hp.hp.quniform('subsample',0.5,1.0,0.05),
    #    colsample_bytree : maximum allowed features for every tree
    # hp.hp.quniform returns a float between a given range
        'colsample_bytree':hp.hp.quniform('colsample_bytree',0.5,1.0,0.05),
    #    min_child-weight : minimum number of instances required in each node
        'min_child_weight':hp.hp.quniform('min_child_weight', 100, 1000,100),
    #    reg_alpha : L1 regularisation term on weights
        'reg_alpha': hp.hp.uniform('reg_alpha', 0.0, 1.0),
    #    reg_lambda : L2 regularisation term on weights
        'reg_lambda': hp.hp.uniform('reg_lambda', 0.0, 1.0)
}
    
### step 3 : storing the results of every iteration    
bayes_trials = Trials()
MAX_EVALS = 20

# Optimize
best = fmin(fn = objective, space = xgb_space, algo = hp.tpe.suggest, 
max_evals = MAX_EVALS, trials = bayes_trials)

### best will the return the the best hyperparameter set

[18:48:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 280 extra nodes, 0 pruned nodes, max_depth=11
[18:48:50] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 286 extra nodes, 0 pruned nodes, max_depth=11
[18:48:51] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 284 extra nodes, 0 pruned nodes, max_depth=11
[18:48:52] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 270 extra nodes, 0 pruned nodes, max_depth=11
[18:48:53] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 276 extra nodes, 0 pruned nodes, max_depth=11
[18:48:53] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 274 extra nodes, 0 pruned nodes, max_depth=11
[18:48:54] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 266 extra nodes, 0 pruned nodes, max_depth=11
[18:48:55] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 294 extra nodes, 0 pruned nodes, max_depth=11


[18:49:44] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 158 extra nodes, 0 pruned nodes, max_depth=11
[18:49:45] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 92 extra nodes, 0 pruned nodes, max_depth=11
[18:49:45] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 68 extra nodes, 0 pruned nodes, max_depth=11
[18:49:46] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 108 extra nodes, 0 pruned nodes, max_depth=11
[18:49:47] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 154 extra nodes, 0 pruned nodes, max_depth=11
[18:49:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=11
[18:49:48] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 174 extra nodes, 0 pruned nodes, max_depth=11
[18:49:49] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 0 pruned nodes, max_depth=11
[18:

[18:50:34] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 130 extra nodes, 0 pruned nodes, max_depth=11
[18:50:35] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 0 pruned nodes, max_depth=11
[18:50:35] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=11
[18:50:36] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=11
[18:50:37] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 78 extra nodes, 0 pruned nodes, max_depth=11
[18:50:38] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 116 extra nodes, 0 pruned nodes, max_depth=11
[18:50:39] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 168 extra nodes, 0 pruned nodes, max_depth=11
[18:50:39] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 68 extra nodes, 0 pruned nodes, max_depth=11
[18:5

[18:51:23] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 74 extra nodes, 0 pruned nodes, max_depth=11
[18:51:24] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 182 extra nodes, 0 pruned nodes, max_depth=11
[18:51:25] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 196 extra nodes, 0 pruned nodes, max_depth=11
[18:51:25] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 110 extra nodes, 0 pruned nodes, max_depth=11
[18:51:26] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=11
[18:51:27] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 72 extra nodes, 0 pruned nodes, max_depth=11
[18:51:28] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 114 extra nodes, 0 pruned nodes, max_depth=11
[18:51:28] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=11
[18:

[18:52:14] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=11
[18:52:15] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 0 pruned nodes, max_depth=11
[18:52:15] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=11
[18:52:16] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=11
[18:52:17] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 0 pruned nodes, max_depth=11
[18:52:18] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 0 pruned nodes, max_depth=11
[18:52:18] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=11
[18:52:19] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 0 pruned nodes, max_depth=11
[18:52:2

[18:53:04] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=11
[18:53:05] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 70 extra nodes, 0 pruned nodes, max_depth=11
[18:53:06] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 0 pruned nodes, max_depth=11
[18:53:07] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=11
[18:53:07] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 196 extra nodes, 0 pruned nodes, max_depth=11
[18:53:08] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=11
[18:53:09] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 0 pruned nodes, max_depth=11
[18:53:10] /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=11
[18:53:

KeyboardInterrupt: 

  0%|          | 0/20 [04:31<?, ?it/s, best loss: ?]


KeyboardInterrupt: 

In [15]:
trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=42)

In [12]:
data_dmatrix_inst = xgb.DMatrix(data=trainX,label=trainY)

In [None]:
# {'colsample_bytree': 0.6103032679062302, 'gamma': 9, 'learning_rate': 0.05017181127931773, 'max_depth': 6, 'min_child_weight': 5, 'reg_lambda': 2, 'subsample_bytree': 0.6177768456991214}

In [None]:
#{'colsample_bytree': 0.7585033814547916, 'gamma': 3, 'learning_rate': 0.050532604074423296, 'max_depth': 7, 'min_child_weight': 5, 'reg_lambda': 0, 'subsample_bytree': 0.9779760690574663}

In [26]:
xg_reg = xgb.XGBRegressor(objective = 'reg:linear', 
                          n_estimators = 500,
                          min_child_weight = 5,
                          learning_rate = 0.05017181127931773,
                          gamma = 9,
                          reg_lambda = 2,
                          max_depth = 6,
                          colsample_bytree = 0.7585033814547916, 
                          subsample_bytree = 0.9779760690574663)

In [14]:
xg_reg.fit(trainX,trainY)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.5, gamma=0.2, learning_rate=0.05,
       max_delta_step=0, max_depth=5, min_child_weight=5, missing=None,
       n_estimators=500, n_jobs=1, nthread=None, objective='reg:linear',
       random_state=0, reg_alpha=0, reg_lambda=2, scale_pos_weight=1,
       seed=None, silent=True, subsample=1, subsample_bytree=0.8)

In [15]:
y_pred = xg_reg.predict(testX)
# Calculamos el error absoluto.
errors = abs(y_pred - testY)
# Imprimimos el error.
print('Error:', round(np.mean(errors), 2), 'grados.')

('Error:', 496425.92, 'grados.')


In [16]:
# Calculamos el porcentaje de error.
mape = 100 * (errors / testY)
# Calculate la precisión.
accuracy = 100 - np.mean(mape)
print('Precision:', round(accuracy, 2), '%.')

('Precision:', 76.19, '%.')


In [17]:
prediccion = xg_reg.predict(test)

In [18]:
## =================================================================================================
## ARMAMOS EN BASE A LA PREDICCIÓN QUE TENEMOS UN CSV PARA SUBIR A KAGGLE CON EL FORMATO INDICADO!
## =================================================================================================
submission = pd.DataFrame({ 'id': Identificador['id'], 'target': prediccion })
submission.to_csv("SUBMITS/011_G34_XGBoost.csv", index=False)