In [58]:
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn import datasets
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from bayes_opt import BayesianOptimization
import category_encoders as ce
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
train = pd.read_csv(r'Orga/train.csv', index_col=0)
test = pd.read_csv(r'Orga/test.csv', index_col=0)

In [3]:
#metrica
def MAE(actual, pred):
    return (np.mean(np.absolute(actual - pred)))

## Preprocesamiento

In [4]:
train.isnull().sum()

titulo                          5387
descripcion                     1619
tipodepropiedad                   46
direccion                      53072
ciudad                           372
provincia                        155
antiguedad                     43555
habitaciones                   22471
garages                        37765
banos                          26221
metroscubiertos                17400
metrostotales                  51467
idzona                         28621
lat                           123488
lng                           123488
fecha                              0
gimnasio                           0
usosmultiples                      0
piscina                            0
escuelascercanas                   0
centroscomercialescercanos         0
precio                             0
dtype: int64

In [5]:
test.isnull().sum()

titulo                         1378
descripcion                     401
tipodepropiedad                   7
direccion                     13191
ciudad                           83
provincia                        42
antiguedad                    10714
habitaciones                   5628
garages                        9323
banos                          6554
metroscubiertos                4299
metrostotales                 12655
idzona                         7179
lat                           30695
lng                           30695
fecha                             0
gimnasio                          0
usosmultiples                     0
piscina                           0
escuelascercanas                  0
centroscomercialescercanos        0
dtype: int64

In [6]:
train['provincia'] = train['provincia'].fillna('Distrito Federal') #valor mas frecuente
test['provincia'] = test['provincia'].fillna('Distrito Federal')
train['tipodepropiedad'] = train['tipodepropiedad'].fillna('Casa') #valor mas frecuente
test['tipodepropiedad'] = test['tipodepropiedad'].fillna('Casa')

train['descripcion'] = train['descripcion'].fillna('-')
test['descripcion'] = test['descripcion'].fillna('-')

train = train.fillna(0)
test = test.fillna(0)

In [7]:
print(train.shape)
print(test.shape)

(240000, 22)
(60000, 21)


## Feature Engineering

### One Hot Encoding

In [8]:
one_hot_enc = ce.OneHotEncoder()
one_hot_encoded = one_hot_enc.fit_transform(train['provincia'])
train = train.join(one_hot_encoded.add_suffix('_oh'))
del train['provincia']

one_hot_encoded = one_hot_enc.transform(test['provincia'])
test = test.join(one_hot_encoded.add_suffix('_oh'))
del test['provincia']

In [9]:
one_hot_enc = ce.OneHotEncoder()
one_hot_encoded = one_hot_enc.fit_transform(train['tipodepropiedad'])
train = train.join(one_hot_encoded.add_suffix('_oh'))
del train['tipodepropiedad']

one_hot_encoded = one_hot_enc.transform(test['tipodepropiedad'])
test = test.join(one_hot_encoded.add_suffix('_oh'))
del test['tipodepropiedad']

### Target Encoding

In [10]:
print(train.shape)
print(test.shape)

(240000, 76)
(60000, 75)


In [11]:
X = train.drop('precio', axis = 1)
y = train['precio']
target_enc = ce.TargetEncoder(cols=['idzona', 'ciudad'])
train = target_enc.fit_transform(X, y)
train['precio'] = y

In [12]:
test = target_enc.transform(test)

In [13]:
print(train.shape)
print(test.shape)

(240000, 76)
(60000, 75)


### Agregando Features

#### Cantidad de amenities

In [14]:
train['cant_amenities'] = train['usosmultiples'] + train['piscina'] + train['gimnasio']
test['cant_amenities'] = test['usosmultiples'] + test['piscina'] + test['gimnasio']

#### Año de publicación

In [15]:
train['fecha'] = pd.to_datetime(train['fecha'])
train['año'] = train['fecha'].dt.year

test['fecha'] = pd.to_datetime(test['fecha'])
test['año'] = test['fecha'].dt.year

#### De la descripción

In [16]:
buena_ubicacion = ['buena ubicacion', 'buena ubicación', 'excelente ubicación', 'excelente ubicacion', 'bien ubicada', 'bien ubicado']
train['buena_ubicacion'] = train['descripcion'].apply(lambda x: 1 if any(word in x for word in buena_ubicacion ) else 0)
test['buena_ubicacion'] = test['descripcion'].apply(lambda x: 1 if any(word in x for word in buena_ubicacion ) else 0)

In [17]:
luminoso = ['luminoso', 'luminosa']
train['luminoso'] = train['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)
test['luminoso'] = test['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)

In [18]:
jardin = ['jardin', 'jardín', 'parque', 'patio']
train['jardin'] = train['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)
test['jardin'] = test['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)

In [19]:
balcon = ['balcon', 'balcón', 'terraza']
train['balcon'] = train['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)
test['balcon'] = test['descripcion'].apply(lambda x: 1 if any(word in x for word in luminoso ) else 0)

#### Cercanías

In [21]:
train['centroscomercialescercanos'] = train['centroscomercialescercanos'].map({1:2, 0:0})
test['centroscomercialescercanos'] = test['centroscomercialescercanos'].map({1:2, 0:0})

In [22]:
train['cercanias'] = train['centroscomercialescercanos'] + train['escuelascercanas']
train['cercanias'] = train['cercanias'].astype(str)

test['cercanias'] = test['centroscomercialescercanos'] + test['escuelascercanas']
test['cercanias'] = test['cercanias'].astype(str)

In [23]:
train = train.join(pd.get_dummies(train['cercanias'], prefix = 'cercania'))
del train['cercanias']

test = test.join(pd.get_dummies(test['cercanias'], prefix = 'cercania'))
del test['cercanias']

### Borrando columnas innecesarias

In [24]:
del train['lat']
del train['lng']
del train['descripcion']
del train['titulo']
del train['direccion']
del train['fecha']

del test['lat']
del test['lng']
del test['descripcion']
del test['titulo']
del test['direccion']
del test['fecha']

In [25]:
print(train.shape)
print(test.shape)

(240000, 80)
(60000, 79)


### Dividiendo por año

In [26]:
train_2012 = train[train['año'] == 2012]
train_2013 = train[train['año'] == 2013]
train_2014 = train[train['año'] == 2014]
train_2015 = train[train['año'] == 2015]
train_2016 = train[train['año'] == 2016]

test_2012 = test[test['año'] == 2012]
test_2013 = test[test['año'] == 2013]
test_2014 = test[test['año'] == 2014]
test_2015 = test[test['año'] == 2015]
test_2016 = test[test['año'] == 2016]

## KNN

In [27]:
def KNN(train, param_grid):
    KNN = KNeighborsRegressor()
    X = train.drop(['precio'], axis=1)
    y = train['precio']
    grid = GridSearchCV(KNN, param_grid, cv=5, scoring='neg_mean_absolute_error')
    grid.fit(X, y)
    KNN = grid.best_estimator_
    print(grid.best_params_)
    print(grid.best_score_)
    return KNN

In [None]:
k_valores = list(range(1, 21))
param_grid = dict(n_neighbors=k_valores)
KNN_2012 = KNN(train_2012, param_grid)
KNN_2013 = KNN(train_2013, param_grid)
KNN_2014 = KNN(train_2014, param_grid)
KNN_2015 = KNN(train_2015, param_grid)
KNN_2016 = KNN(train_2016, param_grid)

## XGBoost

### Tuneo de hiperparámetros

In [28]:
def train_set(train,test):
    X = train.drop(['precio'], axis=1)
    y = train['precio']
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.4, random_state=1)
    print("Train shapes: X = " + str(X_train.shape) + " y = " + str(y_train.shape))
    print("Validation shapes: X = " + str(X_val.shape) +  " y = " + str(y_val.shape))
    print("Test shape: " + str(test.shape))
    return X, y, X_val, y_val

In [29]:
x_2012, y_2012, x_2012_val, y_2012_val = train_set(train_2012,test_2012)
x_2013, y_2013, x_2013_val, y_2013_val = train_set(train_2013,test_2013)
x_2014, y_2014, x_2014_val, y_2014_val = train_set(train_2014,test_2014)
x_2015, y_2015, x_2015_val, y_2015_val = train_set(train_2015,test_2015)
x_2016, y_2016, x_2016_val, y_2016_val = train_set(train_2016,test_2016)

Train shapes: X = (14120, 79) y = (14120,)
Validation shapes: X = (9414, 79) y = (9414,)
Test shape: (5899, 79)
Train shapes: X = (18231, 79) y = (18231,)
Validation shapes: X = (12155, 79) y = (12155,)
Test shape: (7592, 79)
Train shapes: X = (24343, 79) y = (24343,)
Validation shapes: X = (16229, 79) y = (16229,)
Test shape: (10018, 79)
Train shapes: X = (30882, 79) y = (30882,)
Validation shapes: X = (20588, 79) y = (20588,)
Test shape: (13017, 79)
Train shapes: X = (56422, 79) y = (56422,)
Validation shapes: X = (37616, 79) y = (37616,)
Test shape: (23474, 79)


In [30]:
def xgb_evaluate(max_depth, gamma, colsample_bytree,seed,min_child_weight,n_estimators,
                  reg_alpha,reg_lambda,subsample,learning_rate):
    params = {'eval_metric': 'rmse',
              'max_depth': int(max_depth),
              'subsample': subsample,
              'eta': 0.1,
              'gamma': gamma,
              'colsample_bytree': colsample_bytree,
              "seed": int(seed),
              "min_child_weight": min_child_weight,
              "n_estimators": n_estimators,
              "reg_alpha": reg_alpha,
              "reg_lambda": reg_lambda,
              "learning_rate": learning_rate
             }
    # Used around 1000 boosting rounds in the full model
    cv_result = xgb.cv(params, dtrain, num_boost_round=100, nfold=3)    
    
    # Bayesian optimization only knows how to maximize, not minimize, so return the negative RMSE
    return -1.0 * cv_result['test-rmse-mean'].iloc[-1]

In [31]:
def optimize(sett):
    dtrain = sett
    xgb_bo = BayesianOptimization(xgb_evaluate, {'max_depth': (3, 7), 
                                             'gamma': (0, 1),
                                             'colsample_bytree': (0.3, 0.9),
                                             "seed": (10,50),
                                             "min_child_weight": (0.4,1.5),
                                             "n_estimators":(100,10000),                                                                    
                                             "reg_alpha":(0.2,0.75),
                                             "reg_lambda": (0.2,0.8),
                                             "subsample" : (0.3, 0.8),
                                             "learning_rate": (0.01,0.07),
                                            })
    # Se probo con 3 puntos iniciales y 10 iteraciones, deberiamos probar con mas en ambas
    # La prox iteracion en casa probar 100 y 10000
    xgb_bo.maximize(init_points=3, n_iter=10, acq='ei')
    params = xgb_bo.max['params']
    params['max_depth'] = int(params['max_depth'])
    params["seed"] = int(params["seed"])
    params["n_estimators"] = int(params["n_estimators"])
    return params

In [32]:
import xgboost as xgb

dtrain_2012 = xgb.DMatrix(x_2012, label=y_2012)
dtrain_2013 = xgb.DMatrix(x_2013, label=y_2013)
dtrain_2014 = xgb.DMatrix(x_2014, label=y_2014)
dtrain_2015 = xgb.DMatrix(x_2015, label=y_2015)
dtrain_2016 = xgb.DMatrix(x_2016, label=y_2016)

  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


In [33]:
dtrain = dtrain_2012
params_2012 = optimize(dtrain_2012)
dtrain = dtrain_2013
params_2013 = optimize(dtrain_2013)
dtrain = dtrain_2014
params_2014 = optimize(dtrain_2014)
dtrain = dtrain_2015
params_2015 = optimize(dtrain_2015)
dtrain = dtrain_2016
params_2016 = optimize(dtrain_2016)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | n_esti... | reg_alpha | reg_la... |   seed    | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-8.509e+0[0m | [0m 0.7917  [0m | [0m 0.4141  [0m | [0m 0.05016 [0m | [0m 6.436   [0m | [0m 0.878   [0m | [0m 2.321e+0[0m | [0m 0.275   [0m | [0m 0.6328  [0m | [0m 20.29   [0m | [0m 0.4794  [0m |
| [0m 2       [0m | [0m-1.374e+0[0m | [0m 0.3899  [0m | [0m 0.7415  [0m | [0m 0.01067 [0m | [0m 6.607   [0m | [0m 0.9903  [0m | [0m 5.272e+0[0m | [0m 0.4276  [0m | [0m 0.6156  [0m | [0m 15.54   [0m | [0m 0.7937  [0m |
| [0m 3       [0m | [0m-9.08e+05[0m | [0m 0.4872  [0m | [0m 0.3818  [0m | [0m 0.03285 [0m | [0m 5.813   [0m | [0m 0.5812  [0m | [0m 3.867e+0[0m | [0m 0.2576  [0m | [0m 0.4213  [0m | [0m 26.97   [0m | [

| [0m 4       [0m | [0m-1.025e+0[0m | [0m 0.3491  [0m | [0m 0.5575  [0m | [0m 0.06492 [0m | [0m 3.559   [0m | [0m 1.47    [0m | [0m 408.6   [0m | [0m 0.4765  [0m | [0m 0.6454  [0m | [0m 19.13   [0m | [0m 0.3657  [0m |
| [0m 5       [0m | [0m-1.04e+06[0m | [0m 0.7005  [0m | [0m 0.8194  [0m | [0m 0.04574 [0m | [0m 3.79    [0m | [0m 0.7593  [0m | [0m 4.631e+0[0m | [0m 0.5427  [0m | [0m 0.6937  [0m | [0m 25.6    [0m | [0m 0.5833  [0m |
| [0m 6       [0m | [0m-1.021e+0[0m | [0m 0.8712  [0m | [0m 0.8589  [0m | [0m 0.03359 [0m | [0m 4.547   [0m | [0m 0.7072  [0m | [0m 6.572e+0[0m | [0m 0.5827  [0m | [0m 0.5438  [0m | [0m 37.57   [0m | [0m 0.3588  [0m |
| [0m 7       [0m | [0m-1.068e+0[0m | [0m 0.3157  [0m | [0m 0.7945  [0m | [0m 0.04778 [0m | [0m 3.386   [0m | [0m 0.5781  [0m | [0m 7.624e+0[0m | [0m 0.4924  [0m | [0m 0.2497  [0m | [0m 23.55   [0m | [0m 0.751   [0m |
| [0m 8       [0m | [0m-9.857

| [0m 9       [0m | [0m-1.091e+0[0m | [0m 0.4784  [0m | [0m 0.346   [0m | [0m 0.04949 [0m | [0m 5.967   [0m | [0m 1.015   [0m | [0m 1.952e+0[0m | [0m 0.5595  [0m | [0m 0.4017  [0m | [0m 17.46   [0m | [0m 0.5201  [0m |
| [0m 10      [0m | [0m-1.593e+0[0m | [0m 0.7461  [0m | [0m 0.1456  [0m | [0m 0.01256 [0m | [0m 5.561   [0m | [0m 1.402   [0m | [0m 1.242e+0[0m | [0m 0.2261  [0m | [0m 0.4398  [0m | [0m 47.67   [0m | [0m 0.458   [0m |
| [0m 11      [0m | [0m-1.175e+0[0m | [0m 0.364   [0m | [0m 0.994   [0m | [0m 0.03297 [0m | [0m 5.612   [0m | [0m 0.4802  [0m | [0m 4.134e+0[0m | [0m 0.6321  [0m | [0m 0.6458  [0m | [0m 22.47   [0m | [0m 0.5619  [0m |
| [0m 12      [0m | [0m-1.51e+06[0m | [0m 0.4144  [0m | [0m 0.3442  [0m | [0m 0.01617 [0m | [0m 4.031   [0m | [0m 0.8503  [0m | [0m 4.977e+0[0m | [0m 0.5082  [0m | [0m 0.7982  [0m | [0m 46.79   [0m | [0m 0.7367  [0m |
| [0m 13      [0m | [0m-1.166

### Probando

In [49]:
def fabrica_XGB(params):
    best_xgb_model = XGBRegressor(colsample_bytree=params["colsample_bytree"],
                 gamma=params["gamma"],                 
                 learning_rate=params["learning_rate"],
                 max_depth=params["max_depth"],
                 min_child_weight=params["min_child_weight"],
                 n_estimators=params["n_estimators"],                                                                    
                 reg_alpha=params["reg_alpha"],
                 reg_lambda=params["reg_lambda"],
                 subsample=params["subsample"],
                 seed=params["seed"])
    
    return best_xgb_model

In [50]:
XGB_2012 = fabrica_XGB(params_2012)
XGB_2013 = fabrica_XGB(params_2013)
XGB_2014 = fabrica_XGB(params_2014)
XGB_2015 = fabrica_XGB(params_2015)
XGB_2016 = fabrica_XGB(params_2016)

In [None]:
XGB_2012.fit(x_2012,y_2012)
XGB_pred_val = XGB_2012.predict(x_2012_val)
XGB_mae = MAE(y_2012_val, XGB_pred_val)
print("MAE XGB: " + str(XGB_mae))

In [None]:
XGB_2013.fit(x_2013,y_2013)
XGB_pred_val = XGB_2013.predict(x_2013_val)
XGB_mae = MAE(y_2013_val, XGB_pred_val)
print("MAE XGB: " + str(XGB_mae))

In [None]:
XGB_2014.fit(x_2014,y_2014)
XGB_pred_val = XGB_2014.predict(x_2014_val)
XGB_mae = MAE(y_2014_val, XGB_pred_val)
print("MAE XGB: " + str(XGB_mae))

In [None]:
XGB_2015.fit(x_2015,y_2015)
XGB_pred_val = XGB_2015.predict(x_2015_val)
XGB_mae = MAE(y_2015_val, XGB_pred_val)
print("MAE XGB: " + str(XGB_mae))

In [None]:
XGB_2016.fit(x_2016,y_2016)
XGB_pred_val = XGB_2016.predict(x_2016_val)
XGB_mae = MAE(y_2016_val, XGB_pred_val)
print("MAE XGB: " + str(XGB_mae))

## Random Forest

In [96]:
def fabrica_RF(train, param_grid):
    RF = RandomForestRegressor()
    X = train.drop(['precio'], axis=1)
    y = train['precio']
    grid = GridSearchCV(RF, param_grid, cv=4, scoring='neg_mean_absolute_error')
    grid.fit(X, y)
    RF = grid.best_estimator_
    print(grid.best_params_)
    print(grid.best_score_)
    return RF

In [94]:
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}

In [None]:
rf_2012 = fabrica_RF(train_2012, param_grid)
rf_2013 = fabrica_RF(train_2013, param_grid)
rf_2014 = fabrica_RF(train_2014, param_grid)
rf_2015 = fabrica_RF(train_2015, param_grid)
rf_2016 = fabrica_RF(train_2016, param_grid)

In [100]:
rf_2012 = RandomForestRegressor(n_estimators=200)
rf_2012.fit(x_2012,y_2012)
rf_pred_val = rf_2012.predict(x_2012_val)
rf_mae = MAE(y_2012_val, rf_pred_val)
print("MAE rf: " + str(rf_mae))

MAE rf: 169045.0816290103


In [101]:
rf_2013 = RandomForestRegressor(n_estimators=200)
rf_2013.fit(x_2013,y_2013)
rf_pred_val = rf.predict(x_2013_val)
rf_mae = MAE(y_2013_val, rf_pred_val)
print("MAE rf: " + str(rf_mae))

MAE rf: 189368.06082819257


In [102]:
rf_2014 = RandomForestRegressor(n_estimators=200)
rf_2014.fit(x_2014,y_2014)
rf_pred_val = rf.predict(x_2014_val)
rf_mae = MAE(y_2014_val, rf_pred_val)
print("MAE rf: " + str(rf_mae))

MAE rf: 578838.4877421026


In [103]:
rf_2015 = RandomForestRegressor(n_estimators=200)
rf_2015.fit(x_2015,y_2015)
rf_pred_val = rf_2015.predict(x_2015_val)
rf_mae = MAE(y_2015_val, rf_pred_val)
print("MAE rf: " + str(rf_mae))

MAE rf: 196881.86228367864


In [104]:
rf_2016 = RandomForestRegressor(n_estimators=200)
rf_2016.fit(x_2016,y_2016)
rf_pred_val = rf_2016.predict(x_2016_val)
rf_mae = MAE(y_2016_val, rf_pred_val)
print("MAE rf: " + str(rf_mae))

MAE rf: 211762.03941005524


In [105]:
res_2012 = rf_2012.predict(test_2012)
res_2013 = rf_2013.predict(test_2013)
res_2014 = rf_2014.predict(test_2014)
res_2015 = rf_2015.predict(test_2015)
res_2016 = rf_2016.predict(test_2016)

## Ensembles

In [51]:
ensemble2012 = VotingRegressor([('rf', rf_2012), ('xgb', XGB_2012)])
ensemble2013 = VotingRegressor([('rf', rf_2013), ('xgb', XGB_2013)])
ensemble2014 = VotingRegressor([('rf', rf_2014), ('xgb', XGB_2014)])
ensemble2015 = VotingRegressor([('rf', rf_2015), ('xgb', XGB_2015)])
ensemble2016 = VotingRegressor([('rf', rf_2016), ('xgb', XGB_2016)])

In [52]:
ensemble2012.fit(x_2012, y_2012)
en_pred_val = ensemble2012.predict(x_2012_val)
en_mae = MAE(y_2012_val, en_pred_val)
print("MAE Ensemble: " + str(en_mae))

  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


MAE Ensemble: 188340.2204186133


In [53]:
ensemble2013.fit(x_2013, y_2013)
en_pred_val = ensemble2013.predict(x_2013_val)
en_mae = MAE(y_2013_val, en_pred_val)
print("MAE Ensemble: " + str(en_mae))

  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


MAE Ensemble: 200539.49388936692


In [None]:
ensemble2014.fit(x_2014, y_2014)
en_pred_val = ensemble2014.predict(x_2014_val)
en_mae = MAE(y_2014_val, en_pred_val)
print("MAE Ensemble: " + str(en_mae))

In [37]:
ensemble2015.fit(x_2015, y_2015)
en_pred_val = ensemble2015.predict(x_2015_val)
en_mae = MAE(y_2015_val, en_pred_val)
print("MAE Ensemble: " + str(en_mae))

  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


MAE Ensemble: 558606.4336739318


In [38]:
ensemble2016.fit(x_2016, y_2016)
en_pred_val = ensemble2016.predict(x_2016_val)
en_mae = MAE(y_2016_val, en_pred_val)
print("MAE Ensemble: " + str(en_mae))

  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


MAE Ensemble: 701516.6280770054


In [41]:
res_2012 = ensemble2012.predict(test_2012)
res_2013 = ensemble2013.predict(test_2013)
res_2014 = ensemble2014.predict(test_2014)
res_2015 = ensemble2015.predict(test_2015)
res_2016 = ensemble2016.predict(test_2016)

In [106]:
res_2012 = pd.DataFrame(res_2012, index=test_2012.index, columns=['precio'])
res_2012 = res_2012.rename(columns={'precio':'target'})

res_2013 = pd.DataFrame(res_2013, index=test_2013.index, columns=['precio'])
res_2013 = res_2013.rename(columns={'precio':'target'})

res_2014 = pd.DataFrame(res_2014, index=test_2014.index, columns=['precio'])
res_2014 = res_2014.rename(columns={'precio':'target'})

res_2015 = pd.DataFrame(res_2015, index=test_2015.index, columns=['precio'])
res_2015 = res_2015.rename(columns={'precio':'target'})

res_2016 = pd.DataFrame(res_2016, index=test_2016.index, columns=['precio'])
res_2016 = res_2016.rename(columns={'precio':'target'})

In [107]:
res = res_2012.append(res_2013).append(res_2014).append(res_2015).append(res_2016)
res.to_csv("workshop-submission-rf.csv", header=True)