In [73]:
import nbimporter

import pandas as pd
import numpy as np
from datetime import datetime

# sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import category_encoders as ce

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

# notebooks
from preprocessing import load_train


In [2]:
train = load_train()

# Modelos

In [3]:
def regression(df, regressor):
    X = np.array(df.drop('precio', axis=1))
    y = np.array(df['precio'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    
    # train:
    regressor.fit(X_train, y_train)
    
    # prediction:
    y_pred = regressor.predict(X_test)
    print('MAE: ', int(metrics.mean_absolute_error(y_test, y_pred)))

# Funciones auxiliares:

In [4]:
def aniomes(anio, mes):
    if len(str(mes)) == 1:
        return int(str(anio)+'0'+str(mes))
    return int(str(anio)+str(mes))

# Regresion lineal

In [5]:
df = train.copy()

In [6]:
df.columns

Index(['titulo', 'descripcion', 'tipodepropiedad', 'ciudad', 'provincia',
       'antiguedad', 'habitaciones', 'garages', 'banos', 'metroscubiertos',
       'metrostotales', 'fecha', 'gimnasio', 'usosmultiples', 'piscina',
       'escuelascercanas', 'centroscomercialescercanos', 'precio'],
      dtype='object')

In [7]:
df.corr()['precio'].nlargest(10)[1:]

metroscubiertos    0.629187
banos              0.534060
metrostotales      0.514411
garages            0.348543
habitaciones       0.251014
gimnasio           0.153898
usosmultiples      0.145407
piscina            0.112712
antiguedad         0.073097
Name: precio, dtype: float64

In [8]:
def tiene(descripcion, indicadores):
    for indicador in indicadores:
        if indicador in str(descripcion).lower():
            return 1
        else: return 0

In [9]:
# Vamos a agregarle features: 

# tiene jardin?
df['jardin'] = df['descripcion'].apply(lambda x: tiene(x, ['jardin', 'patio']))

# tiene balcon?
df['balcon'] = df['descripcion'].apply(lambda x: tiene(x, ['balcon']))

# esta a estrenar?
df['estrenar'] = df['antiguedad'].apply(lambda x: 1 if x==0 else 0)


In [10]:
# Empezamos haciendo una regresion lineal con los features mas basicos.
df = df[['tipodepropiedad', 'provincia', 'habitaciones', 'garages',
         'banos', 'metroscubiertos', 'fecha', 'gimnasio', 'jardin', 'balcon',
         'usosmultiples', 'piscina','escuelascercanas', 'centroscomercialescercanos', 'precio']]

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240000 entries, 0 to 239999
Data columns (total 15 columns):
tipodepropiedad               240000 non-null category
provincia                     240000 non-null category
habitaciones                  217529 non-null float64
garages                       202235 non-null float64
banos                         213779 non-null float64
metroscubiertos               222600 non-null float64
fecha                         240000 non-null datetime64[ns]
gimnasio                      240000 non-null int8
jardin                        240000 non-null int64
balcon                        240000 non-null int64
usosmultiples                 240000 non-null int8
piscina                       240000 non-null int8
escuelascercanas              240000 non-null int8
centroscomercialescercanos    240000 non-null int8
precio                        240000 non-null float64
dtypes: category(2), datetime64[ns](1), float64(5), int64(2), int8(5)
memory usage: 16.3 

In [12]:
df.head(3)

Unnamed: 0,tipodepropiedad,provincia,habitaciones,garages,banos,metroscubiertos,fecha,gimnasio,jardin,balcon,usosmultiples,piscina,escuelascercanas,centroscomercialescercanos,precio
0,Apartamento,Distrito Federal,2.0,1.0,2.0,80.0,2015-08-23,0,0,0,0,0,0,0,2273000.0
1,Casa en condominio,Distrito Federal,3.0,2.0,2.0,268.0,2013-06-28,0,0,0,0,0,1,1,3600000.0
2,Casa,Jalisco,3.0,2.0,2.0,144.0,2015-10-17,0,1,0,0,0,0,0,1200000.0


In [13]:
# Manejaremos los nulos.

In [14]:
# Para las habitaciones, en los casos de nulos, elegimos reemplazar el nulo con el 
# valor promedio de habitaciones para ese tipo de propiedad.

hab_mean = df.groupby(['tipodepropiedad', 'provincia'])['habitaciones'].transform(lambda x: x.mean())
hab_mean2 = df.groupby(['tipodepropiedad'])['habitaciones'].transform(lambda x: x.mean())
df['habitaciones'] = df['habitaciones'].fillna(hab_mean).fillna(hab_mean2).apply(lambda x: int(x))

In [15]:
# Utilizaremos el mismo criterio para garages y banos.

gar_mean = df.groupby(['tipodepropiedad', 'habitaciones', 'provincia'])['garages'].transform(lambda x: x.mean())
gar_mean2 = df.groupby(['tipodepropiedad', 'habitaciones'])['garages'].transform(lambda x: x.mean())
gar_mean3 = df.groupby(['tipodepropiedad'])['garages'].transform(lambda x: x.mean())
df['garages'] = df['garages'].fillna(gar_mean).fillna(gar_mean2).fillna(gar_mean3)

ban_mean = df.groupby(['tipodepropiedad', 'habitaciones', 'provincia'])['banos'].transform(lambda x: x.mean())
ban_mean2 = df.groupby(['tipodepropiedad', 'habitaciones'])['banos'].transform(lambda x: x.mean())
ban_mean3 = df.groupby(['tipodepropiedad'])['banos'].transform(lambda x: x.mean())
df['banos'] = df['banos'].fillna(ban_mean).fillna(ban_mean2).fillna(ban_mean3)

In [16]:
# Para los metros cubiertos, haremos algo similar:

gar_mean = df.groupby(['tipodepropiedad', 'habitaciones', 'provincia'])['metroscubiertos'].transform(lambda x: x.mean())
gar_mean2 = df.groupby(['tipodepropiedad', 'habitaciones'])['metroscubiertos'].transform(lambda x: x.mean())
gar_mean3 = df.groupby(['tipodepropiedad'])['metroscubiertos'].transform(lambda x: x.mean())
df['metroscubiertos'] = df['metroscubiertos'].fillna(gar_mean).fillna(gar_mean2).fillna(gar_mean3).apply(lambda x: int(x))

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240000 entries, 0 to 239999
Data columns (total 15 columns):
tipodepropiedad               240000 non-null category
provincia                     240000 non-null category
habitaciones                  240000 non-null int64
garages                       240000 non-null float64
banos                         240000 non-null float64
metroscubiertos               240000 non-null int64
fecha                         240000 non-null datetime64[ns]
gimnasio                      240000 non-null int8
jardin                        240000 non-null int64
balcon                        240000 non-null int64
usosmultiples                 240000 non-null int8
piscina                       240000 non-null int8
escuelascercanas              240000 non-null int8
centroscomercialescercanos    240000 non-null int8
precio                        240000 non-null float64
dtypes: category(2), datetime64[ns](1), float64(3), int64(4), int8(5)
memory usage: 16.3 MB


In [18]:
# Realizaremos one hot encoding para las variables categoricas.

In [19]:
df = pd.get_dummies(df)

In [20]:
# Manejamos la fecha

In [21]:
df['anio'] = df['fecha'].dt.year
df['mes'] = df['fecha'].dt.month
df['aniomes'] = df.apply(lambda row: aniomes(row['anio'], row['mes']), axis=1)
df['fecha'] = df['fecha'].apply(lambda x: int(datetime.timestamp(x)))

In [22]:
df.head(3)

Unnamed: 0,habitaciones,garages,banos,metroscubiertos,fecha,gimnasio,jardin,balcon,usosmultiples,piscina,...,provincia_Tabasco,provincia_Tamaulipas,provincia_Tlaxcala,provincia_Veracruz,provincia_Yucatán,provincia_Zacatecas,provincia_unknown,anio,mes,aniomes
0,2,1.0,2.0,80,1440298800,0,0,0,0,0,...,0,0,0,0,0,0,0,2015,8,201508
1,3,2.0,2.0,268,1372388400,0,0,0,0,0,...,0,0,0,0,0,0,0,2013,6,201306
2,3,2.0,2.0,144,1445050800,0,1,0,0,0,...,0,0,0,0,0,0,0,2015,10,201510


In [23]:
# Ahora ya estamos listos para aplicar distintos metodos de regresion lineal:

In [24]:
df.columns

Index(['habitaciones', 'garages', 'banos', 'metroscubiertos', 'fecha',
       'gimnasio', 'jardin', 'balcon', 'usosmultiples', 'piscina',
       'escuelascercanas', 'centroscomercialescercanos', 'precio',
       'tipodepropiedad_Apartamento', 'tipodepropiedad_Bodega comercial',
       'tipodepropiedad_Casa', 'tipodepropiedad_Casa en condominio',
       'tipodepropiedad_Edificio', 'tipodepropiedad_Local Comercial',
       'tipodepropiedad_Oficina comercial', 'tipodepropiedad_Otro',
       'tipodepropiedad_Terreno', 'tipodepropiedad_Terreno comercial',
       'provincia_Aguascalientes', 'provincia_Baja California Norte',
       'provincia_Baja California Sur', 'provincia_Campeche',
       'provincia_Chiapas', 'provincia_Chihuahua', 'provincia_Coahuila',
       'provincia_Colima', 'provincia_Distrito Federal', 'provincia_Durango',
       'provincia_Edo. de México', 'provincia_Guanajuato',
       'provincia_Guerrero', 'provincia_Hidalgo', 'provincia_Jalisco',
       'provincia_Michoacán', 

In [25]:
regression(df, LinearRegression())

MAE:  934920


In [26]:
df.columns

Index(['habitaciones', 'garages', 'banos', 'metroscubiertos', 'fecha',
       'gimnasio', 'jardin', 'balcon', 'usosmultiples', 'piscina',
       'escuelascercanas', 'centroscomercialescercanos', 'precio',
       'tipodepropiedad_Apartamento', 'tipodepropiedad_Bodega comercial',
       'tipodepropiedad_Casa', 'tipodepropiedad_Casa en condominio',
       'tipodepropiedad_Edificio', 'tipodepropiedad_Local Comercial',
       'tipodepropiedad_Oficina comercial', 'tipodepropiedad_Otro',
       'tipodepropiedad_Terreno', 'tipodepropiedad_Terreno comercial',
       'provincia_Aguascalientes', 'provincia_Baja California Norte',
       'provincia_Baja California Sur', 'provincia_Campeche',
       'provincia_Chiapas', 'provincia_Chihuahua', 'provincia_Coahuila',
       'provincia_Colima', 'provincia_Distrito Federal', 'provincia_Durango',
       'provincia_Edo. de México', 'provincia_Guanajuato',
       'provincia_Guerrero', 'provincia_Hidalgo', 'provincia_Jalisco',
       'provincia_Michoacán', 

In [27]:
df2 = df.drop(['fecha', 'aniomes'], axis=1)
regression(df2, LinearRegression())

MAE:  934912


In [28]:
df2.head()

Unnamed: 0,habitaciones,garages,banos,metroscubiertos,gimnasio,jardin,balcon,usosmultiples,piscina,escuelascercanas,...,provincia_Sonora,provincia_Tabasco,provincia_Tamaulipas,provincia_Tlaxcala,provincia_Veracruz,provincia_Yucatán,provincia_Zacatecas,provincia_unknown,anio,mes
0,2,1.0,2.0,80,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2015,8
1,3,2.0,2.0,268,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,2013,6
2,3,2.0,2.0,144,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,2015,10
3,2,1.0,1.0,63,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,2012,3
4,2,1.0,1.0,95,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2016,6


<hr>

# XGBoost

In [33]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

In [39]:
reg = xgb.XGBRegressor(max_depth=9,n_estimators=100,learning_rate=0.1, verbosity=2)

In [40]:
regression(df2, reg)

[02:32:45] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 870 extra nodes, 0 pruned nodes, max_depth=9
[02:32:45] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 898 extra nodes, 0 pruned nodes, max_depth=9
[02:32:46] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 914 extra nodes, 0 pruned nodes, max_depth=9
[02:32:47] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 938 extra nodes, 0 pruned nodes, max_depth=9
[02:32:48] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 942 extra nodes, 0 pruned nodes, max_depth=9
[02:32:49] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 942 extra nodes, 0 pruned nodes, max_depth=9
[02:32:50] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 962 extra nodes, 0 pruned nodes, max_depth=9
[02:32:51] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 948 e

[02:33:39] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 402 extra nodes, 0 pruned nodes, max_depth=9
[02:33:40] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 454 extra nodes, 0 pruned nodes, max_depth=9
[02:33:40] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 426 extra nodes, 0 pruned nodes, max_depth=9
[02:33:41] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 410 extra nodes, 0 pruned nodes, max_depth=9
[02:33:42] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 420 extra nodes, 0 pruned nodes, max_depth=9
[02:33:43] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 376 extra nodes, 0 pruned nodes, max_depth=9
[02:33:44] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 620 extra nodes, 0 pruned nodes, max_depth=9
[02:33:45] INFO: /workspace/src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 406 e

# LightGBM

In [43]:
import lightgbm as lgb

In [60]:
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': 'mae',
    'max_depth': 14, 
    'learning_rate': 0.05,
    'verbose': 0, 
    'early_stopping_round': 20}
n_estimators=5000

X = np.array(df2.drop('precio', axis=1))
y = np.array(df2['precio'])

x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.10, random_state=0)
d_train = lgb.Dataset(x_train, label=y_train)
d_valid = lgb.Dataset(x_valid, label=y_valid)
watchlist = [d_valid]

model = lgb.train(params, d_train, n_estimators, watchlist, verbose_eval=1)

preds = model.predict(x_valid)
print('MAE: ', int(metrics.mean_absolute_error(y_valid, preds)))




[1]	valid_0's l1: 1.57671e+06
Training until validation scores don't improve for 20 rounds
[2]	valid_0's l1: 1.52805e+06
[3]	valid_0's l1: 1.48279e+06
[4]	valid_0's l1: 1.44048e+06
[5]	valid_0's l1: 1.40106e+06
[6]	valid_0's l1: 1.36412e+06
[7]	valid_0's l1: 1.32964e+06
[8]	valid_0's l1: 1.29771e+06
[9]	valid_0's l1: 1.26762e+06
[10]	valid_0's l1: 1.23949e+06
[11]	valid_0's l1: 1.21352e+06
[12]	valid_0's l1: 1.18935e+06
[13]	valid_0's l1: 1.16625e+06
[14]	valid_0's l1: 1.14405e+06
[15]	valid_0's l1: 1.12391e+06
[16]	valid_0's l1: 1.10482e+06
[17]	valid_0's l1: 1.08695e+06
[18]	valid_0's l1: 1.07035e+06
[19]	valid_0's l1: 1.05389e+06
[20]	valid_0's l1: 1.0387e+06
[21]	valid_0's l1: 1.02466e+06
[22]	valid_0's l1: 1.01143e+06
[23]	valid_0's l1: 999311
[24]	valid_0's l1: 988591
[25]	valid_0's l1: 977117
[26]	valid_0's l1: 967045
[27]	valid_0's l1: 956572
[28]	valid_0's l1: 947994
[29]	valid_0's l1: 939140
[30]	valid_0's l1: 931450
[31]	valid_0's l1: 923497
[32]	valid_0's l1: 915782
[33]	va

[319]	valid_0's l1: 729415
[320]	valid_0's l1: 729271
[321]	valid_0's l1: 729209
[322]	valid_0's l1: 729162
[323]	valid_0's l1: 729034
[324]	valid_0's l1: 728967
[325]	valid_0's l1: 728909
[326]	valid_0's l1: 728874
[327]	valid_0's l1: 728863
[328]	valid_0's l1: 728813
[329]	valid_0's l1: 728769
[330]	valid_0's l1: 728697
[331]	valid_0's l1: 728685
[332]	valid_0's l1: 728687
[333]	valid_0's l1: 728665
[334]	valid_0's l1: 728614
[335]	valid_0's l1: 728564
[336]	valid_0's l1: 728537
[337]	valid_0's l1: 728508
[338]	valid_0's l1: 728370
[339]	valid_0's l1: 728326
[340]	valid_0's l1: 728288
[341]	valid_0's l1: 728270
[342]	valid_0's l1: 728257
[343]	valid_0's l1: 728150
[344]	valid_0's l1: 728065
[345]	valid_0's l1: 728035
[346]	valid_0's l1: 727915
[347]	valid_0's l1: 727899
[348]	valid_0's l1: 727866
[349]	valid_0's l1: 727798
[350]	valid_0's l1: 727758
[351]	valid_0's l1: 727702
[352]	valid_0's l1: 727654
[353]	valid_0's l1: 727630
[354]	valid_0's l1: 727566
[355]	valid_0's l1: 727559
[

[626]	valid_0's l1: 719029
[627]	valid_0's l1: 719020
[628]	valid_0's l1: 718993
[629]	valid_0's l1: 718987
[630]	valid_0's l1: 718923
[631]	valid_0's l1: 718896
[632]	valid_0's l1: 718895
[633]	valid_0's l1: 718876
[634]	valid_0's l1: 718863
[635]	valid_0's l1: 718857
[636]	valid_0's l1: 718848
[637]	valid_0's l1: 718822
[638]	valid_0's l1: 718789
[639]	valid_0's l1: 718740
[640]	valid_0's l1: 718734
[641]	valid_0's l1: 718699
[642]	valid_0's l1: 718684
[643]	valid_0's l1: 718680
[644]	valid_0's l1: 718670
[645]	valid_0's l1: 718671
[646]	valid_0's l1: 718648
[647]	valid_0's l1: 718605
[648]	valid_0's l1: 718593
[649]	valid_0's l1: 718573
[650]	valid_0's l1: 718547
[651]	valid_0's l1: 718503
[652]	valid_0's l1: 718490
[653]	valid_0's l1: 718463
[654]	valid_0's l1: 718415
[655]	valid_0's l1: 718374
[656]	valid_0's l1: 718348
[657]	valid_0's l1: 718320
[658]	valid_0's l1: 718302
[659]	valid_0's l1: 718296
[660]	valid_0's l1: 718293
[661]	valid_0's l1: 718264
[662]	valid_0's l1: 718271
[

[937]	valid_0's l1: 714105
[938]	valid_0's l1: 714089
[939]	valid_0's l1: 714090
[940]	valid_0's l1: 714077
[941]	valid_0's l1: 714065
[942]	valid_0's l1: 714053
[943]	valid_0's l1: 714027
[944]	valid_0's l1: 714009
[945]	valid_0's l1: 714009
[946]	valid_0's l1: 713993
[947]	valid_0's l1: 714002
[948]	valid_0's l1: 714002
[949]	valid_0's l1: 713983
[950]	valid_0's l1: 713940
[951]	valid_0's l1: 713934
[952]	valid_0's l1: 713926
[953]	valid_0's l1: 713920
[954]	valid_0's l1: 713924
[955]	valid_0's l1: 713892
[956]	valid_0's l1: 713901
[957]	valid_0's l1: 713880
[958]	valid_0's l1: 713871
[959]	valid_0's l1: 713865
[960]	valid_0's l1: 713846
[961]	valid_0's l1: 713839
[962]	valid_0's l1: 713820
[963]	valid_0's l1: 713786
[964]	valid_0's l1: 713794
[965]	valid_0's l1: 713792
[966]	valid_0's l1: 713791
[967]	valid_0's l1: 713790
[968]	valid_0's l1: 713775
[969]	valid_0's l1: 713775
[970]	valid_0's l1: 713759
[971]	valid_0's l1: 713723
[972]	valid_0's l1: 713706
[973]	valid_0's l1: 713712
[

[1241]	valid_0's l1: 710945
[1242]	valid_0's l1: 710934
[1243]	valid_0's l1: 710934
[1244]	valid_0's l1: 710931
[1245]	valid_0's l1: 710920
[1246]	valid_0's l1: 710916
[1247]	valid_0's l1: 710914
[1248]	valid_0's l1: 710899
[1249]	valid_0's l1: 710903
[1250]	valid_0's l1: 710881
[1251]	valid_0's l1: 710872
[1252]	valid_0's l1: 710867
[1253]	valid_0's l1: 710862
[1254]	valid_0's l1: 710869
[1255]	valid_0's l1: 710874
[1256]	valid_0's l1: 710886
[1257]	valid_0's l1: 710874
[1258]	valid_0's l1: 710831
[1259]	valid_0's l1: 710836
[1260]	valid_0's l1: 710817
[1261]	valid_0's l1: 710781
[1262]	valid_0's l1: 710779
[1263]	valid_0's l1: 710768
[1264]	valid_0's l1: 710770
[1265]	valid_0's l1: 710776
[1266]	valid_0's l1: 710788
[1267]	valid_0's l1: 710791
[1268]	valid_0's l1: 710786
[1269]	valid_0's l1: 710786
[1270]	valid_0's l1: 710782
[1271]	valid_0's l1: 710763
[1272]	valid_0's l1: 710766
[1273]	valid_0's l1: 710752
[1274]	valid_0's l1: 710750
[1275]	valid_0's l1: 710755
[1276]	valid_0's l1:

[1546]	valid_0's l1: 708940
[1547]	valid_0's l1: 708937
[1548]	valid_0's l1: 708899
[1549]	valid_0's l1: 708893
[1550]	valid_0's l1: 708880
[1551]	valid_0's l1: 708877
[1552]	valid_0's l1: 708878
[1553]	valid_0's l1: 708859
[1554]	valid_0's l1: 708851
[1555]	valid_0's l1: 708847
[1556]	valid_0's l1: 708847
[1557]	valid_0's l1: 708848
[1558]	valid_0's l1: 708844
[1559]	valid_0's l1: 708828
[1560]	valid_0's l1: 708814
[1561]	valid_0's l1: 708826
[1562]	valid_0's l1: 708819
[1563]	valid_0's l1: 708790
[1564]	valid_0's l1: 708776
[1565]	valid_0's l1: 708763
[1566]	valid_0's l1: 708747
[1567]	valid_0's l1: 708725
[1568]	valid_0's l1: 708723
[1569]	valid_0's l1: 708718
[1570]	valid_0's l1: 708730
[1571]	valid_0's l1: 708713
[1572]	valid_0's l1: 708712
[1573]	valid_0's l1: 708710
[1574]	valid_0's l1: 708707
[1575]	valid_0's l1: 708728
[1576]	valid_0's l1: 708726
[1577]	valid_0's l1: 708739
[1578]	valid_0's l1: 708734
[1579]	valid_0's l1: 708730
[1580]	valid_0's l1: 708727
[1581]	valid_0's l1:

[1856]	valid_0's l1: 707042
[1857]	valid_0's l1: 707037
[1858]	valid_0's l1: 707034
[1859]	valid_0's l1: 707023
[1860]	valid_0's l1: 707022
[1861]	valid_0's l1: 706982
[1862]	valid_0's l1: 706989
[1863]	valid_0's l1: 706977
[1864]	valid_0's l1: 706971
[1865]	valid_0's l1: 706962
[1866]	valid_0's l1: 706963
[1867]	valid_0's l1: 706973
[1868]	valid_0's l1: 706961
[1869]	valid_0's l1: 706968
[1870]	valid_0's l1: 706978
[1871]	valid_0's l1: 706989
[1872]	valid_0's l1: 706988
[1873]	valid_0's l1: 706967
[1874]	valid_0's l1: 706969
[1875]	valid_0's l1: 706978
[1876]	valid_0's l1: 706973
[1877]	valid_0's l1: 706955
[1878]	valid_0's l1: 706963
[1879]	valid_0's l1: 706957
[1880]	valid_0's l1: 706937
[1881]	valid_0's l1: 706944
[1882]	valid_0's l1: 706953
[1883]	valid_0's l1: 706959
[1884]	valid_0's l1: 706962
[1885]	valid_0's l1: 706957
[1886]	valid_0's l1: 706949
[1887]	valid_0's l1: 706943
[1888]	valid_0's l1: 706937
[1889]	valid_0's l1: 706929
[1890]	valid_0's l1: 706917
[1891]	valid_0's l1:

In [61]:
from sklearn.ensemble import RandomForestRegressor 

In [62]:
# create regressor object 
regressor = RandomForestRegressor(n_estimators = 100, random_state = 0) 
  
regression(df2, regressor)

MAE:  728295


In [64]:
df2.dtypes

habitaciones                            int64
garages                               float64
banos                                 float64
metroscubiertos                         int64
gimnasio                                 int8
jardin                                  int64
balcon                                  int64
usosmultiples                            int8
piscina                                  int8
escuelascercanas                         int8
centroscomercialescercanos               int8
precio                                float64
tipodepropiedad_Apartamento             uint8
tipodepropiedad_Bodega comercial        uint8
tipodepropiedad_Casa                    uint8
tipodepropiedad_Casa en condominio      uint8
tipodepropiedad_Edificio                uint8
tipodepropiedad_Local Comercial         uint8
tipodepropiedad_Oficina comercial       uint8
tipodepropiedad_Otro                    uint8
tipodepropiedad_Terreno                 uint8
tipodepropiedad_Terreno comercial 

In [68]:
reg = KNeighborsRegressor()

In [69]:
regression(df2, reg)

MAE:  883863


In [72]:
regression(df2, DecisionTreeRegressor())

MAE:  947761


In [74]:
regression(df2, GradientBoostingRegressor())

MAE:  811799
