In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
import common.common_machine_learning as common
import common.feature_num as feature_num
import common.features_datasets_externos as feature

In [2]:
train = pd.read_csv('sets_de_datos/train.csv', index_col = 0)
test = pd.read_csv('sets_de_datos/test.csv', index_col = 0)

In [3]:
train.columns

Index(['titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',
       'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',
       'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',
       'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',
       'centroscomercialescercanos', 'precio'],
      dtype='object')

### Promedio de precio para cantidad de baños según ciudad

In [4]:
banos_preciopromedio = train.groupby(["ciudad", "banos"])["precio"].mean().to_frame()

In [5]:
banos_preciopromedio = banos_preciopromedio.reset_index()

In [6]:
#banos_cantidad.reset_index().set_index("ciudad")
banos_preciopromedio = banos_preciopromedio.pivot_table(values='precio', index=banos_preciopromedio["ciudad"], columns='banos', aggfunc='first')

In [7]:
banos_preciopromedio.head()

banos,1.0,2.0,3.0,4.0
ciudad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abasolo,1907800.0,1156833.0,,8500000.0
Abejones,2500000.0,,,
Acajete,2368686.0,,3127500.0,
Acambay,2083333.0,1270000.0,,
Acapulco de Juárez,822812.5,1823339.0,3573077.0,5311640.0


### Promedio de precio para cantidad de habitaciones según ciudad

In [8]:
habitaciones_preciopromedio = train.groupby(["ciudad", "habitaciones"])["precio"].mean().to_frame()

In [9]:
habitaciones_preciopromedio = habitaciones_preciopromedio.reset_index()

In [10]:
habitaciones_preciopromedio = habitaciones_preciopromedio.pivot_table(values='precio', index=habitaciones_preciopromedio["ciudad"], columns='habitaciones', aggfunc="first")

In [11]:
habitaciones_preciopromedio.head()

habitaciones,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
ciudad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Abasolo,,1252800.0,1590000.0,1600500.0,,,,,,8500000.0
Abejones,,,2500000.0,,,,,,,
Acajete,2368686.0,5500000.0,2336667.0,,5800000.0,,,,,
Acambay,,625000.0,2030000.0,1200000.0,,,,,,
Acapulco de Juárez,1185437.0,1374582.0,2791943.0,5201431.0,3336639.0,2865238.0,2025000.0,4225000.0,,5750000.0


### Promedio de precio para cantidad de garages según ciudad

In [12]:
garages_preciopromedio = train.groupby(["ciudad", "garages"])["precio"].mean().to_frame()

In [13]:
garages_preciopromedio = garages_preciopromedio.reset_index()

In [14]:
garages_preciopromedio = garages_preciopromedio.pivot_table(values='precio', index=garages_preciopromedio["ciudad"], columns='garages', aggfunc="first")

In [15]:
garages_preciopromedio.head()

garages,0.0,1.0,2.0,3.0
ciudad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abalá,315000.0,,,
Abasolo,1140250.0,,4063333.0,
Acajete,,2368686.0,3662000.0,
Acambay,,450000.0,1240000.0,950000.0
Acapulco de Juárez,2290756.0,1748686.0,3692571.0,4235587.0


In [16]:
def asignar_caracteristica_precio_promedio(df, df_caracteristica, caracteristica, nombre):
    precio_promedio = []
    for index, row in train.iterrows():
        try:
            ciudad = row["ciudad"]
            cantidad_caracteristica = row[caracteristica]
            precio = df_caracteristica.loc[ciudad][cantidad_caracteristica]
        except:
            precio = np.nan
        precio_promedio.append(precio)
    df[nombre] = precio_promedio
    return df

In [17]:
train = asignar_caracteristica_precio_promedio(train, banos_preciopromedio, "banos", "banos_precio_promedio")

In [18]:
train = asignar_caracteristica_precio_promedio(train, habitaciones_preciopromedio, "habitaciones", "habitaciones_precio_promedio")

KeyboardInterrupt: 

In [None]:
train = asignar_caracteristica_precio_promedio(train, garages_preciopromedio, "garages", "garages_precio_promedio")

In [None]:
train_banos_preciopromedio = train.copy()
train_banos_preciopromedio = train_banos_preciopromedio.reset_index()
train_banos_preciopromedio = train_banos_preciopromedio[["id", "banos_precio_promedio"]]
train_banos_preciopromedio.head()

In [None]:
train_habitaciones_preciopromedio = train.copy()
train_habitaciones_preciopromedio = train_habitaciones_preciopromedio.reset_index()
train_habitaciones_preciopromedio = train_habitaciones_preciopromedio[["id", "habitaciones_precio_promedio"]]
train_habitaciones_preciopromedio.head()

In [None]:
train_garages_preciopromedio = train.copy()
train_garages_preciopromedio = train_garages_preciopromedio.reset_index()
train_garages_preciopromedio = train_garages_preciopromedio[["id", "garages_precio_promedio"]]
train_garages_preciopromedio.head()

In [19]:
import common.features_precio_promedio as feature

In [20]:
train = pd.read_csv('sets_de_datos/train.csv', index_col = 0)
test = pd.read_csv('sets_de_datos/test.csv', index_col = 0)

In [21]:
train = feature.asignar_precio_promedio_por_cantidad_de_banos_por_ciudad(train)

In [22]:
train.head()

Unnamed: 0_level_0,titulo,descripcion,tipodepropiedad,direccion,ciudad,provincia,antiguedad,habitaciones,garages,banos,...,lat,lng,fecha,gimnasio,usosmultiples,piscina,escuelascercanas,centroscomercialescercanos,precio,banos_precio_promedio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
254099,depto. tipo a-402,"depto. interior de 80.15m2, consta de sala com...",Apartamento,Avenida Division del Norte 2005,Benito Juárez,Distrito Federal,,2.0,1.0,2.0,...,,,2015-08-23 00:00:00,0.0,0.0,0.0,0.0,0.0,2273000.0,3287627.0
53461,condominio horizontal en venta,"<p>entre sonora y guerrero, atr&aacute;s del h...",Casa en condominio,AV. MEXICO,La Magdalena Contreras,Distrito Federal,10.0,3.0,2.0,2.0,...,19.310205,-99.227655,2013-06-28 00:00:00,0.0,0.0,0.0,1.0,1.0,3600000.0,3962764.0
247984,casa en venta urbi 3 recamaras tonala,descripcion \nla mejor ubicacion residencial e...,Casa,Urbi Tonala,Tonalá,Jalisco,5.0,3.0,2.0,2.0,...,,,2015-10-17 00:00:00,0.0,0.0,0.0,0.0,0.0,1200000.0,1009003.0
209067,casa sola en toluca zinacantepec con credito i...,casa en privada con caseta de vigilancia casas...,Casa,IGNACIO MANUEL ALTAMIRANO 128,Zinacantepec,Edo. de México,1.0,2.0,1.0,1.0,...,19.30189,-99.688015,2012-03-09 00:00:00,0.0,0.0,0.0,1.0,1.0,650000.0,674923.9
185997,paseos del sol,bonito departamento en excelentes condiciones ...,Apartamento,PASEOS DEL SOL,Zapopan,Jalisco,10.0,2.0,1.0,1.0,...,,,2016-06-07 00:00:00,0.0,0.0,0.0,0.0,0.0,1150000.0,968181.3


In [23]:
test = feature.asignar_precio_promedio_por_cantidad_de_banos_por_ciudad(test)

In [24]:
test

Unnamed: 0_level_0,titulo,descripcion,tipodepropiedad,direccion,ciudad,provincia,antiguedad,habitaciones,garages,banos,...,idzona,lat,lng,fecha,gimnasio,usosmultiples,piscina,escuelascercanas,centroscomercialescercanos,banos_precio_promedio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4941,"casa en venta en miguel hidalgo, distrito federal",<p>excelente casa estilo moderno.</p>,Casa,Bosque de Cedros,Miguel Hidalgo,Distrito Federal,29.0,3.0,,4.0,...,,19.408668,-99.246767,2013-07-20 00:00:00,0.0,0.0,0.0,0.0,0.0,7.728595e+06
51775,departamentos en venta en montebello,<p>departamento una recamara:\n</p><p>departam...,Apartamento,,Mérida,Yucatán,,1.0,1.0,1.0,...,113851.0,21.032480,-89.592424,2015-10-24 00:00:00,0.0,0.0,0.0,0.0,0.0,9.042383e+05
115253,departamento nuevo delegación coyoacán de 87 m...,"departamento nuevo de 87.06 m2, 1 cajón de est...",Apartamento,"Pueblo de los Reyes, Coyoacán, Mexico D.F.",Coyoacán,Distrito Federal,0.0,2.0,1.0,2.0,...,23620.0,19.332829,-99.152913,2015-05-30 00:00:00,0.0,0.0,0.0,0.0,1.0,3.142466e+06
299321,departamento en venta en acapulco,<p> raíces dv001 precioso departamento tipo k...,Apartamento,,Acapulco de Juárez,Guerrero,2.0,2.0,2.0,2.0,...,129347.0,16.860487,-99.878383,2015-04-02 00:00:00,0.0,0.0,0.0,0.0,0.0,1.823339e+06
173570,bonita casa sola equipada de dos niveles en lo...,"<p>casa sola, bonita de dos rec&aacute;maras u...",Casa,CEDROS,Tultitlán,Edo. de México,10.0,2.0,1.0,1.0,...,57125.0,19.640482,-99.127273,2013-08-15 00:00:00,0.0,0.0,0.0,1.0,1.0,6.400110e+05
30862,casa en venta parques de santa maria 3rec. por...,a un costado de parques de santa maría \ncon ...,Casa,Fresno 2601,Zapopan,Jalisco,10.0,3.0,1.0,2.0,...,48216.0,,,2016-01-11 00:00:00,0.0,0.0,0.0,0.0,0.0,2.059608e+06
244471,casa en venta en la col. cortijo del rio,"<p>excelente propiedad en zona sur, cocina amp...",Casa,SENDA DE LA CREACIÓN,Monterrey,Nuevo León,20.0,3.0,,2.0,...,323485.0,,,2016-06-24 00:00:00,1.0,0.0,0.0,1.0,0.0,2.088220e+06
127794,"casas javer, casas de 3 recamaras. a tan solo ...",bosques de lerma es un exclusivo fraccionamien...,Casa,lerma estado de mexico,Lerma,Edo. de México,0.0,2.0,1.0,1.0,...,54688.0,,,2016-01-30 00:00:00,0.0,0.0,0.0,1.0,1.0,7.783409e+05
71558,hermosa casa con alberca y acceso a la playa,"hermosa residencia, con alberca y acceso a la ...",Casa,Americas 2,Veracruz,Veracruz,2.0,3.0,2.0,2.0,...,107969.0,19.169978,-96.152714,2016-07-21 00:00:00,0.0,0.0,1.0,1.0,1.0,1.225230e+06
218011,casa en sm 30 cancun,descripción: \nplanta baja \n2 recamaras \n1 b...,Casa,YALAHAN,Cancún,Quintana Roo,20.0,3.0,1.0,2.0,...,50002836.0,21.157972,-86.838543,2015-10-12 00:00:00,0.0,0.0,0.0,1.0,0.0,2.068266e+06


904238.3496932515