In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
#from IPython.core.interactiveshell import InteractiveShell
#InteractiveShell.ast_node_interactivity = "all"

# **Importación de los datos de properati y AR**

In [8]:
data = pd.read_csv("../data/properatti.csv", sep = ",", low_memory=False) 
#data.head(3)
data.shape

''

In [3]:
#import AR.csv trayendo algunas de las columnas que tienen info de ubicación
columnas = ['num', 'place_name', 'place_name2','s20', 'Lat','Lon', 's', 'rnch', 'Pais','s0','s1','s2','s3','s4','s5','s6','s7','place_with_parent_names','fecha']
dataAR = pd.read_csv("../data/AR.tsv", header=None, names=columnas,  usecols=[1,2,4,5,8,17], sep = "\t", low_memory=False) 

#elimino la cadena de caracteres America/ y separo la columna place_with_parent_names en columnas, 
#renombro las columnas resultantes para coincidir con las columnas de data
dataAR["place_with_parent_names"] = dataAR["place_with_parent_names"].replace({'America/':''}, regex=True)
dataAR[["country_name", "state_name"]] = dataAR["place_with_parent_names"].str.split("/", expand=True)
dataAR["state_name"]=dataAR["state_name"].replace({'_':' '}, regex=True)
dataAR.drop("place_with_parent_names", axis=1, inplace=True)
#dataAR.head(3)
data.shape

(121220, 26)

# **Limpieza de datos**# **Limpieza de datos**

In [4]:
#elimino duplicados 
data.drop_duplicates(keep="first")


Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,properati_url,description,title,image_thumbnail
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,40.0,1127.272727,1550.000000,,,,http://www.properati.com.ar/15bo8_venta_ph_mat...,"2 AMBIENTES TIPO CASA PLANTA BAJA POR PASILLO,...",2 AMB TIPO CASA SIN EXPENSAS EN PB,https://thumbs4.properati.com/8/BluUYiHJLhgIIK...
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,,,,,,,http://www.properati.com.ar/15bob_venta_depart...,Venta de departamento en décimo piso al frente...,VENTA Depto 2 dorm. a estrenar 7 e/ 36 y 37 ...,https://thumbs4.properati.com/7/ikpVBu2ztHA7jv...
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,55.0,1309.090909,1309.090909,,,,http://www.properati.com.ar/15bod_venta_depart...,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,2 AMB 3ER PISO CON ASCENSOR APTO CREDITO,https://thumbs4.properati.com/5/SXKr34F_IwG3W_...
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,,,,,,,http://www.properati.com.ar/15boh_venta_ph_lin...,PH 3 ambientes con patio. Hay 3 deptos en lote...,PH 3 amb. cfte. reciclado,https://thumbs4.properati.com/3/DgIfX-85Mog5SP...
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,35.0,1828.571429,1828.571429,,,,http://www.properati.com.ar/15bok_venta_depart...,DEPARTAMENTO CON FANTÁSTICA ILUMINACIÓN NATURA...,DEPTO 2 AMB AL CONTRAFRENTE ZONA CENTRO/PLAZA ...,https://thumbs4.properati.com/5/xrRqlNcSI_vs-f...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121215,121215,sell,apartment,Belgrano,|Argentina|Capital Federal|Belgrano|,Argentina,Capital Federal,3436077.0,,,...,93.0,7699.115044,9354.838710,,,10000.0,http://www.properati.com.ar/1cja2_venta_depart...,TORRE FORUM ALCORTA - MÁXIMA CATEGORÍA.Impecab...,Torre Forum Alcorta- Impecable 3 ambientes,https://thumbs4.properati.com/1/bjms0KnaAnlNoQ...
121216,121216,sell,house,Beccar,|Argentina|Bs.As. G.B.A. Zona Norte|San Isidro...,Argentina,Bs.As. G.B.A. Zona Norte,3436080.0,,,...,360.0,1383.333333,1383.333333,,,,http://www.properati.com.ar/1cja6_venta_casa_b...,Excelente e impecable casa en Venta en Las Lom...,Ruca Inmuebles | Venta | Lomas de San Isidro |...,https://thumbs4.properati.com/2/PCc3WuQDjpNZc4...
121217,121217,sell,apartment,Villa Urquiza,|Argentina|Capital Federal|Villa Urquiza|,Argentina,Capital Federal,3433775.0,"-34.5706388726,-58.4755963355",-34.570639,...,39.0,2858.695652,3371.794872,,,,http://www.properati.com.ar/1cja7_venta_depart...,VENTA DEPARTAMENTO AMBIENTE DIVISIBLE A ESTREN...,VENTA DEPARTAMENTO AMBIENTE DIVISIBLE A ESTREN...,https://thumbs4.properati.com/9/YAe_-2gRVykADP...
121218,121218,sell,apartment,Plaza Colón,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,,,,...,48.0,1997.916667,1997.916667,,,,http://www.properati.com.ar/1cja8_venta_depart...,"2 Amb al contrafrente, luminoso. El departame...",2 amb. C/ dep. de servicio al contrafrente| Re...,https://thumbs4.properati.com/8/Q12PTvU6BQJ0ib...


In [5]:
## pasar todas las columnas a minusculas
data_lower = data.applymap(lambda x: x if np.isreal(x) else str(x).lower())
# comparo los tipos de datos antes y después de pasar a minúsculas:
print(data_lower.dtypes == data.dtypes)
print(data_lower.head(3))

Unnamed: 0                    True
operation                     True
property_type                 True
place_name                    True
place_with_parent_names       True
country_name                  True
state_name                    True
geonames_id                   True
lat-lon                       True
lat                           True
lon                           True
price                         True
currency                      True
price_aprox_local_currency    True
price_aprox_usd               True
surface_total_in_m2           True
surface_covered_in_m2         True
price_usd_per_m2              True
price_per_m2                  True
floor                         True
rooms                         True
expenses                      True
properati_url                 True
description                   True
title                         True
image_thumbnail               True
dtype: bool
   Unnamed: 0 operation property_type place_name  \
0           0      sell   