# Para instalar una biblioteca faltante: pip install "nombre biblioteca"

In [1]:
import numpy as np
import pandas as pd
import re as regex
import math as math
import requests as req #biblioteca para hacer peticiones HTTP a servicios: pip install request

In [2]:
pathArchivoDataSet = 'properatti.csv'
ds = pd.read_csv(pathArchivoDataSet)

In [3]:
ds.isnull().sum(axis=0)

Unnamed: 0                         0
operation                          0
property_type                      0
place_name                        23
place_with_parent_names            0
country_name                       0
state_name                         0
geonames_id                    18717
lat-lon                        51550
lat                            51550
lon                            51550
price                          20410
currency                       20411
price_aprox_local_currency     20410
price_aprox_usd                20410
surface_total_in_m2            39328
surface_covered_in_m2          19907
price_usd_per_m2               52603
price_per_m2                   33562
floor                         113321
rooms                          73830
expenses                      106958
properati_url                      0
description                        2
title                              0
image_thumbnail                 3112
dtype: int64

# Obtener datos de lat y long a partir del geonames_id, esto no da la ubicación exacta de la propiedad, sino uno coordenada dentro de una zona (por ej. Carlos Paz o San Fernando)

### La siguiente es la URL del servicio, requiere un el nombre de un usuario generado en el sitio https://www.geonames.org/ y un geonameId, obtenido de la columna geonames_id (que vamos a ir obteniendo mientras vamos leyendo el data frame

In [4]:
urlServicio = "http://api.geonames.org/getJSON"

### Parametros de la request

In [5]:
PARAMS = { 'username': 'rdgagliano', 'style': 'full', 'geonameId': 3429088 }

### Realizar la request

In [6]:
r = req.get(url = urlServicio, params = PARAMS)

In [7]:
r.json()

{'timezone': {'gmtOffset': -3,
  'timeZoneId': 'America/Argentina/Buenos_Aires',
  'dstOffset': -3},
 'bbox': {'east': -58.35540771500001,
  'south': -34.489437103,
  'north': -34.00189209000001,
  'west': -58.805011749000016,
  'accuracyLevel': 0},
 'asciiName': 'Partido de San Fernando',
 'astergdem': 20,
 'countryId': '3865483',
 'fcl': 'A',
 'srtm3': 21,
 'adminId2': '3429088',
 'countryCode': 'AR',
 'adminCodes1': {'ISO3166_2': 'B'},
 'adminId1': '3435907',
 'lat': '-34.44542',
 'fcode': 'ADM2',
 'continentCode': 'SA',
 'adminCode2': '06749',
 'adminCode1': '01',
 'lng': '-58.54202',
 'geonameId': 3429088,
 'toponymName': 'Partido de San Fernando',
 'population': 163462,
 'wikipediaURL': 'en.wikipedia.org/wiki/San_Fernando_Partido',
 'adminName5': '',
 'adminName4': '',
 'adminName3': '',
 'alternateNames': [{'name': 'https://en.wikipedia.org/wiki/San_Fernando_Partido',
   'lang': 'link'},
  {'name': 'Partido di San Fernando', 'lang': 'it'},
  {'name': 'Partido San Fernando', 'lan

### Con la respuesta podemos sacar la lat y long para completar aquellas filas del data frame que tengan el geonames_id pero no lat, long ni lat-long

### Eliminamos columnas innecesarias para el procesamiento

In [8]:
ds.drop(ds.columns.difference(['geonames_id','lat', 'lon', 'lat-lon']), 1, inplace=True)

In [9]:
ds.head(5)

Unnamed: 0,geonames_id,lat-lon,lat,lon
0,3430787.0,"-34.6618237,-58.5088387",-34.661824,-58.508839
1,3432039.0,"-34.9038831,-57.9643295",-34.903883,-57.96433
2,3430787.0,"-34.6522615,-58.5229825",-34.652262,-58.522982
3,3431333.0,"-34.6477969,-58.5164244",-34.647797,-58.516424
4,3435548.0,"-38.0026256,-57.5494468",-38.002626,-57.549447


### Solo nos quedamos con las filas en las que tengan el geonames_id

In [10]:
ds_slice = ds.loc[~ds['geonames_id'].isnull()]

In [11]:
ds_slice.reset_index()

Unnamed: 0,index,geonames_id,lat-lon,lat,lon
0,0,3430787.0,"-34.6618237,-58.5088387",-34.661824,-58.508839
1,1,3432039.0,"-34.9038831,-57.9643295",-34.903883,-57.964330
2,2,3430787.0,"-34.6522615,-58.5229825",-34.652262,-58.522982
3,3,3431333.0,"-34.6477969,-58.5164244",-34.647797,-58.516424
4,4,3435548.0,"-38.0026256,-57.5494468",-38.002626,-57.549447
...,...,...,...,...,...
102498,121214,3428983.0,,,
102499,121215,3436077.0,,,
102500,121216,3436080.0,,,
102501,121217,3433775.0,"-34.5706388726,-58.4755963355",-34.570639,-58.475596


### Quitamos geonames_id repetidos para no hacer peticiones innecesarias

In [12]:
ds_slice = ds_slice.drop_duplicates(subset = ['geonames_id'], keep='first')

In [13]:
def setLatLonFromGeonameId(dataFrameRow):    
    geonamesId = int(dataFrameRow.geonames_id)        
    
    #preparamos la peticion
    urlJson = "http://api.geonames.org/getJSON"
    parametros = { 'username': 'rdgagliano', 'style': 'full', 'geonameId': geonamesId }
    
    #realizamos la peticion
    respuesta = req.get(url = urlJson, params = parametros)
    
    #el objeto json es un diccionario
    json = respuesta.json()    
    
    try:
        dataFrameRow['lat'] = float(json['lat'])#se accede a las propiedades de json como diccionario
        dataFrameRow['lon'] = float(json['lng'])
        dataFrameRow['lat-lon'] = json['lat'] + ',' + json['lng']
    except:
        print(json)
        print(geonamesId)
    
    return dataFrameRow

In [14]:
ds_slice = ds_slice.apply(setLatLonFromGeonameId, axis=1)

{'status': {'message': 'the geoname feature does not exist.', 'value': 11}}
3435548


In [15]:
ds_slice[ds_slice.lat.isnull()]

Unnamed: 0,geonames_id,lat-lon,lat,lon


### Nos quedamos solamente con los que se pudieron completar

In [16]:
ds_slice = ds_slice.loc[~ds['geonames_id'].isnull() & (~ds_slice['lat'].isnull() & ~ds_slice['lon'].isnull() & ~ds_slice['lat-lon'].isnull())]

# Para ahorrar el tiempo que tarda en hacer todas las requests, guardamos los datos obtenidos en un CSV para su posterior carga

### Generamos el CSV del dataframe resultante

In [17]:
latLngCSVFileName = 'latLngFromGeonames (entrega 1).csv'

In [18]:
ds_slice.to_csv(latLngCSVFileName,index=False,encoding='utf-8')

### Cargamos el archivo

In [20]:
latLong = pd.read_csv(latLngCSVFileName)

In [21]:
latLong.head()

Unnamed: 0,geonames_id,lat-lon,lat,lon
0,3430787.0,"-34.66667,-58.5",-34.66667,-58.5
1,3432039.0,"-35,-58",-35.0,-58.0
2,3431333.0,"-34.64286,-58.52386",-34.64286,-58.52386
3,3435548.0,"-38.0026256,-57.5494468",-38.002626,-57.549447
4,3433657.0,"-33.25,-59",-33.25,-59.0


# Agregamos al data frame los datos de latitud y longitud a partir del CSV generador desde geonames_id

In [26]:
ds2 = pd.read_csv(pathArchivoDataSet)

In [27]:
ds2.head()

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lon,lat,...,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,properati_url,description,title,image_thumbnail
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,40.0,1127.272727,1550.0,,,,http://www.properati.com.ar/15bo8_venta_ph_mat...,"2 AMBIENTES TIPO CASA PLANTA BAJA POR PASILLO,...",2 AMB TIPO CASA SIN EXPENSAS EN PB,https://thumbs4.properati.com/8/BluUYiHJLhgIIK...
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,,,,,,,http://www.properati.com.ar/15bob_venta_depart...,Venta de departamento en décimo piso al frente...,VENTA Depto 2 dorm. a estrenar 7 e/ 36 y 37 ...,https://thumbs4.properati.com/7/ikpVBu2ztHA7jv...
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,55.0,1309.090909,1309.090909,,,,http://www.properati.com.ar/15bod_venta_depart...,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,2 AMB 3ER PISO CON ASCENSOR APTO CREDITO,https://thumbs4.properati.com/5/SXKr34F_IwG3W_...
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,,,,,,,http://www.properati.com.ar/15boh_venta_ph_lin...,PH 3 ambientes con patio. Hay 3 deptos en lote...,PH 3 amb. cfte. reciclado,https://thumbs4.properati.com/3/DgIfX-85Mog5SP...
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,35.0,1828.571429,1828.571429,,,,http://www.properati.com.ar/15bok_venta_depart...,DEPARTAMENTO CON FANTÁSTICA ILUMINACIÓN NATURA...,DEPTO 2 AMB AL CONTRAFRENTE ZONA CENTRO/PLAZA ...,https://thumbs4.properati.com/5/xrRqlNcSI_vs-f...


In [30]:
ds2.merge(latLong, how='left', left_on='geonames_id', right_on='geonames_id', suffixes=(False, '_geonames'))

Unnamed: 0.1,Unnamed: 0,operation,property_type,place_name,place_with_parent_names,country_name,state_name,geonames_id,lat-lonFalse,latFalse,...,floor,rooms,expenses,properati_url,description,title,image_thumbnail,lat-lon_geonames,lat_geonames,lon_geonames
0,0,sell,PH,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6618237,-58.5088387",-34.661824,...,,,,http://www.properati.com.ar/15bo8_venta_ph_mat...,"2 AMBIENTES TIPO CASA PLANTA BAJA POR PASILLO,...",2 AMB TIPO CASA SIN EXPENSAS EN PB,https://thumbs4.properati.com/8/BluUYiHJLhgIIK...,"-34.66667,-58.5",-34.666670,-58.500000
1,1,sell,apartment,La Plata,|Argentina|Bs.As. G.B.A. Zona Sur|La Plata|,Argentina,Bs.As. G.B.A. Zona Sur,3432039.0,"-34.9038831,-57.9643295",-34.903883,...,,,,http://www.properati.com.ar/15bob_venta_depart...,Venta de departamento en décimo piso al frente...,VENTA Depto 2 dorm. a estrenar 7 e/ 36 y 37 ...,https://thumbs4.properati.com/7/ikpVBu2ztHA7jv...,"-35,-58",-35.000000,-58.000000
2,2,sell,apartment,Mataderos,|Argentina|Capital Federal|Mataderos|,Argentina,Capital Federal,3430787.0,"-34.6522615,-58.5229825",-34.652262,...,,,,http://www.properati.com.ar/15bod_venta_depart...,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,2 AMB 3ER PISO CON ASCENSOR APTO CREDITO,https://thumbs4.properati.com/5/SXKr34F_IwG3W_...,"-34.66667,-58.5",-34.666670,-58.500000
3,3,sell,PH,Liniers,|Argentina|Capital Federal|Liniers|,Argentina,Capital Federal,3431333.0,"-34.6477969,-58.5164244",-34.647797,...,,,,http://www.properati.com.ar/15boh_venta_ph_lin...,PH 3 ambientes con patio. Hay 3 deptos en lote...,PH 3 amb. cfte. reciclado,https://thumbs4.properati.com/3/DgIfX-85Mog5SP...,"-34.64286,-58.52386",-34.642860,-58.523860
4,4,sell,apartment,Centro,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,3435548.0,"-38.0026256,-57.5494468",-38.002626,...,,,,http://www.properati.com.ar/15bok_venta_depart...,DEPARTAMENTO CON FANTÁSTICA ILUMINACIÓN NATURA...,DEPTO 2 AMB AL CONTRAFRENTE ZONA CENTRO/PLAZA ...,https://thumbs4.properati.com/5/xrRqlNcSI_vs-f...,"-38.0026256,-57.5494468",-38.002626,-57.549447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121215,121215,sell,apartment,Belgrano,|Argentina|Capital Federal|Belgrano|,Argentina,Capital Federal,3436077.0,,,...,,,10000.0,http://www.properati.com.ar/1cja2_venta_depart...,TORRE FORUM ALCORTA - MÁXIMA CATEGORÍA.Impecab...,Torre Forum Alcorta- Impecable 3 ambientes,https://thumbs4.properati.com/1/bjms0KnaAnlNoQ...,"-34.5627,-58.45829",-34.562700,-58.458290
121216,121216,sell,house,Beccar,|Argentina|Bs.As. G.B.A. Zona Norte|San Isidro...,Argentina,Bs.As. G.B.A. Zona Norte,3436080.0,,,...,,,,http://www.properati.com.ar/1cja6_venta_casa_b...,Excelente e impecable casa en Venta en Las Lom...,Ruca Inmuebles | Venta | Lomas de San Isidro |...,https://thumbs4.properati.com/2/PCc3WuQDjpNZc4...,"-34.46312,-58.53481",-34.463120,-58.534810
121217,121217,sell,apartment,Villa Urquiza,|Argentina|Capital Federal|Villa Urquiza|,Argentina,Capital Federal,3433775.0,"-34.5706388726,-58.4755963355",-34.570639,...,,,,http://www.properati.com.ar/1cja7_venta_depart...,VENTA DEPARTAMENTO AMBIENTE DIVISIBLE A ESTREN...,VENTA DEPARTAMENTO AMBIENTE DIVISIBLE A ESTREN...,https://thumbs4.properati.com/9/YAe_-2gRVykADP...,"-34.56667,-58.48333",-34.566670,-58.483330
121218,121218,sell,apartment,Plaza Colón,|Argentina|Buenos Aires Costa Atlántica|Mar de...,Argentina,Buenos Aires Costa Atlántica,,,,...,,,,http://www.properati.com.ar/1cja8_venta_depart...,"2 Amb al contrafrente, luminoso. El departame...",2 amb. C/ dep. de servicio al contrafrente| Re...,https://thumbs4.properati.com/8/Q12PTvU6BQJ0ib...,,,
