In [192]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style = "whitegrid")

import geopandas
from shapely.geometry import LineString, Point, Polygon
import shapely.wkt
import folium

In [193]:
def from_wkt(df, wkt_column):    
    # para transformar los wkt using shapely para convertir en objeto geografico
    # clase de shapely
    # geopandas no trae un method para import este formato
    df["geometry"]= df[wkt_column].apply(shapely.wkt.loads)     
    # ahora pasa a geopandas con geometry=serie anterior. un poligono para cada
    # registro de serie
    gdf = geopandas.GeoDataFrame(df, crs={'init': 'epsg:4326'}, geometry='geometry')  
    return gdf

def from_x_y(df, x, y):
    # lista de points de pares ordenados, set projection (crs)
    gdf = geopandas.GeoDataFrame(df.drop([x, y], axis=1),
                                crs={'init': 'epsg:4326'},
                                geometry=[Point(xy) for xy in zip(df[x], df[y])])  
    return gdf

def to_gkba(geodf):
    '''to transform crs to gkba, utilizada por Buenos Aires Data para medir distancias'''
    new_geodf = geodf.to_crs(crs = "+proj=tmerc +lat_0=-34.629269 +lon_0=-58.4633 +k=0.9999980000000001 +x_0=100000 +y_0=100000 +ellps=intl +units=m +no_defs")
    return new_geodf

In [194]:
# Read data
# Lectura del dataset usando como index la primera columna
props_df = pd.read_csv('properatti_clean.csv', index_col=0)
barrios_df = pd.read_csv("barrios.csv", encoding='latin1')
comisarias_df = pd.read_csv("comisarias.csv", encoding = "latin1")
subtes_df = pd.read_csv("subte.csv", encoding='latin1')
metrobus_df = pd.read_csv("metrobus.csv", encoding='latin1')
trenes_df = pd.read_csv("trenes.csv", encoding='latin1', delimiter=';')

In [195]:
# Cambiar , por . en comisarias
comisarias_df['X'] = comisarias_df['X'].apply(lambda x: float(x.replace(',', '.')))
comisarias_df['Y'] = comisarias_df['Y'].apply(lambda x: float(x.replace(',', '.')))

In [196]:
barrios_df.head()

Unnamed: 0,WKT,BARRIO,COMUNA,PERIMETRO,AREA
0,"POLYGON ((-58.4528200492791 -34.5959886570639,...",CHACARITA,15.0,7725.695228,3118101.0
1,"POLYGON ((-58.4655768128541 -34.5965577078058,...",PATERNAL,15.0,7087.513295,2229829.0
2,"POLYGON ((-58.4237529813037 -34.5978273383243,...",VILLA CRESPO,15.0,8132.699348,3613584.0
3,"POLYGON ((-58.4946097568899 -34.6148652395239,...",VILLA DEL PARQUE,11.0,7705.389797,3399596.0
4,"POLYGON ((-58.4128700313089 -34.6141162515854,...",ALMAGRO,5.0,8537.901368,4050752.0


In [197]:
subtes_df.head()

Unnamed: 0,X,Y,ID,ESTACION,LINEA
0,-58.398928,-34.63575,1.0,CASEROS,H
1,-58.40097,-34.629376,2.0,INCLAN,H
2,-58.402323,-34.623092,3.0,HUMBERTO 1°,H
3,-58.404732,-34.615242,4.0,VENEZUELA,H
4,-58.406036,-34.608935,5.0,ONCE - 30 DE DICIEMBRE,H


In [198]:
metrobus_df.head()

Unnamed: 0,long,lat,id,nombre,calle1,calle2,intersec,inaugura,lin_sent_n,lin_sent_s,metrobus
0,-58.526113,-34.638267,1,Liniers,FRANCISCO DE VIEDMA,,Entre Casco y Gana,si,34 - 109 - 166,34 - 109 - 166,Metrobus Juan B. Justo
1,-58.520143,-34.633497,2,Velez Sarsfield,JUSTO JUAN B.,ALVAREZ JONTE AV,JUSTO JUAN B. & ALVAREZ JONTE AV,si,34 - 99 - 166 - 172,34 - 99 - 166 - 172,Metrobus Juan B. Justo
2,-58.513913,-34.633458,3,Polideportivo Velez Sarsfield,JUSTO JUAN B.,GARCIA JUAN AGUSTIN,JUSTO JUAN B. & GARCIA JUAN AGUSTIN,si,34 - 99 - 166 - 172,34 - 99 - 166 - 172,Metrobus Juan B. Justo
3,-58.50673,-34.634341,4,Cortina,JUSTO JUAN B.,CORTINA,JUSTO JUAN B. & CORTINA,si,34 - 99 - 166 - 172,34 - 99 - 166 - 172,Metrobus Juan B. Justo
4,-58.501042,-34.632208,5,Av. Lope de Vega,JUSTO JUAN B.,LOPE DE VEGA,JUSTO JUAN B. & LOPE DE VEGA,si,34 - 99 - 166 - 172,34 - 99 - 166 - 172,Metrobus Juan B. Justo


In [199]:
comisarias_df.head()

Unnamed: 0,X,Y,NOMBRE
0,-58.468944,-34.683121,COMISARIA 52
1,-58.474649,-34.679169,COMISARIA 48
2,-58.501166,-34.661994,COMISARIA 42
3,-58.431981,-34.660395,COMISARIA 36
4,-58.40283,-34.64195,COMISARIA COMUNA 4


In [200]:
trenes_df.head()

Unnamed: 0,LAT,LNG,ID,NOMBRE,EMPRESA,LINEA,LINEA_2,BARRIO,COMUNA
0,-34.571334,-58.424295,2,3 DE FEBRERO,TBA - TRENES DE BS AS S.A.,MITRE,F.C.G.B.M.,PALERMO,COMUNA 14
1,-34.567571,-58.463056,4,BELGRANO R,TBA - TRENES DE BS AS S.A.,MITRE,F.C.G.B.M.,BELGRANO,COMUNA 13
2,-34.562549,-58.435864,6,LISANDRO DE LA TORRE,TBA - TRENES DE BS AS S.A.,MITRE,F.C.G.B.M.,PALERMO,COMUNA 14
3,-34.558443,-58.449498,7,BELGRANO C,TBA - TRENES DE BS AS S.A.,MITRE,F.C.G.B.M.,BELGRANO,COMUNA 13
4,-34.548895,-58.4624,8,NUÃEZ,TBA - TRENES DE BS AS S.A.,MITRE,F.C.G.B.M.,NUÃEZ,COMUNA 13


In [201]:
props_df.head()

Unnamed: 0,barrio,lat,lon,total,cubierta,precio,piso,habs,expensas,descripcion
2,Mataderos,-34.652262,-58.522982,55.0,55.0,1309.090909,,,,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...
7,Belgrano,-34.559873,-58.443362,45.0,40.0,3066.666667,,,,EXCELENTE MONOAMBIENTE A ESTRENAR AMPLIO SUPER...
8,Belgrano,-34.559873,-58.443362,65.0,60.0,3000.0,,,,EXCELENTE DOS AMBIENTES ESTRENAR AMPLIO SUPER...
19,Palermo,-34.580504,-58.405874,104.0,96.0,3365.384615,,3.0,,Excelente semipiso al contra frente en Bulnes ...
21,Palermo,-34.590926,-58.411665,118.0,73.0,2292.372881,,4.0,,"EXCELENTE ZONA, MULTIPLES MEDIOS DE TRANSPORTE..."


In [202]:
# Transformar los dataframes en geodataframes
barrios = from_wkt(barrios_df, 'WKT')
comisarias = from_x_y(comisarias_df, 'X', 'Y')
props = from_x_y(props_df, 'lon', 'lat')
subtes = from_x_y(subtes_df, 'X', 'Y')
metrobus = from_x_y(metrobus_df, 'long', 'lat')
trenes = from_x_y(trenes_df, 'LNG', 'LAT')

In [144]:
# Validar y consolidar la base props con los barrios oficiales
# result = geopandas.sjoin(props, barrios, how='left', op='within') 
results = pd.read_csv("results.csv", encoding='latin1')

In [146]:
props = results.drop(['barrio', 'WKT', 'PERIMETRO', 'AREA', 'index_right'], axis=1)

In [147]:
props.columns = props.columns.str.lower()
props.head()

Unnamed: 0,unnamed: 0,total,cubierta,precio,piso,habs,expensas,descripcion,geometry,barrio,comuna
0,2,55.0,55.0,1309.090909,,,,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,POINT (-58.5229825 -34.6522615),LINIERS,9.0
1,7,45.0,40.0,3066.666667,,,,EXCELENTE MONOAMBIENTE A ESTRENAR AMPLIO SUPER...,POINT (-58.443362 -34.5598729),BELGRANO,13.0
2,8,65.0,60.0,3000.0,,,,EXCELENTE DOS AMBIENTES ESTRENAR AMPLIO SUPER...,POINT (-58.443362 -34.5598729),BELGRANO,13.0
3,19,104.0,96.0,3365.384615,,3.0,,Excelente semipiso al contra frente en Bulnes ...,POINT (-58.4058744847 -34.580503566),PALERMO,14.0
4,21,118.0,73.0,2292.372881,,4.0,,"EXCELENTE ZONA, MULTIPLES MEDIOS DE TRANSPORTE...",POINT (-58.4116653 -34.590926),RECOLETA,2.0


In [188]:
props.habs.value_counts()


len(props.barrio.unique())
props.barrio.unique()
props.barrio.unique().shape

2.0      2651
1.0      2632
3.0      2604
4.0      1694
5.0       403
NUÑEZ     348
6.0        99
7.0        41
8.0         7
9.0         6
12.0        2
10.0        1
Name: habs, dtype: int64

48

array(['LINIERS', 'BELGRANO', 'PALERMO', 'RECOLETA', 'FLORESTA', 'BOEDO',
       'BALVANERA', 'CABALLITO', 'PARQUE CHACABUCO', 'VILLA CRESPO',
       'FLORES', 'NUÑEZ', 'PUERTO MADERO', 'COLEGIALES', 'VILLA URQUIZA',
       'SAAVEDRA', 'PARQUE CHAS', 'BARRACAS', 'VILLA GRAL. MITRE',
       'AGRONOMIA', 'COGHLAN', 'ALMAGRO', 'SAN TELMO', 'MONSERRAT',
       'VILLA ORTUZAR', 'VILLA DEVOTO', 'BOCA', 'CHACARITA',
       'SAN CRISTOBAL', 'VERSALLES', 'MONTE CASTRO', 'VELEZ SARSFIELD',
       'SAN NICOLAS', 'PARQUE AVELLANEDA', 'RETIRO', 'NUEVA POMPEYA',
       'CONSTITUCION', 'VILLA PUEYRREDON', 'MATADEROS',
       'VILLA SANTA RITA', 'PARQUE PATRICIOS', 'VILLA DEL PARQUE',
       'VILLA LURO', 'VILLA LUGANO', 'VILLA REAL', 'PATERNAL',
       'VILLA RIACHUELO', 'VILLA SOLDATI'], dtype=object)

(48,)

In [149]:
# Corrige Nuñez
props.loc[props.barrio=='NUÃ\x91EZ'] = 'NUÑEZ'

In [150]:
props_to_check = props[props.barrio.isnull()] # ... POINTS donde el sjoin no encontró POLYGON que lo incluya

In [163]:
# Graficar para ver
m = folium.Map([-34.606359, -58.443863], zoom_start=12, tiles="OpenStreetMap")
folium.GeoJson(barrios.to_json()).add_to(m)
folium.GeoJson(props_to_check.to_json()).add_to(m)
m

<folium.features.GeoJson at 0x11c907f28>

<folium.features.GeoJson at 0x1261ff3c8>

In [152]:
# Eliminar propiedades sin geometry utilizable
props.shape
props = props[props.barrio.notnull()]
props.shape

(16323, 11)

(16300, 11)

In [153]:
# Transformar los dataframes para calcular distancias entre puntos

comisarias_gkba = to_gkba(comisarias)
subtes_gkba = to_gkba(subtes)
trenes_gkba = to_gkba(trenes)
metrobus_gkba = to_gkba(metrobus)

In [171]:
props.info()
props.head(2)
type(props.geometry)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 16300 entries, 0 to 16322
Data columns (total 11 columns):
unnamed: 0     16300 non-null object
total          16300 non-null object
cubierta       15838 non-null object
precio         16300 non-null object
piso           2808 non-null object
habs           10488 non-null object
expensas       4364 non-null object
descripcion    16300 non-null object
geometry       16300 non-null object
barrio         16300 non-null object
comuna         16300 non-null object
dtypes: object(11)
memory usage: 1.5+ MB


Unnamed: 0,unnamed: 0,total,cubierta,precio,piso,habs,expensas,descripcion,geometry,barrio,comuna
0,2,55,55,1309.09,,,,2 AMBIENTES 3ER PISO LATERAL LIVING COMEDOR AM...,POINT (-58.5229825 -34.6522615),LINIERS,9
1,7,45,40,3066.67,,,,EXCELENTE MONOAMBIENTE A ESTRENAR AMPLIO SUPER...,POINT (-58.443362 -34.5598729),BELGRANO,13


geopandas.geoseries.GeoSeries

In [191]:
props.to_crs("")

{'init': 'epsg:4326'}

In [174]:
props = geopandas.GeoDataFrame(props, crs={'init': 'epsg:4326'})
a=props.geometry[0]

In [178]:
a=shapely.wkt.loads(a)

AttributeError: 'Point' object has no attribute 'encode'

In [179]:
type(a)

shapely.geometry.point.Point

In [187]:
props = geopandas.GeoDataFrame(props, crs={'init': 'epsg:4326'}, geometry=props['geometry'].apply(shapely.wkt.loads))



UnicodeEncodeError: 'ascii' codec can't encode character '\xd1' in position 2: ordinal not in range(128)

In [183]:
props['geometry2']=props['geometry']
type(props.geometry2)

pandas.core.series.Series