In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

##### El archivo utilizado en este notebook se puede conseguir en:
    https://drive.google.com/file/d/0B_6KPWBDxfYQQ1NtWmswQm1fVG8/view
##### Para ejecutar el análisis, importar dicho archivo en la carpeta del notebook

### Google Maps

In [2]:
import gmaps
import gmaps.datasets

import os

# The Google Maps API key is a credential and must not live in the notebook.
# Export GOOGLE_MAPS_API_KEY before starting Jupyter; a missing variable fails
# here with a KeyError instead of later with an opaque map-rendering error.
# NOTE(review): the key that was previously committed in this cell should be
# treated as leaked and rotated.
gmaps.configure(api_key=os.environ['GOOGLE_MAPS_API_KEY'])

#### Cargamos los datos con pickle

In [3]:
import cPickle as pickle

# Load the pickled listings frame. The context manager guarantees the file handle
# is closed even if unpickling fails.
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file; only
# load this download if its source is trusted.
with open("porpiedadesCompletas.p", "rb") as pickle_file:
    data = pickle.load(pickle_file)

In [4]:
# Print the frame summary (entry count, per-column non-null counts and dtypes).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114048 entries, 0 to 114047
Data columns (total 28 columns):
id                            114048 non-null object
created_on                    114048 non-null object
operation                     114048 non-null object
property_type                 114048 non-null object
place_name                    114013 non-null object
place_with_parent_names       114048 non-null object
country_name                  114048 non-null object
state_name                    114048 non-null object
geonames_id                   97857 non-null float64
lat-lon                       75504 non-null object
lat                           75504 non-null float64
lon                           75504 non-null float64
price                         100126 non-null float64
currency                      100125 non-null object
price_aprox_local_currency    100126 non-null float64
price_aprox_usd               100126 non-null float64
surface_total_in_m2           82345 non

### Nos quedamos con las columnas que nos interesan

In [5]:
# Keep only the fields used by the price map: place name, total price,
# both surfaces, price per m2 and the coordinate pair.
maps_df = data.loc[:, [
    'place_name',
    'price_aprox_usd',
    'surface_total_in_m2',
    'surface_covered_in_m2',
    'price_usd_per_m2',
    'latlon',
]]
maps_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114048 entries, 0 to 114047
Data columns (total 6 columns):
place_name               114013 non-null object
price_aprox_usd          100126 non-null float64
surface_total_in_m2      82345 non-null float64
surface_covered_in_m2    99504 non-null float64
price_usd_per_m2         72474 non-null float64
latlon                   114048 non-null object
dtypes: float64(4), object(2)
memory usage: 5.2+ MB


### Sacamos todas las entradas que no tengan geoubicación

In [6]:
# Drop rows whose 'latlon' value is missing; other columns may still hold NaN.
# NOTE(review): the recorded output shows 114048 entries both before and after
# this step, i.e. no row was actually removed for this input.
to_map = maps_df.dropna(axis=0, how='any', subset=['latlon'])
to_map.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 114048 entries, 0 to 114047
Data columns (total 6 columns):
place_name               114013 non-null object
price_aprox_usd          100126 non-null float64
surface_total_in_m2      82345 non-null float64
surface_covered_in_m2    99504 non-null float64
price_usd_per_m2         72474 non-null float64
latlon                   114048 non-null object
dtypes: float64(4), object(2)
memory usage: 6.1+ MB


### Rename por comodidad

In [7]:
# Shorter column names for the rest of the notebook. The mapping is by name,
# so it does not depend on the column order of the selection above.
to_map = to_map.rename(columns={
    'place_name': 'location',
    'price_aprox_usd': 'total_price',
    'surface_total_in_m2': 'total_surface',
    'surface_covered_in_m2': 'covered_surface',
    'price_usd_per_m2': 'm2_price',
    'latlon': 'latlon',
})
to_map.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 114048 entries, 0 to 114047
Data columns (total 6 columns):
location           114013 non-null object
total_price        100126 non-null float64
total_surface      82345 non-null float64
covered_surface    99504 non-null float64
m2_price           72474 non-null float64
latlon             114048 non-null object
dtypes: float64(4), object(2)
memory usage: 6.1+ MB


### Rellenamos las superficies

In [8]:
def set_surface(row):
    """Return the row with a missing ``total_surface`` filled from ``covered_surface``.

    Intended for ``DataFrame.apply(..., axis=1)`` on the renamed map frame.

    Parameters:
        row: pandas Series with ``total_surface`` and ``covered_surface`` entries.

    Returns:
        The same row when ``total_surface`` is present; otherwise a copy whose
        ``total_surface`` equals ``covered_surface`` (which may itself be missing).

    The input row is never modified: pandas does not support functions that
    mutate the object handed to ``apply``.

    NOTE: a second ``set_surface`` for the raw column names is defined further
    down in this notebook and rebinds this name once its cell has run.
    """
    if not pd.isnull(row.total_surface):
        return row
    filled = row.copy()
    filled['total_surface'] = row.covered_surface
    return filled

# Row-wise pass that fills missing total surfaces; preview the first five rows.
map_surface = to_map.apply(set_surface, axis='columns')
map_surface.head()

Unnamed: 0,location,total_price,total_surface,covered_surface,m2_price,latlon
0,San Antonio De Padua,120000.0,333.0,93.0,360.36036,"(-34.66692, -58.70097)"
1,San Antonio De Padua,220000.0,220.0,220.0,1000.0,"(-34.66692, -58.70097)"
2,Mataderos,185000.0,300.0,103.0,616.666667,"(-34.6544492346, -58.4895603251)"
3,Pilar,97000.0,54.0,54.0,,"(-34.429071, -58.795418)"
4,San Isidro,750000.0,,,,"(-34.4789383, -58.5214371)"


### Nos quedamos con las filas que tengan valores utilizables

In [9]:
# A row is usable when it already has a positive price per m2, or when one can
# be derived later from a positive total price and a positive total surface.
useful_map = map_surface.loc[
    map_surface.m2_price.gt(0)
    | (map_surface.total_price.gt(0) & map_surface.total_surface.gt(0))
]
useful_map.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93643 entries, 0 to 114047
Data columns (total 6 columns):
location           93609 non-null object
total_price        93643 non-null float64
total_surface      93643 non-null float64
covered_surface    89725 non-null float64
m2_price           72473 non-null float64
latlon             93643 non-null object
dtypes: float64(4), object(2)
memory usage: 5.0+ MB


### Se rellenan los precios por m2

In [10]:
def set_m2_price(row):
    """Return the row with a missing ``m2_price`` derived as price / surface.

    Intended for ``DataFrame.apply(..., axis=1)`` on the renamed map frame.

    Parameters:
        row: pandas Series with ``total_price``, ``total_surface`` and
            ``m2_price`` entries.

    Returns:
        The same row when ``m2_price`` is present; otherwise a copy whose
        ``m2_price`` is ``total_price / total_surface``.

    The input row is never modified: pandas does not support functions that
    mutate the object handed to ``apply``. The division is not guarded against
    a zero surface; the preceding filter cell only keeps rows without a price
    per m2 when their total surface is positive.

    NOTE: a second ``set_m2_price`` for the raw column names is defined further
    down in this notebook and rebinds this name once its cell has run.
    """
    if not pd.isnull(row.m2_price):
        return row
    filled = row.copy()
    filled['m2_price'] = row.total_price / row.total_surface
    return filled

# Row-wise pass that derives the missing prices per m2, then show the summary.
complete_map = useful_map.apply(set_m2_price, axis='columns')
complete_map.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93643 entries, 0 to 114047
Data columns (total 6 columns):
location           93609 non-null object
total_price        93643 non-null float64
total_surface      93643 non-null float64
covered_surface    89725 non-null float64
m2_price           93643 non-null float64
latlon             93643 non-null object
dtypes: float64(4), object(2)
memory usage: 5.0+ MB


## Rellenado de data para Pickle

In [11]:
def set_surface(row):
    """Return the row with a missing ``surface_total_in_m2`` filled from ``surface_covered_in_m2``.

    Variant for the full frame with the raw column names; intended for
    ``DataFrame.apply(..., axis=1)``.

    Parameters:
        row: pandas Series with ``surface_total_in_m2`` and
            ``surface_covered_in_m2`` entries.

    Returns:
        The same row when the total surface is present; otherwise a copy whose
        total surface equals the covered surface (which may itself be missing).

    The input row is never modified: pandas does not support functions that
    mutate the object handed to ``apply``.

    NOTE: this rebinds the ``set_surface`` defined earlier in the notebook for
    the renamed columns; re-running the earlier ``apply`` cell after this one
    would use this version.
    """
    if not pd.isnull(row.surface_total_in_m2):
        return row
    filled = row.copy()
    filled['surface_total_in_m2'] = row.surface_covered_in_m2
    return filled

def set_m2_price(row):
    """Return the row with a missing ``price_usd_per_m2`` derived as price / surface.

    Variant for the full frame with the raw column names; intended for
    ``DataFrame.apply(..., axis=1)``.

    Parameters:
        row: pandas Series with ``price_aprox_usd``, ``surface_total_in_m2``
            and ``price_usd_per_m2`` entries.

    Returns:
        The same row when the price per m2 is present; otherwise a copy whose
        ``price_usd_per_m2`` is ``price_aprox_usd / surface_total_in_m2``.

    The input row is never modified: pandas does not support functions that
    mutate the object handed to ``apply``. The division is not guarded against
    a zero surface; the filter cell that runs before this is applied only keeps
    rows without a price per m2 when their total surface is positive.

    NOTE: this rebinds the ``set_m2_price`` defined earlier in the notebook for
    the renamed columns.
    """
    if not pd.isnull(row.price_usd_per_m2):
        return row
    filled = row.copy()
    filled['price_usd_per_m2'] = row.price_aprox_usd / row.surface_total_in_m2
    return filled

In [12]:
# Fill missing total surfaces on the full frame.
# NOTE: this rebinds `data`, so the frame as loaded from the pickle is no
# longer reachable after this cell.
data = data.apply(set_surface, axis='columns')

In [13]:
# Summary after the surface fill, to compare non-null counts with the first summary.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114048 entries, 0 to 114047
Data columns (total 28 columns):
id                            114048 non-null object
created_on                    114048 non-null object
operation                     114048 non-null object
property_type                 114048 non-null object
place_name                    114013 non-null object
place_with_parent_names       114048 non-null object
country_name                  114048 non-null object
state_name                    114048 non-null object
geonames_id                   97857 non-null float64
lat-lon                       75504 non-null object
lat                           75504 non-null float64
lon                           75504 non-null float64
price                         100126 non-null float64
currency                      100125 non-null object
price_aprox_local_currency    100126 non-null float64
price_aprox_usd               100126 non-null float64
surface_total_in_m2           104937 no

In [14]:
# Keep rows that either have a positive price per m2 already, or have both a
# positive total price and a positive total surface to derive it from.
data = data.loc[
    data.price_usd_per_m2.gt(0)
    | (data.price_aprox_usd.gt(0) & data.surface_total_in_m2.gt(0))
]

In [15]:
# Derive the missing prices per m2 on the filtered full frame (rebinds `data`).
data = data.apply(set_m2_price, axis='columns')

In [16]:
# Summary of the frame that is about to be pickled.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93643 entries, 0 to 114047
Data columns (total 28 columns):
id                            93643 non-null object
created_on                    93643 non-null object
operation                     93643 non-null object
property_type                 93643 non-null object
place_name                    93609 non-null object
place_with_parent_names       93643 non-null object
country_name                  93643 non-null object
state_name                    93643 non-null object
geonames_id                   80655 non-null float64
lat-lon                       61212 non-null object
lat                           61212 non-null float64
lon                           61212 non-null float64
price                         93643 non-null float64
currency                      93643 non-null object
price_aprox_local_currency    93643 non-null float64
price_aprox_usd               93643 non-null float64
surface_total_in_m2           93643 non-null float64

In [17]:
# Persist the filled frame. The context manager flushes and closes the file
# handle even if pickling raises, instead of leaving that to garbage collection.
with open("fullPriceM2.p", "wb") as pickle_file:
    pickle.dump(data, pickle_file)

### Se eliminan las entradas con columnas vacías

In [18]:
# Only these three fields are needed for mapping; drop any row where one is missing.
final_map = complete_map.loc[:, ['location', 'm2_price', 'latlon']].dropna(axis=0, how='any')
final_map.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93609 entries, 0 to 114047
Data columns (total 3 columns):
location    93609 non-null object
m2_price    93609 non-null float64
latlon      93609 non-null object
dtypes: float64(1), object(2)
memory usage: 2.9+ MB


### Filtrado de precios

In [19]:
# Keep prices per m2 strictly between 150 and 8000 (both bounds exclusive).
# The thresholds are the author's choice; no rationale is recorded in the notebook.
mapping_data = final_map.loc[final_map.m2_price.gt(150) & final_map.m2_price.lt(8000)]

## Mapa general de CABA y GBA

In [20]:
# Heatmap over every filtered listing, weighted by price per m2.
fig = gmaps.figure()
locations, weights = mapping_data["latlon"], mapping_data["m2_price"]
heatmap_layer = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer)
fig

A Jupyter Widget

In [21]:
# Tune the general layer after it has been created: intensity ceiling and point radius.
# NOTE(review): exact meaning and units of these attributes come from jupyter-gmaps — confirm there.
heatmap_layer.max_intensity = 300000
heatmap_layer.point_radius = 4.5

## División por grupos

### Grupo 1

In [22]:
# Group 1: rows whose location contains any of these name fragments.
# The fragments are joined into one regex alternation, which matches exactly
# the rows that OR-ing one str.contains call per fragment would match
# (`location` has no missing values after the earlier dropna).
group1 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Puerto Madero',
    'Cañitas',
    'Palermo',
    'Recoleta',
    'Belgrano',
    'Nuñez',
    'Vicente López',
    'Barrio Norte',
    'Retiro',
    'Olivos',
    'La Lucila',
    'Villa Crespo',
    'Colegiales',
    'Villa Urquiza',
    'Barrio El Golf',
    'Coghlan',
]))]
group1.count()

location    20406
m2_price    20406
latlon      20406
dtype: int64

In [23]:
# Heatmap restricted to group 1, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group1["latlon"], group1["m2_price"]
heatmap_layer_g1 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g1)
fig

A Jupyter Widget

In [24]:
# Tune the group 1 layer: intensity ceiling and point radius.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g1.max_intensity = 10000
heatmap_layer_g1.point_radius = 3

### Grupo 2

In [25]:
# Group 2: rows whose location contains any of these name fragments.
# One regex alternation replaces the chain of OR-ed str.contains calls and
# selects the same rows. Truncated fragments ('San Nicol', 'Agronom') are kept
# exactly as written.
group2 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Barrio Cerrado',
    'Portezuelo',
    'Caballito',
    'Saavedra',
    'Ortuzar',
    'Delta',
    'San Telmo',
    'Nordelta',
    'Centro',
    'Los Alisos',
    'Centenario',
    'San Isidro',
    'Almagro',
    'Chacarita',
    'San Nicol',
    'del Parque',
    'Monserrat',
    'Boedo',
    'Devoto',
    'Tribunales',
    'Martinez',
    'Florida',
    'Abasto',
    'San Cristobal',
    'Agronom',
    'Acassuso',
    'Parque Chas',
    'Villa Luro',
]))]
group2.count()

location    15668
m2_price    15668
latlon      15668
dtype: int64

In [26]:
# Heatmap restricted to group 2, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group2["latlon"], group2["m2_price"]
heatmap_layer_g2 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g2)
fig

A Jupyter Widget

In [27]:
# Tune the group 2 layer: intensity ceiling and point radius.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g2.max_intensity = 10000
heatmap_layer_g2.point_radius = 3

### Grupo 3

In [28]:
# Group 3: rows whose location contains any of these name fragments.
# One regex alternation replaces the chain of OR-ed str.contains calls and
# selects the same rows.
# NOTE(review): 'Floresta' is already covered by 'Flores'. Rows matching
# 'Florida Oeste' also match 'Florida' in group 2, and 'Avellaneda' also
# matches 'Parque Avellaneda', which group 4 lists — the groups overlap.
group3 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Victoria',
    'Villa Puey',
    'Flores',
    'Villa Santa Rita',
    'Once',
    'Barrio Los Lagos',
    'Paternal',
    'Balvanera',
    'Monte Castro',
    'Versalles',
    'Tigre',
    'Congreso',
    'Parque Patricios',
    'Barradas',
    'Fincas de Iraola',
    'Liniers',
    'Ramos Mej',
    'San Fernando',
    'Quilmes',
    'Zamora',
    'Chacabuco',
    'General Mitre',
    'Floresta',
    'Adrogu',
    'Mayling',
    'Constituci',
    'City Bell',
    'Banfield',
    'Canning',
    'Beccar',
    'Peña',
    'La Alameda',
    'Villa Real',
    'Munro',
    'Wilde',
    'Berazategui',
    'Haedo',
    'Martelli',
    'Villa Sarmiento',
    'La Plata',
    'Milberg',
    'Villa Rosa',
    'Boca',
    'Florida Oeste',
    'Mataderos',
    'Avellaneda',
    'Santos Lugares',
]))]
group3.count()

location    23713
m2_price    23713
latlon      23713
dtype: int64

In [29]:
# Heatmap restricted to group 3, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group3["latlon"], group3["m2_price"]
heatmap_layer_g3 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g3)
fig

A Jupyter Widget

In [30]:
# Tune the group 3 layer: intensity ceiling and point radius.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g3.max_intensity = 10000
heatmap_layer_g3.point_radius = 3

### Grupo 4

In [31]:
# Group 4: rows whose location contains any of these name fragments.
# One regex alternation replaces the chain of OR-ed str.contains calls and
# selects the same rows.
# NOTE(review): 'Lanús Oeste' is already covered by 'Lanús'. 'Lanús' and
# 'Escobar' also match 'Lanús Este' and 'Belén de Escobar', which group 5
# lists, and 'Parque Avellaneda' is matched by 'Avellaneda' in group 3 — the
# groups overlap.
group4 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Marmol',
    'Matanza',
    'Pilar',
    'Parque Avellaneda',
    'Lanús',
    'Carapachay',
    'Gonnet',
    'Temperley',
    'Bernal',
    'Morón',
    'Castelar',
    'Luzuriaga',
    'Ballester',
    'General San Mart',
    'Echeverr',
    'Coronado',
    'Ezeiza',
    'Villa Elisa',
    'Pacheco',
    'Muñiz',
    'Lanús Oeste',
    'Lomas del Palomar',
    'San Justo',
    'Caseros',
    'Barrio San Gabriel',
    'Villa Adelina',
    'Remedios de Escalada',
    'Ciudadela',
    'San Andres',
    'Bosch',
    'Loma Verde',
    'San Miguel',
    'Lomas del Mirador',
    'Pompeya',
    'Sarandi',
    'Haras Santa',
    'Monte Grande',
    'Moreno',
    'Villa Lynch',
    'Virreyes',
    'Del Viso',
    'Escobar',
    'Villa de Mayo',
    'Lugano',
]))]
group4.count()

location    17295
m2_price    17295
latlon      17295
dtype: int64

In [32]:
# Heatmap restricted to group 4, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group4["latlon"], group4["m2_price"]
heatmap_layer_g4 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g4)
fig

A Jupyter Widget

In [33]:
# Tune the group 4 layer: intensity ceiling and point radius.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g4.max_intensity = 10000
heatmap_layer_g4.point_radius = 3

### Grupo 5

In [34]:
# Group 5: rows whose location contains any of these name fragments.
# One regex alternation replaces the chain of OR-ed str.contains calls and
# selects the same rows.
# NOTE(review): 'Lanús Este' and 'Belén de Escobar' are also matched by
# 'Lanús' and 'Escobar' in group 4. 'Lavallol' may be a misspelling of
# 'Llavallol' (matching is case-sensitive) — verify against the data.
group5 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Tres de Febrero',
    'Jose Leon Suarez',
    'Lanús Este',
    'Guernica',
    'San Vicente',
    'Villa Madero',
    'Malvinas',
    'General Rodriguez',
    'Pila Village',
    'Belén de Escobar',
    # 'San Martín' was disabled in the original cell and stays excluded.
    'Boulogne',
    'Bella Vista',
    'Padua',
    'Garin',
    'Lavallol',
    'Alsina',
    'Casanova',
    'Ituzaing',
    'Tortuguitas',
    'Glew',
    'Merlo',
]))]
group5.count()

location    3075
m2_price    3075
latlon      3075
dtype: int64

In [35]:
# Heatmap restricted to group 5, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group5["latlon"], group5["m2_price"]
heatmap_layer_g5 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g5)
fig

A Jupyter Widget

In [36]:
# Tune the group 5 layer; its intensity ceiling (5000) is lower than the 10000 used for groups 1-4.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g5.max_intensity = 5000
heatmap_layer_g5.point_radius = 3

### Grupo 6

In [37]:
# Group 6: rows whose location contains any of these name fragments.
# One regex alternation replaces the chain of OR-ed str.contains calls and
# selects the same rows.
# NOTE(review): 'Hurlingam' may be a misspelling of 'Hurlingham'; as written
# it would not match that spelling — verify against the data.
group6 = mapping_data[mapping_data.location.str.contains('|'.join([
    'Hurlingam',
    'Torcuato',
    'Polvorines',
    'Paso del Rey',
    'Grand Bourg',
    'La Tablada',
    'Burzaco',
    'Longchamps',
    'Villa Libertado',
    'José C Paz',
]))]
group6.count()

location    981
m2_price    981
latlon      981
dtype: int64

In [38]:
# Heatmap restricted to group 6, weighted by price per m2.
fig = gmaps.figure()
locations, weights = group6["latlon"], group6["m2_price"]
heatmap_layer_g6 = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer_g6)
fig

A Jupyter Widget

In [39]:
# Tune the group 6 layer; it uses the lowest intensity ceiling (2500) of all groups.
# NOTE(review): a later cell assigns point_radius on this layer again.
heatmap_layer_g6.max_intensity = 2500
heatmap_layer_g6.point_radius = 4

In [40]:
# Combined map: stack the six group layers on one figure, added from group 6
# down to group 1 (same order as adding them one by one).
fig = gmaps.figure()

for layer in (
    heatmap_layer_g6,
    heatmap_layer_g5,
    heatmap_layer_g4,
    heatmap_layer_g3,
    heatmap_layer_g2,
    heatmap_layer_g1,
):
    fig.add_layer(layer)

fig

A Jupyter Widget

In [48]:
# Give every group layer its own colour ramp so the groups can be told apart on
# the combined map. Each entry is (layer, point radius, gradient); every gradient
# starts at a fully transparent stop followed by two stops of one hue.
# Radius is assigned before gradient for each layer, from group 1 to group 6.
for layer, radius, gradient in [
    (heatmap_layer_g1, 2, [(0,0,0,0.0), (0, 0, 200, 0.85), (0, 0, 250, 0.85)]),
    (heatmap_layer_g2, 2, [(0,0,0,0.0), (0, 200, 0, 0.9), (0, 250, 0, 0.9)]),
    (heatmap_layer_g3, 2, [(0,0,0,0.0), (200, 0, 0, 0.95), (250, 0, 0, 0.95)]),
    (heatmap_layer_g4, 3, [(0,0,0,0.0), (200, 200, 0, 1.0), (250, 250, 0, 1.0)]),
    (heatmap_layer_g5, 3, [(0,0,0,0.0), (0, 200, 200, 1.0), (0, 250, 250, 1.0)]),
    (heatmap_layer_g6, 3, [(0,0,0,0.0), (200, 0, 200, 1.0), (250, 0, 250, 1.0)]),
]:
    layer.point_radius = radius
    layer.gradient = gradient