# Setup

In [None]:
import locale
import numpy as np
import pandas as pd
from tqdm import tqdm
from geopy.geocoders import Nominatim

In [None]:
import plotly.express as px

In [None]:
# Adopt the locale configured in the environment (empty string = user default);
# `locale.currency` further down formats prices according to it.
locale.setlocale(category=locale.LC_ALL, locale="")

In [None]:
# Enable tqdm's pandas integration.
# NOTE(review): nothing in the visible cells uses it — presumably needed by the
# one-off geocoding step; confirm before removing.
tqdm.pandas()

In [None]:
# Record when the notebook was last run, expressed in the Europe/Rome time zone.
"Last executed: {}".format(pd.Timestamp.today(tz="Europe/Rome"))

# Data

In [None]:
# Path of the JSON Lines file that is loaded into `df` by the next cell.
store = "../data/houses.jl"

In [None]:
%%time
df = pd.read_json(store, lines=True)

In [None]:
# Index the rows by `_id`; the location table is joined on this index later.
df = df.set_index(keys="_id")

In [None]:
# Turn the textual price into a float: drop the "€ " prefix and every "."
# (presumably thousands separators — TODO confirm against the raw data).
# `regex=False` forces literal matching; otherwise "." handling depends on the
# pandas version's default for `regex`.
df["price"] = (
    df.price
    .str.replace("€ ", "", regex=False)
    .str.replace(".", "", regex=False)
    .astype("float")
)

In [None]:
# Turn the textual area into a float by removing every "." (presumably
# thousands separators — TODO confirm against the raw data).
# `regex=False` forces literal matching; otherwise "." handling depends on the
# pandas version's default for `regex`.
df["area"] = df.area.str.replace(".", "", regex=False).astype("float")

# Geocoding

## Retrieve location data

Run this section only once to resolve the addresses into geodata.

Rough extraction of the address by removing words that are not address-related.

More than half of the location data could not be retrieved with this approach.

## Append location data

In [None]:
# Read the stored location table, then key it by `_id` so it can be joined
# onto `df` by index.
location_data = pd.read_feather("../data/houses_location.feather")
location_data = location_data.set_index("_id")

In [None]:
# Cast both coordinate columns to float in a single call.
location_data = location_data.astype({"lat": "float", "lon": "float"})

In [None]:
# Left join on the shared `_id` index: every listing is kept, and listings
# without a row in `location_data` get NaN in the location columns.
df = df.join(location_data, how="left")

Keep only the rows that can be displayed on the map, i.e. those with a latitude.

In [None]:
# Keep only the rows that have a latitude. The explicit copy makes `df` an
# independent frame, so the column assignments in the following cells do not
# raise SettingWithCopyWarning.
df = df[df.lat.notna()].copy()

In [None]:
# Human-readable price for the map tooltip, formatted by the active locale
# with digit grouping.
df["f_price"] = df.price.map(lambda amount: locale.currency(amount, grouping=True))

In [None]:
# Human-readable area for the map tooltip: the numeric value as text followed
# by the unit suffix.
df["f_area"] = df.area.astype(str).add(" m²")

In [None]:
# HTML anchor to the listing page, built from the `_id` index.
# The href's closing quote sits right after the URL (it used to follow </a>,
# which left the attribute unterminated and the markup broken).
df["url"] = df.index.to_series().apply(
    lambda _id: f"<a target='_blank' href='https://www.immobiliare.it/annunci/{_id}/'>link</a>"
)

In [None]:
# Turn the textual room count into a float by dropping any "+" character
# (presumably values look like "5+" — TODO confirm against the raw data).
# "+" is a regex metacharacter, so `regex=False` forces literal matching
# regardless of the pandas version's default.
df["rooms"] = df.rooms.str.replace("+", "", regex=False).astype("float")

In [None]:
# Show the first row to sanity-check the columns built above.
df.head(1)

# Exploration

In [None]:
# Subset to explore: price within 140,000–300,000 (bounds included),
# 2 or 3 rooms, and an area of at least 50.
price_ok = df.price.between(140000, 300000)
rooms_ok = df.rooms.between(2, 3)
area_ok = df.area >= 50
data = df[price_ok & rooms_ok & area_ok]

In [None]:
def rescale_area(area):
    """Min-max rescale `area` so the smallest value maps to 0 and the largest to 1.

    Parameters
    ----------
    area : pandas.Series or array-like exposing ``min()``, ``max()`` and ``astype()``
        Values to rescale.

    Returns
    -------
    Same type as `area`
        ``(area - min) / (max - min)``. When every value is identical the
        range is zero, and all-zero floats are returned instead of dividing
        by zero.
    """
    _min, _max = area.min(), area.max()
    # Divide by the range, not by the sum of the extremes, so the result
    # spans [0, 1] whatever the absolute magnitude of the input.
    span = _max - _min
    if span == 0:
        return (area - _min).astype(float)
    return (area - _min) / span

In [None]:
# Interactive map of the filtered listings: marker colour encodes the price,
# marker size the rescaled area rounded to one decimal.
plot_frame = data.reset_index()
marker_size = rescale_area(data.area.fillna(0)).round(1)

# Tooltip fields: True shows the column, False hides it.
hover_columns = {
    "lat": False,
    "lon": False,
    "_id": True,
    "f_price": True,
    "f_area": True,
    "rooms": True,
    "baths": True,
    "price": False,
    "floor": True,
}

fig = px.scatter_mapbox(
    plot_frame,
    lat="lat",
    lon="lon",
    hover_name="title",
    color="price",
    zoom=10.2,
    size=marker_size,
    hover_data=hover_columns,
    height=700,
    title="Milan housing",
)
# Base-map style and zero margins applied in one call; the returned figure is
# the cell's last expression, so it is what gets displayed.
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)