# 🏠 California Housing Prices + H3 Spatial Indexing

This notebook demonstrates:

- Loading California housing data
- Converting geographic coordinates to H3 hexagonal indices
- Aggregating housing values spatially
- Visualizing data on a map using Folium

In [72]:
import pandas as pd
import folium
from folium import GeoJson, GeoJsonTooltip
import h3.api.basic_str as h3

In [73]:
# Read the raw housing table from the CSV next to the notebook.
df = pd.read_csv('housing.csv')

# Keep only rows strictly inside the study bounding box (Bay Area-like subregion).
lat_inside = df["latitude"].gt(36.5) & df["latitude"].lt(38.5)
lng_inside = df["longitude"].gt(-123.5) & df["longitude"].lt(-121.0)
df = df.loc[lat_inside & lng_inside]

# Discard any row that lacks a coordinate; both are needed for H3 indexing.
df = df.dropna(subset=["latitude", "longitude"])

In [74]:
# Show the first rows of the filtered frame as a sanity check on the load and the bounding-box filter.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [75]:
# H3 resolution shared by every cell index computed below.
resolution = 6  # Neighborhood-level aggregation

# Index each (latitude, longitude) pair with H3. Zipping the two columns avoids
# materialising a Series per row (which a row-wise DataFrame.apply does) and
# still produces a valid, empty column when the filtered frame has no rows.
df["h3_index"] = [
    h3.latlng_to_cell(lat, lng, resolution)
    for lat, lng in zip(df["latitude"], df["longitude"])
]
# Preview the H3 result
df[["latitude", "longitude", "h3_index"]].head()

Unnamed: 0,latitude,longitude,h3_index
0,37.88,-122.23,8628308afffffff
1,37.86,-122.22,8628308afffffff
2,37.85,-122.24,8628308afffffff
3,37.85,-122.25,8628308afffffff
4,37.85,-122.25,8628308afffffff


In [76]:
# Aggregate by H3 hexagon: one output row per distinct cell index.
hex_groups = df.groupby("h3_index")
agg_df = hex_groups.agg(
    # Mean of the per-row median house values that fall in the hexagon.
    median_house_value=("median_house_value", "mean"),
    # Rough reference point for the map only: the first coordinate seen in
    # the group, not the hexagon centre.
    longitude=("longitude", "first"),
    latitude=("latitude", "first"),
    # Total room count across every row in the hexagon.
    total_rooms=("total_rooms", "sum"),
).reset_index()

In [77]:
# Show the first aggregated hexagons: one row per H3 cell with its mean value, reference coordinate and room total.
agg_df.head()

Unnamed: 0,h3_index,median_house_value,longitude,latitude,total_rooms
0,862830007ffffff,161300.0,-122.18,38.17,4093.0
1,86283000fffffff,231250.0,-122.28,38.22,319.0
2,86283001fffffff,320400.0,-122.18,38.23,2475.0
3,862830027ffffff,135747.619048,-122.24,38.11,80747.0
4,86283002fffffff,138471.428571,-122.26,38.15,23112.0


In [78]:
def color_scale(val, vmin, vmax):
    """Return a hex fill colour for ``val`` on a linear yellow-to-red ramp.

    Args:
        val: Value to colour.
        vmin: Value mapped to the light (yellow) end of the ramp.
        vmax: Value mapped to the dark (red) end of the ramp.

    Returns:
        A ``#rrggbb`` string. Missing values get a neutral grey, and a
        degenerate range (``vmin == vmax``) maps to the ramp midpoint.
    """
    if pd.isna(val):
        return "#808080"
    span = vmax - vmin
    # Guard the division, then clamp so out-of-range values stay on the ramp.
    position = 0.5 if span == 0 else float((val - vmin) / span)
    position = min(1.0, max(0.0, position))
    light, dark = (255, 255, 178), (189, 0, 38)
    red, green, blue = (
        round(lo + (hi - lo) * position) for lo, hi in zip(light, dark)
    )
    return f"#{red:02x}{green:02x}{blue:02x}"


# Initialize map
m = folium.Map(location=[37.5, -122.3], zoom_start=8)

# Compute global color scale boundaries (min and max median value)
vmin = agg_df["median_house_value"].min()
vmax = agg_df["median_house_value"].max()

# Draw one hexagon per aggregated H3 cell; only the cell id and its mean
# value are needed, so iterate those two columns directly.
for cell, value in zip(agg_df["h3_index"], agg_df["median_house_value"]):
    # Get the polygon boundary of the H3 hexagon
    boundary = h3.cell_to_boundary(cell)

    # Convert boundary from [lat, lng] to GeoJSON-friendly [lng, lat] format
    geojson_boundary = [[lng, lat] for lat, lng in boundary]
    # GeoJSON linear rings must end on the position they start with.
    if geojson_boundary and geojson_boundary[0] != geojson_boundary[-1]:
        geojson_boundary.append(geojson_boundary[0])

    # A missing mean cannot be converted with int(); label it explicitly.
    label = "n/a" if pd.isna(value) else f"${int(value):,}"

    # Construct a GeoJSON Feature for this hexagon
    polygon = {
        "type": "Feature",
        "geometry": {
            "type": "Polygon",
            "coordinates": [geojson_boundary]  # list of coordinates (ring)
        },
        "properties": {
            "value": label  # formatted label for tooltip
        }
    }

    # Add the hexagon as a styled GeoJSON object to the map. The default
    # argument binds this iteration's value; a plain closure would see only
    # the last value once the loop has finished.
    GeoJson(
        polygon,
        style_function=lambda feature, val=value: {
            # Color determined dynamically based on value and global min/max
            "fillColor": color_scale(val, vmin, vmax),
            "color": "black",         # border color of hex
            "weight": 0.4,            # border thickness
            "fillOpacity": 0.6        # opacity of hex fill
        },
        tooltip=GeoJsonTooltip(
            fields=["value"],         # field to display from properties
            aliases=["Median Price"]  # label shown in tooltip
        )
    ).add_to(m)  # Add this layer to the folium map


In [79]:
# Display the map: as the last expression in the cell, `m` is rendered inline by the notebook.
m

In [80]:
# Optionally export the map to an HTML file; the relative path resolves against the current working directory.

m.save("california_housing_map.html")