In [2]:
import geopandas as gpd
import pandas as pd

# Part 2 - Warsaw population analysis and visualization

This part will utilize geospatial knowledge presented earlier to prepare a visualization of Warsaw population distribution

### Task 1

Load Warsaw census data and districts boundaries. At the end find the top 3 most populated districts and plot their boundaries on the map

Data files:
* `../../data/warsaw_population.json`
* `../../data/warsaw_districts.geojson`

In [8]:
warsaw_population = ...
warsaw_districts = ...

# BEGIN SOLUTION
warsaw_population = pd.read_json('../../data/warsaw_population.json')
warsaw_districts = gpd.read_file('../../data/warsaw_districts.geojson').to_crs("EPSG:4326")

warsaw_districts = warsaw_districts.merge(warsaw_population, on='district', how='inner')

top_3_districts = warsaw_districts.sort_values(by='population', ascending=False).head(3)
top_3_districts.explore("population")
# END SOLUTION

### Task 2

Load all buildings in Warsaw. You can use `OSMOnlineLoader` from the `srai` library. 

In [10]:
from srai.loaders.osm_loaders import OSMOnlineLoader

loader = OSMOnlineLoader()

osm_building_types = [
    "residential",
    "apartments",
    "house",
    "semidetached_house",
    "detached",
]

osm_filter = {
    "building": osm_building_types,
    "building:levels": True,
}

In [42]:
warsaw_region = ...  # merge all districts into one polygon (remember to convert back to GeoDataFrame) and load osm data for this region

# BEGIN SOLUTION
warsaw_polygon = warsaw_districts.unary_union
warsaw_gdf = gpd.GeoDataFrame(geometry=[warsaw_polygon], crs="EPSG:4326")
buildings = loader.load(warsaw_gdf, osm_filter)
# END SOLUTION

Downloading building: residential       :   0%|          | 0/6 [00:00<?, ?it/s]

Downloading building:levels: True       : 100%|██████████| 6/6 [00:29<00:00,  4.97s/it]


OSM loader looks at those two tags (*building* and *building:levels*) independently. We need to clean the result to leave only entries with both of those tags. 

For cleaning we should assume that:
* we skip buildings of unknown type
* buildings without levels are assumed to have 1 floor
* levels should be integers

In [43]:
# BEGIN SOLUTION
import math

buildings = buildings[buildings["building"].isin(osm_building_types)]
buildings = buildings.fillna(1)

buildings["building:levels"] = buildings["building:levels"].map(lambda x: math.ceil(float(x))).astype(int)
# END SOLUTION

### Task 3

Approximate the distribution of population across buildings. We will do this in four steps:

* Calculate *inhabited_area* of each building, which we understand as a multiplication of its area by the number of floors. This is based on an assumption that in taller building lives more people
* Simplify each building to the single point on the map instead of a polygon
* Calculate *total_inhabited_area* for each district
* Calculate population of each building from the equation: `population = district_population * inhabited_area / total_inhabited_area`

In [44]:
# Start with first two tasks - caclulate the inhabited_area and convert buildings to points

# BEGIN SOLUTION
buildings['inhabited_area'] = buildings.area * buildings['building:levels']
buildings["geometry"] = buildings.centroid
# END SOLUTION

# This is some magic coordinates to zoom on a part of Warsaw
xmin = 21.042753111534097
xmax = 21.069257679735955
ymin = 52.24187245384607
ymax = 52.22533280016626

buildings.cx[xmin:xmax, ymin:ymax].explore()


  buildings['inhabited_area'] = buildings.area * buildings['building:levels']

  buildings["geometry"] = buildings.centroid


In [52]:
# Next, match buildings to districts, using spatial join operation and calculate total_inhabited_area for each district

buildings_with_districts = ...

# BEGIN SOLUTION
buildings_with_districts = buildings.sjoin(warsaw_districts, predicate='within')
# END SOLUTION

buildings_with_districts.head()

Unnamed: 0_level_0,geometry,building,building:levels,inhabited_area,index_right,district,population
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
relation/68308,POINT (21.01596 52.22981),apartments,5,5.484487e-07,17,Śródmieście,101979
relation/71497,POINT (21.00041 52.23138),apartments,5,7.228364e-07,17,Śródmieście,101979
relation/71498,POINT (21.00132 52.23155),apartments,5,7.011146e-07,17,Śródmieście,101979
relation/75094,POINT (21.02276 52.21426),apartments,8,2.739072e-06,17,Śródmieście,101979
relation/1566190,POINT (21.02198 52.22647),apartments,5,7.259651e-07,17,Śródmieście,101979


In [53]:
# Calculate total_inhabited_area for each district

totals_in_districts = ...

# BEGIN SOLUTION
totals_in_districts = buildings_with_districts.groupby('district')['inhabited_area'].sum().rename("total_inhabited_area")
# END SOLUTION

totals_in_districts.head()

district
Bemowo       0.000786
Białołęka    0.001085
Bielany      0.000753
Mokotów      0.001672
Ochota       0.000546
Name: total_inhabited_area, dtype: float64

In [50]:
# Finally, calculate population in each building

buildings_with_population = ...

# BEGIN SOLUTION
buildings_with_population = buildings_with_districts.merge(totals_in_districts, on="district", how="inner")
buildings_with_population["population_in_building"] = (
    buildings_with_population["population"] * buildings_with_population["inhabited_area"] / buildings_with_population["total_inhabited_area"]
).round()
# END SOLUTION

buildings_with_population.head()

Unnamed: 0,geometry,building,building:levels,inhabited_area,index_right,district,population,total_inhabited_area,population_in_building
0,POINT (21.01596 52.22981),apartments,5,5.484487e-07,17,Śródmieście,101979,0.000894,63.0
1,POINT (21.00041 52.23138),apartments,5,7.228364e-07,17,Śródmieście,101979,0.000894,82.0
2,POINT (21.00132 52.23155),apartments,5,7.011146e-07,17,Śródmieście,101979,0.000894,80.0
3,POINT (21.02276 52.21426),apartments,8,2.739072e-06,17,Śródmieście,101979,0.000894,312.0
4,POINT (21.02198 52.22647),apartments,5,7.259651e-07,17,Śródmieście,101979,0.000894,83.0


## Aggregate data into H3 index for easier representation

In [None]:
import h3

In [None]:
buildings_with_population["h3"] = buildings_with_population.apply(
    lambda row: h3.latlng_to_cell(row.geometry.y, row.geometry.x, 8), axis=1
)
buildings_with_population

Unnamed: 0,geometry,building,building:levels,weight,index_right,district,population,total_weight,population_in_building,h3
0,POINT (21.01596 52.22981),apartments,5,5.484487e-07,17,Śródmieście,101979,0.000894,63.0,881f53c917fffff
1,POINT (21.00041 52.23138),apartments,5,7.228364e-07,17,Śródmieście,101979,0.000894,82.0,881f53c939fffff
2,POINT (21.00132 52.23155),apartments,5,7.011146e-07,17,Śródmieście,101979,0.000894,80.0,881f53c939fffff
3,POINT (21.02276 52.21426),apartments,8,2.739072e-06,17,Śródmieście,101979,0.000894,312.0,881f53c9e5fffff
4,POINT (21.02198 52.22647),apartments,5,7.259651e-07,17,Śródmieście,101979,0.000894,83.0,881f53c9edfffff
...,...,...,...,...,...,...,...,...,...,...
105204,POINT (21.03761 52.28451),apartments,1,5.913683e-08,13,Targówek,124240,0.000740,10.0,881f53c847fffff
105205,POINT (21.03685 52.28430),apartments,1,5.913760e-08,13,Targówek,124240,0.000740,10.0,881f53c847fffff
105206,POINT (21.03888 52.28485),apartments,1,5.915557e-08,13,Targówek,124240,0.000740,10.0,881f53c847fffff
105207,POINT (21.01928 52.28882),apartments,7,7.259561e-07,13,Targówek,124240,0.000740,122.0,881f53cb13fffff


In [None]:
population_aggregated = (
    buildings_with_population.groupby("h3")["population_in_building"]
    .sum()
    .rename("population_in_h3")
    .reset_index()
)
population_aggregated

Unnamed: 0,h3,population_in_h3
0,881f52240bfffff,157.0
1,881f522411fffff,25.0
2,881f522419fffff,6.0
3,881f52241dfffff,41.0
4,881f522443fffff,67.0
...,...,...
735,881f53d9d5fffff,35.0
736,881f53d9d7fffff,657.0
737,881f53d9ddfffff,370.0
738,881f53d9e3fffff,308.0


In [None]:
population_aggregated.population_in_h3.sum()

1862854.0

In [None]:
from srai.h3 import h3_to_geoseries

In [None]:
population_aggregated = gpd.GeoDataFrame(population_aggregated, geometry=h3_to_geoseries(population_aggregated.h3))
population_aggregated

Unnamed: 0,h3,population_in_h3,geometry
0,881f52240bfffff,157.0,"POLYGON ((20.93021 52.16150, 20.92750 52.15736..."
1,881f522411fffff,25.0,"POLYGON ((20.95566 52.15644, 20.95295 52.15230..."
2,881f522419fffff,6.0,"POLYGON ((20.95337 52.16392, 20.95066 52.15978..."
3,881f52241dfffff,41.0,"POLYGON ((20.94294 52.15897, 20.94023 52.15483..."
4,881f522443fffff,67.0,"POLYGON ((20.91519 52.17151, 20.91248 52.16737..."
...,...,...,...
735,881f53d9d5fffff,35.0,"POLYGON ((20.95272 52.36035, 20.95000 52.35622..."
736,881f53d9d7fffff,657.0,"POLYGON ((20.96548 52.35782, 20.96276 52.35369..."
737,881f53d9ddfffff,370.0,"POLYGON ((20.95041 52.36780, 20.94770 52.36367..."
738,881f53d9e3fffff,308.0,"POLYGON ((20.92135 52.34554, 20.91863 52.34141..."


## Show data on the map
3 proposed solutions:
- simple GeoPandas `.explore()`
- little bit prettier `srai.plotting` solution
- 3D Deck.gl map

In [None]:
# base folium - explore
population_aggregated.explore('population_in_h3')

In [None]:
# srai - plot numeric
from srai.plotting import plot_numeric_data

In [None]:
plot_numeric_data(
    population_aggregated.rename(columns={"h3": "region_id"}).set_index("region_id"),
    "population_in_h3",
)

In [None]:
# pydeck 3d
from srai.plotting.folium_wrapper import _generate_linear_colormap
import plotly.express as px
import pydeck as pdk

In [None]:
colormap = _generate_linear_colormap(
    # https://plotly.com/python/builtin-colorscales/
    px.colors.sequential.Aggrnyl_r,
    min_value=population_aggregated["population_in_h3"].min(),
    max_value=population_aggregated["population_in_h3"].max(),
)

population_aggregated["color"] = population_aggregated["population_in_h3"].map(
    colormap.rgb_bytes_tuple
)

# Define a layer to display on a map
layer = pdk.Layer(
    "H3HexagonLayer",
    population_aggregated,
    pickable=True,
    stroked=True,
    filled=True,
    extruded=True,
    get_hexagon="h3",
    get_fill_color="[color[0], color[1], color[2], 204]",
    elevation_scale=0.5,
    get_elevation="population_in_h3",
    coverage=0.8,
)

# Set the viewport location
view_state = pdk.ViewState(
    latitude=52.2317, longitude=21.0062, zoom=9.5, bearing=0, pitch=30
)


# Render
pdk.Deck(
    layers=[layer],
    map_style="light",
    initial_view_state=view_state,
    tooltip={"text": "Population: {population_in_h3}"},
)