In [None]:
from srai.embedders import ContextualCountEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders.osm_loaders import OSMPbfLoader
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS
from srai.neighbourhoods import H3Neighbourhood
from srai.plotting.folium_wrapper import plot_regions, plot_numeric_data
from srai.regionizers import H3Regionizer
from srai.utils import geocode_to_region_gdf

## Data preparation

In order to use `ContextualCountEmbedder` we need to prepare some data.  
Namely we need: `regions`, `features`, and `joint`.  
These are the outputs of Regionizers, Loaders and Joiners respectively.

In [None]:
# Geocode the place name into the area GeoDataFrame that every later step
# (regionizing, feature loading) takes as input, then draw it on a map.
# `geocode_to_region_gdf` comes from the notebook's import cell at the top.
area_gdf = geocode_to_region_gdf("Lisboa, PT")
plot_regions(area_gdf)

### Regionize the area using an H3Regionizer

In [None]:
# Split the study area into H3 regions at resolution 9.
regionizer = H3Regionizer(
    resolution=9,
    buffer=True,
)
regions_gdf = regionizer.transform(area_gdf)

# Show the resulting regions.
regions_gdf

### Download some objects from OpenStreetMap

You can use both `osm_tags_type` and `grouped_osm_tags_type` filters. In this example, a predefined `grouped_osm_tags_type` filter `GEOFABRIK_LAYERS` is used.

In [None]:
# Load the OSM objects found inside the study area, selected and grouped by
# the predefined GEOFABRIK_LAYERS tag filter (imported in the top import cell).
loader = OSMPbfLoader()
features_relation = loader.load(area_gdf, tags=GEOFABRIK_LAYERS)
features_relation

### Join the objects with the regions they belong to

In [None]:
# Pair regions with the loaded features using the intersection joiner; the
# result is the `joint` input that the embedder expects.
joiner = IntersectionJoiner()
joint_relation = joiner.transform(
    regions_gdf,
    features_relation,
)

joint_relation

## Embed using features existing in data

`ContextualCountEmbedder` extends the capabilities of the basic `CountEmbedder` by incorporating the neighbourhood of the embedded region. In this example we will use the `H3Neighbourhood`.

In [None]:
# Neighbourhood object passed as `neighbourhood=` to both
# ContextualCountEmbedder instances created in the following cells.
h3n = H3Neighbourhood()

### Squashed vector version (default)

The embedder returns a vector of the same length as `CountEmbedder` does, but sums the averaged values from the neighbourhoods, each diminished by the square of the neighbour distance.

In [None]:
# Squashed variant: neighbourhood values are folded into a single vector per
# region (concatenate_vectors=False), using neighbours up to distance 10.
cce = ContextualCountEmbedder(
    neighbourhood=h3n,
    neighbourhood_distance=10,
    concatenate_vectors=False,
)
embeddings = cce.transform(
    regions_gdf,
    features_relation,
    joint_relation,
)

embeddings

### Concatenated vector version

The embedder returns a vector of length `n * distance`, where `n` is the number of features from the `CountEmbedder` and `distance` is the number of neighbourhoods analysed.

Each feature name is suffixed with `_d`, where `d` is the distance of the neighbourhood it describes. Values are averaged over all neighbours at that distance.

In [None]:
# Concatenated variant: same neighbourhood and distance as the squashed
# example, but each distance keeps its own set of feature columns
# (concatenate_vectors=True).
wide_cce = ContextualCountEmbedder(
    neighbourhood=h3n,
    neighbourhood_distance=10,
    concatenate_vectors=True,
)
wide_embeddings = wide_cce.transform(
    regions_gdf,
    features_relation,
    joint_relation,
)

wide_embeddings

### Plotting example features

In [None]:
# Map the squashed "leisure" embedding value of every region.
plot_numeric_data(
    regions_gdf,
    embeddings,
    "leisure",
    tiles_style="CartoDB positron",
)

In [None]:
# Map the squashed "transport" embedding value of every region.
plot_numeric_data(
    regions_gdf,
    embeddings,
    "transport",
    tiles_style="CartoDB positron",
)