In [None]:
from srai.loaders.osm_loaders import OSMPbfLoader
from srai.regionizers import H3Regionizer
from srai.joiners import IntersectionJoiner
from srai.embedders import ContextualCountEmbedder
from srai.plotting.folium_wrapper import plot_regions, plot_numeric_data
from srai.neighbourhoods import H3Neighbourhood

## Data preparation

In order to use `ContextualCountEmbedder` we need to prepare some data.  
Namely we need: `regions_gdf`, `features_gdf`, and `joint_gdf`.  
These are the outputs of Regionizers, Loaders and Joiners respectively.

In [None]:
from srai.utils import geocode_to_region_gdf

# Geocode the query string into `area_gdf`, the study area that the later
# cells regionize and load OSM features for, then draw it with `plot_regions`.
area_gdf = geocode_to_region_gdf("Lisboa, PT")
plot_regions(area_gdf)

### Regionize the area using an H3Regionizer

In [None]:
# Split the study area into H3 cells; `regions_gdf` is the first of the three
# inputs the embedder needs.
# NOTE(review): `resolution=9` is presumably the H3 grid level and
# `buffer=True` presumably keeps cells that only partly overlap the area
# boundary — confirm both against the H3Regionizer documentation.
regionizer = H3Regionizer(resolution=9, buffer=True)
regions_gdf = regionizer.transform(area_gdf)
regions_gdf

### Download some objects from OpenStreetMap

In [None]:
from srai.loaders.osm_loaders.filters.hex2vec import HEX2VEC_FILTER

# Load OpenStreetMap objects for the study area, restricted to the tags in
# HEX2VEC_FILTER; `features_gdf` is the second input the embedder needs.
# NOTE(review): this presumably downloads an OSM PBF extract, so the cell may
# be slow and require network access on a fresh run — confirm.
loader = OSMPbfLoader()
features_gdf = loader.load(area_gdf, tags=HEX2VEC_FILTER)
features_gdf

### Join the objects with the regions they belong to

In [None]:
# Join the loaded features with the regions they belong to; `joint_gdf` is the
# third input the embedder needs, alongside `regions_gdf` and `features_gdf`.
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf)
joint_gdf

## Embed using features existing in data

`ContextualCountEmbedder` extends the capabilities of the basic `CountEmbedder` by incorporating the neighbourhood of the embedded region. In this example we will use the `H3Neighbourhood`.

In [None]:
# Neighbourhood object shared by both embedders below (passed as `neighbourhood=`).
h3n = H3Neighbourhood()

### Squashed vector version (default)

The embedder returns a vector of the same length as `CountEmbedder`, but adds to it the averaged values from the neighbourhoods, each diminished by the squared neighbour distance.

In [None]:
# Squashed variant: neighbour values are folded into the base count columns,
# considering neighbourhoods up to distance 10 from each region.
squash_cce = ContextualCountEmbedder(
    neighbourhood=h3n,
    neighbourhood_distance=10,
    squash_vectors=True,
)
embeddings = squash_cce.transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
)
embeddings

### Concatenated vector version

The embedder returns a vector of length `n * (distance + 1)`, where `n` is the number of features from the `CountEmbedder` and `distance` is the number of neighbourhoods analysed (the additional group of columns holds the region's own counts at distance 0).

Each feature name is suffixed with `_d`, where `d` is the current distance. Values are averaged from all neighbours.

In [None]:
# Concatenated variant: `squash_vectors=False` keeps a separate group of
# columns per neighbourhood distance instead of summing them into the base
# columns, again considering neighbourhoods up to distance 10.
wide_cce = ContextualCountEmbedder(
    neighbourhood=h3n,
    neighbourhood_distance=10,
    squash_vectors=False,
)
wide_embeddings = wide_cce.transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
)
wide_embeddings

### Plotting example features

In [None]:
# Map the "leisure" column of the squashed embeddings over the regions.
plot_numeric_data(
    regions_gdf,
    embeddings,
    "leisure",
    tiles_style="CartoDB positron",
)

In [None]:
# Map the "building" column of the squashed embeddings over the regions.
plot_numeric_data(
    regions_gdf,
    embeddings,
    "building",
    tiles_style="CartoDB positron",
)