In [None]:
from shapely import geometry
import geopandas as gpd
from srai.constants import WGS84_CRS, REGIONS_INDEX
from srai.loaders.osm_loaders import OSMOnlineLoader
from srai.regionizers import H3Regionizer
from srai.joiners import IntersectionJoiner
from srai.embedders import CountEmbedder
from srai.plotting.folium_wrapper import plot_regions, plot_numeric_data

## Data preparation


In order to use `CountEmbedder` we need to prepare some data.  
Namely, we need: `regions_gdf`, `features_gdf`, and `joint_gdf`.  
These are the outputs of Regionizers, Loaders and Joiners respectively.

### Define the bounding box polygon

In [None]:
# Vertices of the area of interest as (x, y) pairs; the first point is
# repeated at the end so the ring is explicitly closed.
area_vertices = [
    (17.0198822, 51.1191217),
    (17.017436, 51.105004),
    (17.0485067, 51.1027944),
    (17.0511246, 51.1175054),
    (17.0198822, 51.1191217),
]
bbox_polygon = geometry.Polygon(area_vertices)

# Wrap the polygon in a single-row GeoDataFrame tagged with the WGS84 CRS.
bbox_gdf = gpd.GeoDataFrame(geometry=[bbox_polygon], crs=WGS84_CRS)
bbox_gdf

### Regionize the area using an H3Regionizer

In [None]:
# Split the area of interest into H3 regions at resolution 8.
# NOTE(review): `buffer=True` presumably also keeps cells that only partly
# overlap the area — confirm against the H3Regionizer documentation.
regionizer = H3Regionizer(resolution=8, buffer=True)
regions_gdf = regionizer.transform(bbox_gdf)
# Draw the input area first, then hand that map to plot_regions so the
# regions are shown together with it.
folium_map = bbox_gdf.explore(tiles="CartoDB positron")
plot_regions(regions_gdf, map=folium_map)

### Download some objects from OpenStreetMap

You can use both `osm_tags_type` and `grouped_osm_tags_type` filters. In this example, a simple `osm_tags_type` filter is used.

In [None]:
# OSM tag filter: each key maps to either a single value or a list of values.
tags = dict(
    leisure=["playground", "adult_gaming_centre"],
    amenity="pub",
)

# Fetch the matching OpenStreetMap objects for the area of interest.
loader = OSMOnlineLoader()
features_gdf = loader.load(bbox_gdf, tags=tags)
features_gdf

In [None]:
# Plot the regions with a single light gray colour as the base layer...
folium_map = plot_regions(regions_gdf, tiles_style="CartoDB positron", colormap=["lightgray"])
# ...and draw the downloaded features on that same map.
features_gdf.explore(m=folium_map)

### Join the objects with the regions they belong to

In [None]:
# Join the downloaded features with the regions.
# NOTE(review): `return_geom=True` presumably keeps a geometry column in the
# result (the next cell calls `.explore()` on it) — confirm against the
# IntersectionJoiner documentation.
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf, return_geom=True)
joint_gdf

In [None]:
# Plotly's colour palettes are only needed for the map drawn in this cell.
from plotly.express import colors

# Draw the regions with a fully transparent colour as the base layer.
folium_map = plot_regions(regions_gdf, tiles_style="CartoDB positron", colormap=["rgba(0,0,0,0)"])
# reset_index() turns the index into regular columns so that REGIONS_INDEX can
# be used as the colouring column; the Bold qualitative palette supplies the colours.
joint_gdf.reset_index().explore(m=folium_map, column=REGIONS_INDEX, cmap=colors.qualitative.Bold)

## Embed using features existing in data

`CountEmbedder` can count features on a higher level (tag key) or separately for each value (tag key and value). Both examples are shown below.

In [None]:
# "Wide" variant: with count_subcategories=True the features are counted
# separately for each tag value (tag key and value), as described above.
wide_embedder = CountEmbedder(count_subcategories=True)
wide_embedding = wide_embedder.transform(regions_gdf, features_gdf, joint_gdf)
wide_embedding

In [None]:
# "Dense" variant: with count_subcategories=False the features are counted
# on the higher level (tag key only), as described above.
dense_embedder = CountEmbedder(count_subcategories=False)
dense_embedding = dense_embedder.transform(regions_gdf, features_gdf, joint_gdf)
dense_embedding

## Embed with specified expected output features

In [None]:
# Columns the resulting embedding is expected to contain. Some of them
# (`amenity_parking`, `leisure_park`) were not requested from OSM above.
expected_features = [
    "amenity_parking",
    "leisure_park",
    "leisure_playground",
    "amenity_pub",
]

embedder = CountEmbedder(expected_output_features=expected_features)
embedding_expected_features = embedder.transform(regions_gdf, features_gdf, joint_gdf)
embedding_expected_features

In [None]:
# Show the `leisure_playground` column of the embedding on a map of the regions.
# NOTE(review): the arguments are passed positionally as (regions, embedding,
# column name) — confirm this order matches the installed srai version.
plot_numeric_data(
    regions_gdf, embedding_expected_features, "leisure_playground", tiles_style="CartoDB positron"
)

The resulting embedding contains only the columns specified in `expected_output_features`.  
The ones that were not present in the data (`leisure_park`, `amenity_parking`) are added and filled with zeros.   
The features that are both expected and present in the data are counted as usual.  
The ones that are present in the data but are not expected (`leisure_adult_gaming_centre`) are discarded.