In [None]:
from shapely import geometry
import geopandas as gpd
from srai.utils.constants import WGS84_CRS
from srai.loaders.osm_tag_loader import OSMTagLoader
from srai.regionizers import H3Regionizer
from srai.joiners import IntersectionJoiner
from srai.embedders import CountEmbedder

## Data preparation


To use ```CountEmbedder```, we first need to prepare some data.  
Namely we need: ```regions_gdf```, ```features_gdf```, and ```joint_gdf```.  
These are the outputs of Regionizers, Loaders and Joiners respectively.

### Define the bounding box polygon

In [None]:
# Corner points of the area of interest. The first vertex is repeated at the
# end so the ring is explicitly closed.
# NOTE(review): values look like (longitude, latitude) pairs given the WGS84
# CRS used below - confirm.
bbox_vertices = (
    (17.0198822, 51.1191217),
    (17.017436, 51.105004),
    (17.0485067, 51.1027944),
    (17.0511246, 51.1175054),
    (17.0198822, 51.1191217),
)
bbox_polygon = geometry.Polygon(bbox_vertices)

# Wrap the polygon in a one-row GeoDataFrame so it can be passed to the
# regionizer and the loader in later cells.
bbox_gdf = gpd.GeoDataFrame(geometry=[bbox_polygon], crs=WGS84_CRS)
bbox_gdf

### Regionize the area using an H3Regionizer

In [None]:
# Split the bounding box into H3 cells at resolution 8.
# NOTE(review): buffer=True presumably lets cells extend past the polygon
# edge so the whole area is covered - confirm against the srai docs.
regionizer = H3Regionizer(resolution=8, buffer=True)
regions_gdf = regionizer.transform(bbox_gdf)
# Draw the original bounding box, then overlay the regions in translucent red
# on the same axes so both stay visible.
ax = bbox_gdf.plot()
regions_gdf.plot(ax=ax, color="red", alpha=0.5)

### Download some objects from OpenStreetMap

In [None]:
# Fetch OpenStreetMap objects located in the bounding box.
loader = OSMTagLoader()
# Tag filter: each key maps to either a single value or a list of values.
tags = {
    "leisure": ["playground", "adult_gaming_centre"],
    "amenity": "pub",
}
features_gdf = loader.load(bbox_gdf, tags=tags)
# Trailing expression so the notebook displays the loaded features.
features_gdf

In [None]:
# Draw the regions first, then overlay the downloaded features in red on the
# same axes.
ax = regions_gdf.plot()
features_gdf.plot(ax=ax, color="red")

### Join the objects with the regions they belong to

In [None]:
# Pair each feature with the regions it belongs to (per the joiner's name,
# matching is based on geometric intersection).
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf)
# Trailing expression so the notebook displays the join result.
joint_gdf

In [None]:
# Quick visual check of the joined geometries.
joint_gdf.plot()

## Embed using features existing in data

In [None]:
# No expected_output_features are given, so the embedding is built from the
# features that actually occur in the data.
embedder = CountEmbedder()
embedding = embedder.transform(regions_gdf, features_gdf, joint_gdf)
# Trailing expression so the notebook displays the embedding.
embedding

## Embed with specified expected output features

In [None]:
# Request a fixed set of output columns. Two of them (amenity_parking and
# leisure_park) were not among the downloaded tags, and amenity_pub was.
expected_features = ["amenity_parking", "leisure_park", "amenity_pub"]
embedder = CountEmbedder(expected_output_features=expected_features)
embedding_expected_features = embedder.transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
)
embedding_expected_features

The resulting embedding contains only the columns specified in ```expected_output_features```.  
The ones that were not present in the data (```leisure_park```, ```amenity_parking```) are added and filled with zeros.   
The features that are both expected and present in the data are counted as usual.  
The ones that are present in the data but are not expected (```leisure_adult_gaming_centre```, ```leisure_playground```) are discarded.