In [None]:
from labda import Subject
from labda.spatial.osm import features_from_dataframe

# Load the subject stored in the parquet file and remember its CRS.
sbj = Subject.from_parquet("560102.parquet")
crs = sbj.metadata.crs

# Keep only the rows whose trip_status is "stationary", restricted to the
# columns that are handed to the feature lookup below.
trips = sbj.df
is_stationary = trips["trip_status"] == "stationary"
df = trips.loc[is_stationary, ["latitude", "longitude", "trip_id"]]

# Every tag key that appears in the query, in a fixed order.
ALL_TAG_KEYS = (
    "aerialway",
    "aeroway",
    "amenity",
    "barrier",
    "boundary",
    "building",
    "craft",
    "emergency",
    "geological",
    "healthcare",
    "highway",
    "historic",
    "landuse",
    "leisure",
    "man_made",
    "military",
    "natural",
    "office",
    "place",
    "power",
    "public_transport",
    "railway",
    "route",
    "shop",
    "telecom",
    "tourism",
    "water",
    "waterway",
)

# The subset of keys that is switched on (mapped to True); all others map to False.
ENABLED_TAG_KEYS = {
    "amenity",
    "building",
    "craft",
    "emergency",
    "healthcare",
    "leisure",
    "office",
    "shop",
}

tags = {key: key in ENABLED_TAG_KEYS for key in ALL_TAG_KEYS}

features = features_from_dataframe(
    df,
    crs,
    tags,
    "trip_id",
    buffer=100,
    elements=["way"],
    geometry=["Polygon"],
)

In [26]:
from pydantic import (
    BaseModel,
    ConfigDict,
    TypeAdapter,
    field_validator,
    Field,
    model_validator,
    BeforeValidator,
)
from shapely.geometry import Polygon
from typing import Annotated, Any, Self
import numpy as np
import pandas as pd


def fix_nan(value: Any) -> Any:
    """Replace a NaN float with ``None``; return anything else unchanged.

    Args:
        value: Any raw value, e.g. one cell taken from a DataFrame record.

    Returns:
        ``None`` when ``value`` is a float that is NaN, otherwise ``value``
        itself (including ordinary, non-NaN floats).
    """
    # Only floats are probed with np.isnan: calling it on arbitrary objects
    # such as strings would raise a TypeError.
    if isinstance(value, float) and np.isnan(value):
        return None
    return value


def fix_building(value: Any) -> str | bool | None:
    return True if value == "yes" else value


def parse_leisure(value: Any) -> str | None:
    """Return the leisure value if it is usable as a tag, else ``None``.

    The raw value is first passed through ``fix_nan``. The result is kept
    only when it is truthy and is not one of the excluded values
    (currently just ``"common"``).

    Args:
        value: Raw value of the ``leisure`` field.

    Returns:
        The cleaned value, or ``None`` when it is empty or excluded.
    """
    cleaned = fix_nan(value)

    if not cleaned or cleaned in ["common"]:
        return None

    return cleaned


# class Address(BaseModel):
#     house_number: str | None = Field(alias="housenumber")
#     name: str | None = Field(alias="housename")
#     conscription_number: str | None = Field(alias="conscriptionnumber")
#     street: str | None
#     place: str | None
#     postcode: str | None
#     city: str | None
#     country: str | None
#     state: str | None


class LocationOSM(BaseModel):
    """Validated record describing one OSM feature.

    Instances are built from dict-like records. Keys not declared as fields
    are dropped (``extra="ignore"``), and the ``tags`` list is derived from
    the raw record by ``validate_tags`` before field validation runs.
    """

    # Populated from the "osm_id" key of the input record (alias).
    id: int = Field(alias="osm_id")
    # address: Annotated[str | None, BeforeValidator(fix_nan)]
    name: Annotated[str | None, BeforeValidator(fix_nan)]
    geometry: Polygon
    amenity: Annotated[str | None, BeforeValidator(fix_nan)]
    # pydantic applies stacked BeforeValidators from right to left, so
    # fix_building runs first and fix_nan second.
    building: Annotated[
        str | bool | None, BeforeValidator(fix_nan), BeforeValidator(fix_building)
    ]
    # Derived in validate_tags; any "tags" entry in the input is overwritten.
    tags: list[str] = Field(default_factory=list)

    model_config = ConfigDict(
        # shapely's Polygon is not a pydantic-native type.
        arbitrary_types_allowed=True,
        extra="ignore",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_tags(cls, values: Any) -> Any:
        """Derive the ``tags`` entry from the raw input record.

        Args:
            values: Raw input given to the model. Only dict input is
                processed; anything else is returned untouched so that
                pydantic's own validation reports the problem.

        Returns:
            A shallow copy of ``values`` whose ``"tags"`` key holds the
            non-``None`` parsed tag values, or ``values`` itself when it is
            not a dict.
        """
        if not isinstance(values, dict):
            return values

        # One entry per tag source; sources that parse to None are skipped.
        candidates = [parse_leisure(values.get("leisure"))]
        tags = [tag for tag in candidates if tag is not None]

        # Build a new dict rather than writing into the caller's record.
        return {**values, "tags": tags}


class Locations(BaseModel):
    # Container model holding a list of LocationOSM entries.
    values: list[LocationOSM] = Field(default_factory=list)

    @classmethod
    def _from_features(cls, df: pd.DataFrame) -> Self:
        """Build an instance from a features DataFrame.

        The index is moved into regular columns with ``reset_index`` so its
        levels show up as keys in the per-row records, then each row is
        handed to the model as a dict.

        Args:
            df: DataFrame with one feature per row.

        Returns:
            A new instance whose ``values`` are validated from the rows.
        """
        flattened = df.reset_index()
        records = flattened.to_dict(orient="records")
        return cls(values=records)


# Validate every row of `features` into LocationOSM models, then display the
# first validated entry as the cell output.
locations = Locations._from_features(features)
locations.values[0]

LocationOSM(id=29383689, name='Næsby Skole', geometry=<POLYGON ((585900.819 6142142.015, 585895.717 6142142.25, 585894.76 6142130....>, amenity='school', building=None, tags=[])

# Columns to keep
- brand

## Maybe

In [None]:
import osmnx as ox

# features = ox.features_from_address("Odense", tags, dist=5000)

In [None]:
# Work on a copy so the inspection below cannot touch `features`.
f = features.copy()

# Print each column name followed by its value counts and a separator line.
for col in f.columns:
    print(col)
    print(f[col].value_counts())
    print("---")

In [None]:
# Display the "addr:place" column of the copied features frame.
f["addr:place"]